简洁的写法
/// Reads the whole file at `filename` in one step and returns its distinct,
/// non-blank lines.
///
/// Every line is trimmed of surrounding whitespace; lines that are empty after
/// trimming are dropped, and duplicates collapse because the result is a set.
///
/// # Errors
///
/// Returns the converted I/O error if the file cannot be opened or its content
/// cannot be read as UTF-8 text.
fn load_blacklist(filename: &str) -> Result<HashSet<String>> {
    File::open(Path::new(filename))
        .and_then(|mut f| {
            let mut buffer = String::new();
            f.read_to_string(&mut buffer).map(|_| {
                buffer
                    .lines()
                    .map(str::trim)
                    // Filter before allocating so blank lines never become `String`s.
                    .filter(|s| !s.is_empty())
                    .map(str::to_owned)
                    .collect::<HashSet<_>>()
            })
        })
        .map_err(|e| e.into())
}
如果文件体积不大,可以用这种一步读取全文的方式进行处理。
另外,简洁的写法,会牺牲行数、时间等调试和统计数据的获取。
下面这种写法似乎更直观,更清晰;但是没有了.map, .and_then 之类方法的起承转合,似乎缺少了一些 Rust 的感觉。
/// Reads the whole file at `filename` and returns its distinct, non-blank lines.
///
/// Every line is trimmed of surrounding whitespace; lines that are empty after
/// trimming are dropped.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or its content cannot be read
/// as UTF-8 text.
fn load_category(filename: &str) -> Result<HashSet<String>> {
    let mut f = File::open(Path::new(filename))?;
    let mut buffer = String::new();
    f.read_to_string(&mut buffer)?;

    let cats: HashSet<_> = buffer
        .lines()
        .map(str::trim)
        // Filter before allocating so blank lines never become `String`s.
        .filter(|s| !s.is_empty())
        .map(str::to_owned)
        .collect();
    Ok(cats)
}
引入 BufReader,增加读缓冲
#[allow(dead_code)]fn load_blacklist(filename: &str) -> Result> { let fp = File::open(Path::new(filename)).unwrap(); let clock = Instant::now(); let mut cats = HashSet::new(); let mut n_in = 0; let mut n_bytes = 0; let reader = BufReader::with_capacity( 1024*1024*4, fp ); for mut l in reader.lines().map(|r| r.unwrap()) { if !l.is_empty() && l.chars().rev().next().unwrap() == '\n' { l.pop(); } let line = l.trim(); if line.len() < 1 { continue; } n_in += 1; n_bytes += line.len(); cats.insert( line.to_owned() ); } let n_out = cats.len(); let time_elapsed = clock.elapsed(); let time_last = (time_elapsed.as_secs() as f64) + (time_elapsed.subsec_nanos() as f64 * 1e-9); let tps = if time_last > 0.0 { n_in as f64 / time_last as f64 } else { n_in as f64 }; let bw = (if time_last > 0.0 { n_bytes as f64 / time_last as f64 } else { n_bytes as f64 }) / 1048576_f64; info!("[STAT] parse {} last {:.2} seconds, in={} out={} tps={:.2} bw={:.2} MB/s\n", filename, time_last, n_in, n_out, tps, bw); Ok(cats)}
使用 enumerate(),记录输入数据的行号
#[allow(dead_code)]fn load_blacklist(filename: &str) -> Result> { let fp = File::open(Path::new(filename)).unwrap(); let clock = Instant::now(); let mut cats = HashSet::new(); let mut n_in = 0; let mut n_bytes = 0; let reader = BufReader::with_capacity( 1024*1024*4, fp ); for (ln, lo) in reader.lines().enumerate() { let mut l = match lo { Ok(s) => s, Err(e) => { stderr().write_all(format!("BAD LINE#{} error -- {:?}\n", ln, e).as_bytes()).unwrap(); continue; }, }; if !l.is_empty() && l.chars().rev().next().unwrap() == '\n' { l.pop(); } let line = l.trim(); if line.len() < 1 { continue; } n_in += 1; n_bytes += line.len(); cats.insert( line.to_owned() ); } let n_out = cats.len(); let time_elapsed = clock.elapsed(); let time_last = (time_elapsed.as_secs() as f64) + (time_elapsed.subsec_nanos() as f64 * 1e-9); let tps = if time_last > 0.0 { n_in as f64 / time_last as f64 } else { n_in as f64 }; let bw = (if time_last > 0.0 { n_bytes as f64 / time_last as f64 } else { n_bytes as f64 }) / 1048576_f64; info!("[STAT] parse {} last {:.2} seconds, in={} out={} tps={:.2} bw={:.2} MB/s\n", filename, time_last, n_in, n_out, tps, bw); Ok(cats)}
当 filename == '-' 时,从标准输入(STDIN)读取
/// Loads a blacklist from `filename`, or from standard input when `filename`
/// is `"-"`.
///
/// Input is read line by line through a 4 MiB buffer. Every line is trimmed;
/// blank lines are skipped and duplicates collapse in the returned set. When
/// reading from a named file, a `[STAT]` line with timing and throughput is
/// written to standard error; no statistics are printed for standard input.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or if reading a line fails
/// (I/O failure or invalid UTF-8).
// NOTE(review): the dead-code lint is silenced in the original; presumably this
// variant is not called from every build -- confirm before removing.
#[allow(dead_code)]
fn load_blacklist(filename: &str) -> Result<HashSet<String>> {
    // Both sources are erased to one trait object so a single reader path follows.
    let fp: Box<dyn Read> = match filename {
        "-" => Box::new(stdin()),
        path => Box::new(File::open(Path::new(path))?),
    };
    let clock = Instant::now();
    let mut cats = HashSet::new();
    let mut n_in: u64 = 0;
    let mut n_bytes: usize = 0;

    let reader = BufReader::with_capacity(4 * 1024 * 1024, fp);
    for line in reader.lines() {
        // `lines()` already strips the line terminator, so only trimming is needed.
        let line = line?;
        let entry = line.trim();
        if entry.is_empty() {
            continue;
        }
        n_in += 1;
        n_bytes += entry.len();
        cats.insert(entry.to_owned());
    }

    if filename != "-" {
        let n_out = cats.len();
        let time_elapsed = clock.elapsed();
        let time_last =
            (time_elapsed.as_secs() as f64) + (time_elapsed.subsec_nanos() as f64 * 1e-9);
        // When no time was measured, report the raw totals instead of dividing by zero.
        let tps = if time_last > 0.0 {
            n_in as f64 / time_last
        } else {
            n_in as f64
        };
        let bw = (if time_last > 0.0 {
            n_bytes as f64 / time_last
        } else {
            n_bytes as f64
        }) / 1048576_f64;
        eprint!("[STAT] parse {} last {:.2} seconds, in={} out={} tps={:.2} bw={:.2} MB/s\n", filename, time_last, n_in, n_out, tps, bw);
    }
    Ok(cats)
}