-
Notifications
You must be signed in to change notification settings - Fork 23
/
Copy pathmain.rs
119 lines (100 loc) · 3.45 KB
/
main.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
//! Runiq is a utility to filter unique lines from input.
//!
//! It operates much faster than either the Unix `sort` or `uniq`
//! utilities, and without the constraints the two impose (either
//! sorting input or only filtering sequential duplicates).
//!
//! Runiq has a focus on memory space rather than throughput, simply
//! because it comes from a need of filtering large streams of data.
//! Having said this, it should be a goal to perform at least as fast
//! as other tools of the same ilk.
//!
//! Runiq is built mainly as a command line tool, although it can be
//! used as a library as the `Filter` trait is exposed publicly. If
//! you are using Runiq as a library, do **not** rely on any modules
//! hidden from the public documentation.
use bytelines::ByteLinesReader;
mod options;
mod statistics;
use crate::options::Options;
use crate::statistics::Stats;
use runiq::Filter;
use std::env;
use std::fs::File;
use std::io::{self, BufReader, Read, Write};
/// Line terminator written after every echoed line.
const EOL: &[u8; 1] = b"\n";
fn main() -> io::Result<()> {
let result = run();
if let Err(ref err) = result {
if err.kind() != io::ErrorKind::BrokenPipe {
return result;
}
}
Ok(())
}
/// Parses the command line options, then streams every input through the
/// configured filter.
///
/// Each input named `-` is read from stdin; any other input is opened as a
/// file. Lines are passed one by one to the filter. When statistics are
/// enabled, sizes and unique/duplicate counts are recorded and printed at
/// the end instead of echoing lines. Otherwise a line is echoed to stdout
/// when the filter's verdict differs from the `inverted` option (lines the
/// filter accepts in normal mode, lines it rejects in inverted mode).
///
/// # Errors
///
/// Returns an `io::Error` if an input file cannot be opened (the message
/// includes the offending path), if reading a line fails, or if writing to
/// or flushing stdout fails.
fn run() -> io::Result<()> {
    // parse in our options from the command line args
    let options = Options::from(env::args_os());

    // borrow IO handles up front so the locks below can reference them
    let stdin = io::stdin();
    let stdout = io::stdout();

    // open every source before producing output; a bad path is reported
    // as an error rather than aborting the process with a panic
    let readers: Vec<Box<dyn Read>> = options
        .inputs
        .iter()
        .map(|input| -> io::Result<Box<dyn Read>> {
            let reader: Box<dyn Read> = match input.as_ref() {
                "-" => Box::new(stdin.lock()),
                path => Box::new(File::open(path).map_err(|err| {
                    // keep the original kind, but name the file that failed
                    io::Error::new(err.kind(), format!("{}: {}", path, err))
                })?),
            };
            Ok(reader)
        })
        .collect::<io::Result<_>>()?;

    // create boxed filter from provided option filter
    let mut filter: Box<dyn Filter> = options.filter.into();

    // create statistics container for filters
    let mut statistics = Stats::new();

    // lock stdout to speed up the writes
    let mut stdout = stdout.lock();

    // sequential readers for now
    for reader in readers {
        // construct our line reader to iterate lines of bytes
        let mut lines = BufReader::new(reader).byte_lines();

        // iterate all lines as &[u8] slices
        while let Some(input) = lines.next().transpose()? {
            // track input sizing (+1 for the stripped line terminator)
            if options.statistics {
                statistics.add_size(input.len() + 1)
            }

            // ask the filter for its verdict on this line
            let unique = filter.detect(input);

            if options.statistics {
                // statistics mode only counts, it never echoes
                if unique {
                    statistics.add_unique();
                } else {
                    statistics.add_duplicate();
                }
            } else if unique != options.inverted {
                // echo accepted lines normally, rejected lines when inverted
                stdout.write_all(input)?;
                stdout.write_all(EOL)?;
            }
        }
    }

    // handle stats logging
    if options.statistics {
        statistics.print();
    }

    // flush buffers
    stdout.flush()?;

    // done
    Ok(())
}