forked from mjpost/bin
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mycut
executable file
·69 lines (52 loc) · 2.07 KB
/
mycut
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python3
"""
Addresses limitations in cut's field handling.
* Allows redundant output of columns
* Columns can be output in any order
* Supports intermingled ranges (x-y) and lone columns (x).
* Pads input stream to requested column
* Supports open-ended ranges at the start (-y) or end (x-)
"""
import sys
import gzip
def _parse_fields(spec):
    """Parse a field spec such as ``"3,1-2,5-"`` into column selectors.

    Returns ``(selectors, widest)``. ``selectors`` is a list of
    ``(start, stop)`` pairs of 1-indexed, inclusive column numbers, where
    ``stop`` is ``None`` for a range that runs to the end of each line.
    ``widest`` is the largest column number named explicitly; it is the
    width every input line gets padded to.

    Raises:
        ValueError: if a token is non-numeric, has more than one ``-``, has
            both range ends missing, or is a decreasing closed range.
    """
    selectors = []
    widest = 0
    for token in spec.split(","):
        start_text, dash, stop_text = token.partition("-")
        if not dash:
            # A lone column is just a one-element range.
            start = stop = int(token)
        else:
            if "-" in stop_text:
                raise ValueError(f"invalid range {token}: expected at most one '-'")
            if not start_text and not stop_text:
                raise ValueError(
                    f"invalid range {token}: only one end can be open-ended"
                )
            start = int(start_text) if start_text else 1
            stop = int(stop_text) if stop_text else None
            # Ordering is only checked when both ends were written out.
            if start_text and stop_text and stop < start:
                raise ValueError(
                    f"invalid range {token}: must be non-decreasing"
                )
        selectors.append((start, stop))
        widest = max(widest, start, start if stop is None else stop)
    return selectors, widest


def _select_columns(line, delimiter, selectors, widest):
    """Return the columns of ``line`` picked out by ``selectors``, in order."""
    # Strip only the line terminator. A bare rstrip() would also eat trailing
    # delimiters (tabs, spaces) and silently drop empty trailing columns.
    # The leading "" makes list indices line up with 1-indexed columns.
    columns = [""] + line.rstrip("\r\n").split(delimiter)
    shortfall = widest + 1 - len(columns)
    if shortfall > 0:
        # Pad short lines so every explicitly requested column exists.
        columns.extend([""] * shortfall)
    last = len(columns) - 1
    picked = []
    for start, stop in selectors:
        end = last if stop is None else stop
        picked.extend(columns[start:end + 1])
    return picked


def main(args):
    """Print the requested fields of every input line, cut-style.

    ``args`` must provide three attributes (the argparse namespace built by
    this script does):

    * ``fields``: comma-separated, 1-indexed field spec. Each token is a
      single column (``3``), a closed range (``2-4``) or an open-ended range
      (``-3`` means ``1-3``; ``2-`` means column 2 through the last column
      of each line).
    * ``infile``: iterable of text lines.
    * ``delimiter``: string used to split input lines and to join output.

    Columns may be repeated and given in any order. Lines with fewer columns
    than the largest explicitly requested one are padded with empty columns.
    One output line is written to standard output per input line.

    Raises:
        ValueError: if the field spec is malformed (see ``_parse_fields``).
    """
    selectors, widest = _parse_fields(args.fields)
    delimiter = args.delimiter
    for line in args.infile:
        print(*_select_columns(line, delimiter, selectors, widest), sep=delimiter)
if __name__ == "__main__":
    import argparse

    # Command-line entry point: parse the options and hand them to main().
    # The module docstring doubles as the --help description; the raw
    # formatter keeps its bullet list intact.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "infile",
        nargs="?",
        type=argparse.FileType("r"),
        default=sys.stdin,
        help="input file to read (default: standard input)",
    )
    parser.add_argument(
        "--delimiter",
        "-d",
        default="\t",
        help="column separator used for both input and output (default: TAB)",
    )
    parser.add_argument(
        "--fields",
        "-f",
        default="1",
        help="comma-separated, 1-indexed columns and ranges, "
        "e.g. 3,1-2,5- (default: %(default)s)",
    )
    args = parser.parse_args()
    main(args)