-
Notifications
You must be signed in to change notification settings - Fork 0
/
check_exclusive.py
95 lines (80 loc) · 2.35 KB
/
check_exclusive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import sys
from pysam import VariantFile
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
def parse_vcf(vcf_path):
vcf = VariantFile(vcf_path)
Vs = {}
for rec in vcf.fetch():
if rec.chrom not in Vs:
Vs[rec.chrom] = []
Vs[rec.chrom].append(rec.pos)
return Vs
def main():
exclusive_vcf_path = sys.argv[1]
dipcall_vcf_path = sys.argv[2]
exclusive_vars = parse_vcf(exclusive_vcf_path)
dipcall_vars = parse_vcf(dipcall_vcf_path)
for chrom, exVs in exclusive_vars.items():
for exV in exVs:
min_d = float("inf")
count = 0
for dipV in dipcall_vars[chrom]:
if abs(dipV - exV) < min_d:
min_d = abs(dipV - exV)
count = 1
elif abs(dipV - exV) == min_d:
count += 1
if min_d == 0 and count == 1:
min_d = float("inf")
count = 0
for dipV in dipcall_vars[chrom]:
if abs(dipV - exV) < min_d and abs(dipV - exV) != 0:
min_d = abs(dipV - exV)
count = 1
elif abs(dipV - exV) == min_d:
count += 1
print(chrom, exV, min_d, count)
def bars():
txt_path = sys.argv[1]
Ds = []
for line in open(txt_path):
d = int(line.strip("\n").split(" ")[2])
Ds.append(d)
max_d = 250
bars = {}
for d in Ds:
if d == 0:
key = "0bp"
elif d > max_d:
key = f">{max_d}bp"
else:
min_split, max_split = int(d / 50) * 50 + 1, int(d / 50) * 50 + 50
key = f"{min_split}-{max_split}bp"
if key not in bars:
bars[key] = 0
bars[key] += 1
keys = [
"0bp",
"1-50bp",
"51-100bp",
"101-150bp",
"151-200bp",
"201-250bp",
">250bp",
]
values = [bars[k] for k in keys]
bars_dict = {"Distance": keys, "Count": values}
df = pd.DataFrame.from_dict(bars_dict)
print(df)
sns.barplot(x="Distance", y="Count", data=df)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig("dist-bars.png")
if __name__ == "__main__":
mode = sys.argv.pop(1)
if mode == "get":
main()
elif mode == "plot":
bars()