-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathStructures.py
119 lines (101 loc) · 4.59 KB
/
Structures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#Expansion class used for variable and function storage for analysis and presentation
class Expansion:
    """Hold one repeat-expansion call together with values derived from it.

    The constructor stores every argument unchanged on the instance. All
    other attributes start out empty or ``None`` and are expected to be
    filled in by later processing steps (named in the section comments
    inside ``__init__``).

    Parameter names are part of the caller-facing interface and are kept
    as they are, including ``chr``, which shadows the builtin of the same
    name inside ``__init__`` only.
    """

    def __init__(self, chr, start, end, repeat_id, repeat_unit, ref_motif,
                 allele1_size, allele2_size, wt_size, pathogenic_range,
                 copy_numberA1, copy_numberA2, allele1_support,
                 allele2_support, sample_id):
        # Data imported from straglr output in ResultBedReader = required
        self.chr = chr
        self.start = start
        self.end = end
        self.repeat_id = repeat_id
        self.repeat_unit = repeat_unit
        self.ref_motif = ref_motif
        self.allele1_size = allele1_size
        self.allele2_size = allele2_size
        self.copy_numberA1 = copy_numberA1
        self.copy_numberA2 = copy_numberA2
        self.allele1_support = allele1_support
        self.allele2_support = allele2_support
        self.wt_size = wt_size
        self.sample_id = sample_id

        # Data imported and inferred from loci bed file in ResultBedReader = required
        self.pathogenic_range = pathogenic_range

        # Data inferred in analyze genotype
        self.in_pathogenic_range = None
        self.size_difference = None

        # Data imported and inferred from tsv file in getHistData = optional
        self.title = ""  # goes through the property setter, stored as _title
        self.read_list = []
        self.read_dict = {}

        # Data inferred in expansionScorer = optional
        self.norm_score = None

        # Data inferred from newGenotyping = optional.
        # Each attribute is initialised exactly once to None; previously
        # new_in_pathogenic_range was first set to the string "None" and the
        # support attributes were assigned twice, all of which were
        # overwritten with None before __init__ returned.
        self.new_read_list = []
        self.new_allele1 = None
        self.new_allele2 = None
        self.new_in_pathogenic_range = None
        self.new_size_difference = None
        self.new_allele1_support = None
        self.new_allele2_support = None
        self.new_copy_numberA1 = None
        self.new_copy_numberA2 = None
        self.new_norm_score = None

    @property
    def title(self):
        """Return the stored title (colons already replaced by underscores)."""
        return self._title

    @title.setter
    def title(self, string: str):
        """Store ``string`` as the title with every ':' replaced by '_'."""
        self._title = string.replace(":", "_")
#Locus class imported from bed/txt file used in straglr run
class Locus:
    """Plain record describing one locus; every argument is stored unchanged.

    Each constructor parameter becomes an attribute of the same name.
    """

    def __init__(self, name, chromosome, start, end, motif, locus,
                 associated_disease, reference_size, normal_range,
                 min_pathogenic):
        # Identity and coordinates.
        self.name, self.chromosome = name, chromosome
        self.start, self.end = start, end
        # Repeat description.
        self.motif, self.locus = motif, locus
        self.associated_disease = associated_disease
        # Size information.
        self.reference_size, self.normal_range = reference_size, normal_range
        self.min_pathogenic = min_pathogenic
        # NOTE: pathogenic_motif and pathogenic_motif_range are not stored;
        # they existed only as commented-out assignments.
# Methylation call class used in extract_repeats.py
class MethylationCall:
    """Modification scores recorded for a single position.

    ``modifications`` maps a modification code to its quality score,
    e.g. ``{'m': qual, 'h': qual}``. A falsy argument (``None`` or an empty
    mapping) is replaced by a fresh empty dict.
    """

    def __init__(self, position: int, modifications: dict = None):
        self.position = position
        self.modifications = modifications if modifications else {}

    def get_modification_state(self):
        """Return ``'unmodified'`` or the code with the highest score.

        The scores of all codes are summed; a sum below 127 yields
        ``'unmodified'``. Otherwise the code with the largest score is
        returned, and on ties the first one in the dict's order wins.
        """
        scores = self.modifications
        # NOTE(review): 255 / 2 is 127.5, so a sum of exactly 127 is treated
        # as modified here — confirm that this boundary is intended.
        if sum(scores.values()) < 127:
            return 'unmodified'
        return max(scores, key=scores.get)

    def is_modified(self):
        """Return True when the most likely state is ``'m'`` or ``'h'``."""
        return self.get_modification_state() in ('m', 'h')

    def quality_score(self):
        """Return the largest recorded score, or 0 when there are none."""
        return max(self.modifications.values(), default=0)
class RepeatSequence:
    """Plain record for one repeat-containing sequence taken from a read.

    Every argument is stored under an attribute of the same name. A falsy
    ``methylation_calls`` (``None`` or an empty collection) is replaced by a
    new empty list.
    """

    def __init__(self, locus: str, read_name: str, repeat_size: int, sequence: str,
                 start_position: int, end_position: int,
                 left_flank: str, repeat_sequence: str, right_flank: str,
                 methylation_calls=None):
        # Where the sequence comes from.
        self.locus, self.read_name = locus, read_name
        self.repeat_size = repeat_size
        # The sequence and its start/end positions.
        self.sequence = sequence
        self.start_position, self.end_position = start_position, end_position
        # The three parts: left flank, repeat, right flank.
        self.left_flank = left_flank
        self.repeat_sequence = repeat_sequence
        self.right_flank = right_flank
        # Avoid sharing a mutable default between instances.
        self.methylation_calls = methylation_calls or []