1
1
package org .broad .igv .sam ;
2
2
3
3
4
+ import org .apache .log4j .Logger ;
4
5
import org .broad .igv .feature .genome .Genome ;
5
6
import org .broad .igv .ui .util .MessageUtils ;
6
- import org .broad .igv .util .Pair ;
7
7
8
8
import java .util .*;
9
9
16
16
17
17
public class HaplotypeUtils {
18
18
19
+ private static Logger log = Logger .getLogger (HaplotypeUtils .class );
19
20
20
21
private final AlignmentInterval alignmentInterval ;
21
22
Genome genome ;
@@ -25,104 +26,111 @@ public HaplotypeUtils(AlignmentInterval alignmentInterval, Genome genome) {
25
26
this .genome = genome ;
26
27
}
27
28
28
- public void clusterAlignments (String chr , int start , int end , int nClasses ) {
29
+ public boolean clusterAlignments (String chr , int start , int end , int nClasses ) {
29
30
30
31
31
- AlignmentCounts counts = this .alignmentInterval .getCounts ();
32
+ try {
33
+ AlignmentCounts counts = this .alignmentInterval .getCounts ();
32
34
33
- final byte [] reference = genome .getSequence (chr , start , end );
35
+ final byte [] reference = genome .getSequence (chr , start , end );
34
36
35
- // Find snp positions
36
- List <Integer > snpPos = findVariantPositions (start , end , counts , reference );
37
+ // Find snp positions
38
+ List <Integer > snpPos = findVariantPositions (start , end , counts , reference );
37
39
38
- if (snpPos .size () == 0 ) {
39
- MessageUtils .showMessage ("No variants in selected range." );
40
- return ;
41
- }
40
+ if (snpPos .size () == 0 ) {
41
+ MessageUtils .showMessage ("No variants in selected range." );
42
+ return false ;
43
+ }
42
44
43
- if (snpPos .size () < nClasses - 1 ) {
44
- nClasses = snpPos .size () + 1 ;
45
- MessageUtils .showMessage ("Not enough variants, reducing # of clusters: " + nClasses );
46
- }
45
+ if (snpPos .size () < nClasses - 1 ) {
46
+ nClasses = snpPos .size () + 1 ;
47
+ MessageUtils .showMessage ("Not enough variants, reducing # of clusters: " + nClasses );
48
+ }
47
49
48
50
49
- // Adjust start and end to min and max snp positions, there is no information outside these bounds
50
- start = snpPos .get (0 ) - 1 ;
51
- end = snpPos .get (snpPos .size () - 1 ) + 1 ;
51
+ // Adjust start and end to min and max snp positions, there is no information outside these bounds
52
+ start = snpPos .get (0 ) - 1 ;
53
+ end = snpPos .get (snpPos .size () - 1 ) + 1 ;
52
54
53
- // Label alignments
54
- Map <String , List <Alignment >> labelAlignmentMap = labelAlignments (start , end , snpPos , reference , this .alignmentInterval .getAlignmentIterator ());
55
+ // Label alignments
56
+ Map <String , List <Alignment >> labelAlignmentMap = labelAlignments (start , end , snpPos , reference , this .alignmentInterval .getAlignmentIterator ());
57
+ List <String > labels = new ArrayList (labelAlignmentMap .keySet ());
58
+ if (labels .size () < nClasses ) {
59
+ MessageUtils .showMessage ("Not enough features to create " + nClasses + " classes. Max # of classes = " + labels .size ());
60
+ return false ;
61
+ }
55
62
56
- // Sort labels (entries) by # of associated alignments
57
- List <String > labels = new ArrayList (labelAlignmentMap .keySet ());
58
- labels .sort ((o1 , o2 ) -> {
59
- return labelAlignmentMap .get (o2 ).size () - labelAlignmentMap .get (o1 ).size ();
60
- });
63
+ // Sort labels (entries) by # of associated alignments
64
+ labels .sort ((o1 , o2 ) -> {
65
+ return labelAlignmentMap .get (o2 ).size () - labelAlignmentMap .get (o1 ).size ();
66
+ });
67
+
68
+ // Create initial cluster centroids
69
+ List <V > clusters = new ArrayList <>();
70
+ for (int i = 0 ; i < nClasses ; i ++) {
71
+ String label = labels .get (i );
72
+ V v = new V (i + 1 , label );
73
+ clusters .add (v );
74
+ }
61
75
62
- // Create initial cluster centroids
63
- List <V > clusters = new ArrayList <>();
64
- for (int i = 0 ; i < nClasses ; i ++) {
65
- String label = labels .get (i );
66
- V v = new V (i + 1 , label );
67
- clusters .add (v );
68
- }
76
+ // Now assign all labels to a cluster
69
77
70
- // Now assign all labels to a cluster
78
+ int n = 0 ;
79
+ int max = 50 ;
80
+ while (true ) {
81
+ for (String label : labels ) {
71
82
72
- int n = 0 ;
73
- int max = 50 ;
74
- while (true ) {
75
- for (String label : labels ) {
83
+ double min = Double .MAX_VALUE ;
84
+ V centroid = null ;
76
85
77
- double min = Double .MAX_VALUE ;
78
- V centroid = null ;
86
+ for (V c : clusters ) {
87
+ double dist = c .distance (label );
88
+ if (dist < min ) {
89
+ centroid = c ;
90
+ min = dist ;
91
+ }
92
+ }
79
93
80
- for (V c : clusters ) {
81
- double dist = c .distance (label );
82
- if (dist < min ) {
83
- centroid = c ;
84
- min = dist ;
94
+ if (centroid != null ) {
95
+ centroid .add (label );
85
96
}
86
97
}
87
98
88
- if (centroid != null ) {
89
- centroid .add (label );
99
+ boolean movement = false ;
100
+ for (V c : clusters ) {
101
+ if (c .movement ()) {
102
+ movement = true ;
103
+ break ;
104
+ }
90
105
}
91
- }
92
106
93
- boolean movement = false ;
94
- for (V c : clusters ) {
95
- if (c .movement ()) {
96
- movement = true ;
107
+ if (movement && n ++ < max ) {
108
+ for (V c : clusters ) {
109
+ c .reset ();
110
+ }
111
+ } else {
97
112
break ;
98
113
}
99
114
}
100
115
101
- if (movement && n ++ < max ) {
102
- for (V c : clusters ) {
103
- c .reset ();
104
- }
105
- } else {
106
- break ;
107
- }
108
- }
109
-
110
- System .out .println ("Converged in: " + n );
111
-
112
- // Now label alignments
113
- for (int i = 0 ; i < clusters .size (); i ++) {
116
+ // Now label alignments by cluster
117
+ for (int i = 0 ; i < clusters .size (); i ++) {
118
+ V c = clusters .get (i );
119
+ String label = "" + c .id ;
120
+ for (String l : c .allLabels ) {
114
121
115
- V c = clusters .get (i );
116
- String label = "" + c .id ;
117
- for (String l : c .allLabels ) {
118
-
119
- List <Alignment > alignments = labelAlignmentMap .get (l );
120
- for (Alignment a : alignments ) {
121
- a .setHaplotypeName (label );
122
+ List <Alignment > alignments = labelAlignmentMap .get (l );
123
+ for (Alignment a : alignments ) {
124
+ a .setHaplotypeName (label );
125
+ }
122
126
}
123
127
}
128
+ return true ;
129
+ } catch (Exception e ) {
130
+ log .error ("Error clustering alignments" , e );
131
+ MessageUtils .showMessage ("Error clustering alignments: " + e .getMessage ());
132
+ return false ;
124
133
}
125
-
126
134
}
127
135
128
136
private List <Integer > findVariantPositions (int start , int end , AlignmentCounts counts , byte [] reference ) {
@@ -142,35 +150,31 @@ private List<Integer> findVariantPositions(int start, int end, AlignmentCounts c
142
150
return snpPos ;
143
151
}
144
152
153
+ /**
154
+ * Label the alignments by the base value at each snp position over the region defined by [start, end].
155
+ *
156
+ * @param start
157
+ * @param end
158
+ * @param positions
159
+ * @param reference
160
+ * @param iter
161
+ * @return a map of label -> alignment.
162
+ */
145
163
public Map <String , List <Alignment >> labelAlignments (int start , int end , List <Integer > positions , byte [] reference , Iterator <Alignment > iter ) {
146
164
147
165
Map <String , List <Alignment >> alignmentMap = new HashMap <>();
148
-
149
166
while (iter .hasNext ()) {
150
-
151
167
Alignment alignment = iter .next ();
152
-
153
168
if (start >= alignment .getStart () && end <= alignment .getEnd ()) {
154
-
155
169
String hapName = "" ;
156
- int dist = 0 ;
157
-
158
170
for (Integer pos : positions ) {
159
-
160
- byte ref = reference [pos - start ];
161
171
boolean found = false ;
162
172
for (AlignmentBlock block : alignment .getAlignmentBlocks ()) {
163
-
164
173
if (block .isSoftClipped ()) continue ;
165
174
if (block .contains (pos )) {
166
175
int blockOffset = pos - block .getStart ();
167
176
hapName += (char ) block .getBase (blockOffset );
168
177
found = true ;
169
-
170
- if (ref != block .getBase (blockOffset )) {
171
- dist ++;
172
- }
173
-
174
178
break ;
175
179
}
176
180
}
@@ -190,7 +194,6 @@ public Map<String, List<Alignment>> labelAlignments(int start, int end, List<Int
190
194
191
195
}
192
196
}
193
-
194
197
return alignmentMap ;
195
198
}
196
199
0 commit comments