Skip to content

Commit

Permalink
Merge pull request pymzml#291 from pymzml/fix/pymzml#289
Browse files Browse the repository at this point in the history
Fix issue with parsing multiple chromatograms
  • Loading branch information
MKoesters authored Apr 11, 2022
2 parents 98ab67d + c6b3a3b commit a913cda
Showing 1 changed file with 8 additions and 9 deletions.
17 changes: 8 additions & 9 deletions pymzml/file_classes/standardMzml.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,9 +551,12 @@ def _interpol_search(self, target_index, chunk_size=8, fallback_cutoff=100):
current_position = seeker.tell()

elif len(data) == 0:
sorted_keys = sorted(self.offset_dict.keys())
sorted_int_keys = {
k: v for k, v in self.offset_dict.items() if isinstance(k, int)
}
sorted_keys = sorted(sorted_int_keys.keys())
pos = (
bisect.bisect_left(sorted_keys, target_index) - 2
bisect.bisect_left(sorted_int_keys, target_index) - 2
) # dat magic number :)
try:
key = sorted_keys[pos]
Expand Down Expand Up @@ -587,20 +590,16 @@ def _read_to_spec_end(self, seeker, chunks_to_read=8):
start_pos = seeker.tell()
data_chunk = seeker.read(chunk_size)
while end_found is False:
chunk_offset = seeker.tell()
data_chunk += seeker.read(chunk_size)
tag_end, seeker = self._read_until_tag_end(seeker)
data_chunk += tag_end
if regex_patterns.SPECTRUM_CLOSE_PATTERN.search(data_chunk):
match = regex_patterns.SPECTRUM_CLOSE_PATTERN.search(data_chunk)
relative_pos_in_chunk = match.end()
end_pos = chunk_offset + relative_pos_in_chunk
end_pos = match.end()
end_found = True
elif regex_patterns.CHROMATOGRAM_CLOSE_PATTERN.search(data_chunk):
match = regex_patterns.CHROMATOGRAM_CLOSE_PATTERN.search(data_chunk)
relative_pos_in_chunk = match.end()
end_pos = chunk_offset + relative_pos_in_chunk
end_pos = match.end()
end_found = True
return (start_pos, end_pos)

Expand Down Expand Up @@ -743,7 +742,7 @@ def _search_string_identifier(self, search_string, chunk_size=8):
file_pointer = seeker.tell()

data = seeker.read(total_chunk_size)
string, seeker = self._read_until_tag_end(seeker, byte_mode=True)
string, seeker = self._read_until_tag_end(seeker)
data += string
spec_start = regex_string.search(data)
chrom_start = regex_patterns.CHROMO_OPEN_PATTERN.search(data)
Expand All @@ -769,7 +768,7 @@ def _search_string_identifier(self, search_string, chunk_size=8):
elif len(data) == 0:
raise Exception("cant find specified string")

def _read_until_tag_end(self, seeker, max_search_len=12, byte_mode=False):
def _read_until_tag_end(self, seeker, max_search_len=12):
"""
Help make sure no split text appears in chunked data, so the regex always finds
<spectrum ...>
Expand Down

0 comments on commit a913cda

Please sign in to comment.