Skip to content

Commit

Permalink
Merge branch 'master' into quant
Browse files Browse the repository at this point in the history
  • Loading branch information
wfondrie committed Mar 11, 2021
2 parents c0c8bce + 2aa978c commit d75aa1d
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 1 deletion.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog for mokapot

## [Unreleased]
### Fixed
- Parsing Percolator tab-delimited files with a "DefaultDirection" line.
- `Label` column is now converted to boolean during PIN file parsing.
Previously, problems occurred if the `Label` column was of dtype `object`.

## [0.6.0] - 2021-03-03
### Added
- Support for parsing PSMs from PepXML input files.
Expand Down
11 changes: 10 additions & 1 deletion mokapot/parsers/pin.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,17 @@ def read_pin(pin_files, group_column=None, to_df=False, copy_data=False):
raise ValueError(f"More than one '{name}' column found.")

if not all([specid, peptides, proteins, labels, spectra]):
print([specid, peptides, proteins, labels, spectra])
raise ValueError(
"This PIN format is incompatible with mokapot. Please"
" verify that the required columns are present."
)

# Convert labels to the correct format.
print(pin_df[labels[0]])
pin_df[labels[0]] = pin_df[labels[0]].astype(int)
if any(pin_df[labels[0]] == -1):
pin_df[labels[0]] = (pin_df[labels[0]] + 1) / 2
pin_df[labels[0]] = ((pin_df[labels[0]] + 1) / 2).astype(bool)

if to_df:
return pin_df
Expand Down Expand Up @@ -138,8 +141,14 @@ def read_percolator(perc_file):

with fopen(perc_file) as perc:
cols = perc.readline().rstrip().split("\t")
dir_line = perc.readline().rstrip().split("\t")[0]
if dir_line.lower() != "defaultdirection":
perc.seek(0)
_ = perc.readline()

psms = pd.concat((c for c in _parse_in_chunks(perc, cols)), copy=False)

print(psms.head())
return psms


Expand Down
37 changes: 37 additions & 0 deletions tests/unit_tests/test_parser_pin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Test that parsing Percolator input files works correctly"""
import pytest
import mokapot
import pandas as pd


@pytest.fixture
def std_pin(tmp_path):
    """Write a small PIN file into ``tmp_path`` and return its path.

    The file has a mixed-case header, a "DefaultDirection" line, one PSM
    labeled ``1`` and one PSM labeled ``-1``. The final line has no
    trailing newline.
    """
    pin_path = tmp_path / "std_pin"
    pin_lines = (
        "sPeCid\tLaBel\tpepTide\tsCore\tscanNR\tpRoteins\n",
        "DefaultDirection\t-\t-\t-\t1\t-\t-\n",
        "a\t1\tABC\t5\t2\tprotein1\tprotein2\n",
        "b\t-1\tCBA\t10\t3\tdecoy_protein1\tdecoy_protein2",
    )
    with open(str(pin_path), "w+") as handle:
        handle.write("".join(pin_lines))

    return pin_path


def test_pin_parsing(std_pin):
    """Test that a standard PIN file is parsed correctly.

    Checks that the label column comes back as boolean, that both PSMs are
    read (so the "DefaultDirection" line is not treated as a PSM), and that
    the feature matrix of the parsed dataset matches the score column of the
    raw data frame.
    """
    df = mokapot.read_pin(std_pin, to_df=True)
    assert df["LaBel"].dtype == "bool"
    assert len(df) == 2

    # One row labeled True and one labeled False. The original repeated the
    # first check twice, so rows with a False label were never counted.
    assert len(df[df["LaBel"]]) == 1
    assert len(df[~df["LaBel"]]) == 1

    dat = mokapot.read_pin(std_pin)
    pd.testing.assert_frame_equal(df.loc[:, ("sCore",)], dat.features)


def test_pin_wo_dir():
    """Check that a PIN file without a DefaultDirection line can be read.

    The test passes as long as parsing does not raise.
    """
    # NOTE(review): relative path, so this presumably only resolves when the
    # tests are run from the directory containing ``data/`` -- confirm.
    pin_path = "data/scope2_FP97AA.pin"
    mokapot.read_pin(pin_path)

0 comments on commit d75aa1d

Please sign in to comment.