Skip to content

Commit

Permalink
Added mokapot.to_txt(). Now time for tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
wfondrie committed Mar 17, 2021
1 parent fd3d97b commit c3626d4
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 41 deletions.
22 changes: 18 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,24 @@
### Added
- Support for downstream quantitation with
[FlashLFQ](https://github.com/smith-chem-wisc/FlashLFQ). This is accomplished
through the `to_flashlfq()` method of `LinearConfidence` objects. Note that
to support the FlashLFQ format, you'll need to specify additional columns in
`read_pin()` or use a PepXML input file (`read_pepxml()`).
- Tests accompanying the support for the above format.
through the `mokapot.to_flashlfq()` function or the `to_flashlfq()` method of
`LinearConfidence` objects. Note that to support the FlashLFQ format, you'll
need to specify additional columns in `read_pin()` or use a PepXML input file
(`read_pepxml()`).
- Added a top-level function for exporting confident PSMs, peptides, and
proteins from one or more `LinearConfidence` objects as a tab-delimited file:
`mokapot.to_txt()`.
- Tests accompanying the support for the features above.

### Changed
- Corresponding with support for new formats, the `mokapot.read_pin()` function
and the `LinearPsmDataset` constructor now have many new optional parameters.
These specify the columns containing the metadata needed to write the added
formats.
- Starting mokapot should be slightly faster for Python >= 3.8. We were able to
eliminate the runtime call to setuptools, because of the recent addition of
`importlib.metadata` to the standard library, saving a few hundred
milliseconds.

## [0.6.1] - 2021-03-11
### Fixed
Expand Down
80 changes: 43 additions & 37 deletions mokapot/confidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from . import qvalues
from . import utils
from .picked_protein import picked_protein
from .writers import to_flashlfq
from .writers import to_flashlfq, to_txt

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -100,29 +100,52 @@ def groups(self):
"""The groups for confidence estimation"""
return list(self._group_confidence_estimates.keys())

def to_txt(self, dest_dir=None, file_root=None, sep="\t", decoys=False):
"""
Save confidence estimates to delimited text files.
def to_txt(
self,
dest_dir=None,
file_root=None,
sep="\t",
decoys=False,
combine=False,
):
"""Save confidence estimates to delimited text files.
Parameters
----------
dest_dir : str or None, optional
The directory in which to save the files. `None` will use the
current working directory.
file_root : str or None, optional
An optional prefix for the confidence estimate files. The
suffix will always be `mokapot.psms.txt` and
`mokapot.peptides.txt`.
An optional prefix for the confidence estimate files. The suffix
will be "mokapot.{level}.txt", where "{level}" indicates the level
at which confidence estimation was performed (i.e. PSMs, peptides,
proteins) if :code:`combine=True`. If :code:`combine=False` (the
default), additionally the group value is prepended, yielding a
suffix "{group}.mokapot.{level}.txt".
sep : str, optional
The delimiter to use.
decoys : bool, optional
Save decoys confidence estimates as well?
combine : bool, optional
Should groups be combined into a single file?
Returns
-------
list of str
The paths to the saved files.
"""
if combine:
res = self.group_confidence_estimates.values()
ret_files = to_txt(
res,
dest_dir=dest_dir,
file_root=file_root,
sep=sep,
decoys=decoys,
)
return ret_files

ret_files = []
for group, res in self.group_confidence_estimates.items():
prefix = file_root + f".{group}"
Expand Down Expand Up @@ -205,18 +228,18 @@ def levels(self):
return list(self.confidence_estimates.keys())

def to_txt(self, dest_dir=None, file_root=None, sep="\t", decoys=False):
"""
Save confidence estimates to delimited text files.
"""Save confidence estimates to delimited text files.
Parameters
----------
dest_dir : str or None, optional
The directory in which to save the files. `None` will use the
current working directory.
file_root : str or None, optional
An optional prefix for the confidence estimate files. The
suffix will always be `mokapot.psms.txt` and
`mokapot.peptides.txt`.
An optional prefix for the confidence estimate files. The suffix
will always be "mokapot.{level}.txt", where "{level}" indicates the
level at which confidence estimation was performed (i.e. PSMs,
peptides, proteins).
sep : str, optional
The delimiter to use.
decoys : bool, optional
Expand All @@ -226,32 +249,15 @@ def to_txt(self, dest_dir=None, file_root=None, sep="\t", decoys=False):
-------
list of str
The paths to the saved files.
"""
file_base = "mokapot"
if file_root is not None:
file_base = file_root + "." + file_base
if dest_dir is not None:
file_base = Path(dest_dir, file_base)

out_files = []
for level, qvals in self.confidence_estimates.items():
if qvals is None:
continue

out_file = str(file_base) + f".{level}.txt"
qvals.to_csv(out_file, sep=sep, index=False)
out_files.append(out_file)

if decoys:
for level, qvals in self.decoy_confidence_estimates.items():
if qvals is None:
continue

out_file = str(file_base) + f".decoys.{level}.txt"
qvals.to_csv(out_file, sep=sep, index=False)
out_files.append(out_file)

return out_files
return to_txt(
self,
dest_dir=dest_dir,
file_root=file_root,
sep=sep,
decoys=decoys,
)

def _perform_tdc(self, psm_columns):
"""Perform target-decoy competition.
Expand Down
1 change: 1 addition & 0 deletions mokapot/writers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
"""Define the public functions for the writers"""
from .txt import to_txt
from .flashlfq import to_flashlfq
91 changes: 91 additions & 0 deletions mokapot/writers/txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""Writer to save results in a tab-delmited format"""
from pathlib import Path
from collections import defaultdict

import pandas as pd


def to_txt(conf, dest_dir=None, file_root=None, sep="\t", decoys=False):
    """Save confidence estimates to delimited text files.

    One file is written per confidence level. When several confidence
    objects are provided, their tables for the same level are concatenated
    into a single file.

    Parameters
    ----------
    conf : Confidence object or iterable of Confidence objects
        One or more :py:class:`~mokapot.confidence.LinearConfidence` objects.
        A non-iterable object is treated as a single confidence object.
    dest_dir : str or None, optional
        The directory in which to save the files. `None` will use the current
        working directory.
    file_root : str or None, optional
        An optional prefix for the confidence estimate files. The suffix will
        always be "mokapot.{level}.txt" where "{level}" indicates the level at
        which confidence estimation was performed (i.e. PSMs, peptides,
        proteins).
    sep : str, optional
        The delimiter to use.
    decoys : bool, optional
        Save decoys confidence estimates as well?

    Returns
    -------
    list of str
        The paths to the saved files.

    Raises
    ------
    ValueError
        If `conf` is a string.
    """
    # Validate explicitly rather than with `assert`: assertions are removed
    # when Python runs with -O, which would let a string be iterated
    # character by character below.
    if isinstance(conf, str):
        raise ValueError("'conf' should be a Confidence object, not a string.")

    try:
        iter(conf)
    except TypeError:
        # A single, non-iterable confidence object was provided.
        conf = [conf]

    file_base = "mokapot"
    if file_root is not None:
        file_base = file_root + "." + file_base
    if dest_dir is not None:
        file_base = Path(dest_dir, file_base)

    # Gather the tables for each level across all confidence objects.
    results = defaultdict(list)
    for res in conf:
        for level, qval_list in _get_level_data(res, decoys).items():
            results[level] += qval_list

    out_files = []
    for level, qval_list in results.items():
        out_file = str(file_base) + f".{level}.txt"
        pd.concat(qval_list).to_csv(out_file, sep=sep, index=False)
        out_files.append(out_file)

    return out_files


def _get_level_data(conf, decoys):
"""Return the dataframes for each level.
Parameters
----------
conf : a Confidence object
A LinearConfidence object.
decoys : bool
Should decoys be included?
Returns
-------
Dict
Each entry contains a level, dataframe pair.
"""
results = defaultdict(list)
for level, qvals in conf.confidence_estimates.items():
if qvals is None:
continue

results[level].append(qvals)

if decoys:
for level, qvals in conf.decoy_confidence_estiamtes.items():
if qvals is None:
continue

results[f"decoy.{level}"].append(qvals)

return results

0 comments on commit c3626d4

Please sign in to comment.