forked from Eduardo-vsouza/uproteins
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpercolator.py
127 lines (114 loc) · 5.03 KB
/
percolator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
import sys
from Bio import SeqIO
class Decoy(object):
def __init__(self, db, db_type):
self.df = db
self.seqs, self.entries = self.__get_seqs()
self.reversed = []
self.type = db_type
self.__create_dir()
self.path = sys.path[0]
def __create_dir(self):
if not os.path.exists(f"{self.type}/Percolator"):
cmd_dir = f'mkdir {self.type}/Percolator'
os.system(cmd_dir)
def __get_seqs(self):
seqs = []
entries = []
records = SeqIO.parse(self.df, 'fasta')
for record in records:
seqs.append(record.seq)
entries.append(str(record.description))
return seqs, entries
def reverse_sequences(self):
""" Reverses the amino acid sequence of a protein, except for the aa in the c-terminal. """
for seq in self.seqs:
# cterminus = seq[len(seq)-1]
to_reverse = seq[:-1]
reversed = to_reverse[::-1]
# reversed += cterminus
self.reversed.append(reversed)
return self
def add_contaminants(self):
seqs = []
records = SeqIO.parse(f'{self.path}/seqlib/contaminants.txt', 'fasta')
for record in records:
seqs.append(f">{record.id}\n{record.seq}\n")
return seqs
def to_fasta(self):
out = []
for i in range(len(self.reversed)):
string = f">decoy_{self.entries[i]}\n{self.reversed[i]}\n"
out.append(string)
seqs = self.add_contaminants()
for seq in seqs:
to_add = seq.replace("B", "")
to_add = to_add.replace("X", "")
to_add = to_add.replace("Z", "")
out.append(to_add)
with open(f"{self.type}/Percolator/{self.type}_decoy.fasta", 'w') as fa:
fa.writelines(out)
return self
class PercolatorProcessing(object):
def __init__(self, folder, filetype, args=None):
self.folder = folder
self.args = args
self.target = None
self.decoy = None
self.filetype = filetype
def create_metafiles(self):
""" Creates a metafile containing the paths to the mzid files, created during the peptide search step. """
files = os.listdir(self.folder)
real = []
decoy = []
for file in files:
if "mzid" in file:
if "decoy" in file:
decoy.append(os.path.abspath(f"{self.folder}/{file}\n"))
else:
real.append(os.path.abspath(f"{self.folder}/{file}\n"))
with open(f"{self.folder}/{self.folder}_target_metafile.txt", 'w') as out:
out.writelines(real)
with open(f"{self.folder}/{self.folder}_decoy_metafile.txt", 'w') as decoy_out:
decoy_out.writelines(decoy)
self.target = f"{self.folder}/{self.folder}_target_metafile.txt"
self.decoy = f"{self.folder}/{self.folder}_decoy_metafile.txt"
return self
def convert_to_pin(self):
""" Converts the mzid files to a percolator input file using the metafiles created with create_metafiles()
method. """
msgf2pin_path = "msgf2pin"
if self.args is not None:
if self.args.msgf2pin_path is not None:
msgf2pin_path = self.args.msgf2pin_path
if not os.path.exists(f"{self.folder}/Percolator"):
cmd_dir = f'mkdir {self.folder}/Percolator'
os.system(cmd_dir)
cmd_pin = f'{msgf2pin_path} {self.target} {self.decoy} -o {self.folder}/Percolator/{self.folder}_pin.txt' \
f' -F {self.filetype}_database.fasta,{self.folder}/Percolator/{self.folder}_decoy.fasta -c 2'
os.system(cmd_pin)
return self
def percolate(self):
""" Runs percolator on the converted mzid files. """
perc_path = "percolator"
enz = "trypsin"
folder = f"{self.folder}/Percolator"
if self.args is not None:
if self.args.enzyme is not None:
enz = self.args.enzyme
if self.args.percolator_path is not None:
perc_path = self.args.percolator_path
cmd_perc = f'{perc_path} -X {self.folder}/Percolator/{self.filetype}_percolator_out.xml --tab-out ' \
f'{folder}/comp_features.txt -w {folder}/feature_weights.txt -v 3 -r {folder}/pep_results.txt -m ' \
f'{folder}/results_psm.txt --search-input separate --results-proteins {folder}/protein_results.txt' \
f' --protein-enzyme {enz} --protein-report-duplicates --protein-report-fragments ' \
f'--spectral-counting-fdr 0.01 --picked-protein auto {folder}/{self.folder}_pin.txt'
os.system(cmd_perc)
return self
# os.chdir("/home/eduardo/Documents/R/R_for_proteomics/20190503_PePTID_20190530_INhAMAIS1_10uL_120min_1/")
# perc = PercolatorProcessing("Genome", filetype="genome")
# perc.create_metafiles().convert_to_pin()
# perc.percolate()
# genome_decoy = Decoy(db="genome_ORFs.fasta", db_type="Genome")
# genome_decoy.reverse_sequences().to_fasta()