-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMetaProfiler.Rd
204 lines (165 loc) · 10.6 KB
/
MetaProfiler.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MetaProfiler.R
\name{MetaProfiler}
\alias{MetaProfiler}
\title{Create MetaProfiler class object from protein-SIP results.}
\usage{
MetaProfiler(
design,
data,
time_unit,
time_zero = 0,
isotope = "N",
peptide_centric = T,
light_peptide = T,
as_percentage = T,
incorporation_name,
incorporation_columns = "auto",
intensity_name,
intensity_columns = "auto",
labeling_ratio_name = NULL,
labeling_ratio_columns = "auto",
labeling_ratio_threshold = NA,
score_name = NULL,
higher_score_better = T,
score_columns = "auto",
score_threshold = NA,
peptide_column_PTMs = "guess",
peptide_column_no_PTMs = "guess",
accession_column = "guess",
accession_pattern = "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}",
compute_razor_protein = F,
pep2pro = NULL,
pep2pro_peptide_column = "guess",
pep2pro_accession_column = "guess",
pep2pro_accession_pattern = accession_pattern,
pep2taxon = NULL,
pep2taxon_peptide_column = "guess",
rank_columns = "guess",
pro2func = NULL,
pro2func_accession_column = "guess",
pro2func_accession_pattern = accession_pattern,
function_columns = "guess",
annotate_by = c("unmodified", "modified"),
feature_type_column = NULL,
feature_type = c("feature", "id"),
progress = T,
trace = T,
...
)
}
\arguments{
\item{design}{A \code{data.frame} or \code{data.table} containing the experimental design. Each row must
correspond to the file paths listed in \code{data} or in the file column of table \code{design}. Additionally,
a timepoint column with the same name as \code{time_unit} must be present.}
\item{data}{Either a \code{data.frame}/\code{data.table} or a character vector listing the files that contain
the results from the protein-SIP experiment. Files must be tab or comma delimited. Can be set to \code{NULL}
if table \code{design} contains a column with the filepaths of the result files.}
\item{time_unit}{The unit for the timepoints.}
\item{time_zero}{Numeric value denoting the timepoint when the diet was switched. Defaults to zero.}
\item{isotope}{A character value specifying the stable isotope. Should correspond to one of the elements in the
periodic table.}
\item{peptide_centric}{Logical value specifying if analysis is done at the peptide or protein level.}
\item{as_percentage}{Should incorporation and labeling ratio values be presented as percentages?}
\item{incorporation_name}{A character value denoting the name of the incorporation value.
If \code{incorporation_columns} is set to \code{"auto"}, then the function will look for columns
that start with the character value followed by a unique identifier (e.g. "RIA 1", "RIA 2", "RIA 3", ...).
See details for the difference between incorporation and labeling ratio.}
\item{incorporation_columns}{A character vector detailing the names of the columns containing the incorporation
values. Can be set to \code{"auto"} if \code{incorporation_name} is specified. Can also be set to \code{NULL}
if no incorporation value was measured.}
\item{intensity_name}{Similar to \code{incorporation_name}, but with the intensity values instead.}
\item{intensity_columns}{Similar to \code{incorporation_columns}, but with the intensity columns instead.
Can be set to \code{NULL} if labeling ratio was measured instead.}
\item{labeling_ratio_name}{Similar to \code{incorporation_name}, but with the labeling ratio values instead.
See details for the difference between incorporation and labeling ratio.}
\item{labeling_ratio_columns}{Similar to \code{incorporation_columns}, but with the labeling ratio columns
instead. If set to \code{NULL}, then labeling ratio is calculated from intensity.}
\item{labeling_ratio_threshold}{A numeric value that specifies the minimum labeling ratio needed for the
heavy peptide or protein to be kept for downstream analysis.}
\item{score_name}{Similar to \code{incorporation_name}, but with the heavy peptide identification score values
instead.}
\item{score_columns}{Similar to \code{incorporation_columns}, but with the heavy peptide identification score
columns instead. Can be set to \code{NULL} if score was not measured.}
\item{score_threshold}{A numeric value that specifies the minimum or maximum score needed for the heavy peptide or
protein to be kept for downstream analysis.}
\item{peptide_column_PTMs}{A character value. Specifies the name of the column containing the peptide sequence
with post translational modifications (PTMs). If set to \code{"guess"}, then the function will guess the column
based on the headers and the characters contained in the column.}
\item{peptide_column_no_PTMs}{Similar to \code{peptide_column_PTMs}, but instead with peptide sequences without
post translational modifications (PTMs). If set to \code{"guess"}, but the function only detects the column
containing PTMs, then the function will add a new column containing the sequences from \code{peptide_column_PTMs},
but with modifications removed. This also applies when the value is set to \code{NULL}. When removing PTMs,
the function assumes that the names of the modifications in the sequences follow the UniProt convention.}
\item{accession_column}{A character value which specifies the name of the column containing the protein accession
IDs. Can be set to \code{NULL} if \code{pep2pro} is provided.}
\item{accession_pattern}{A string regex. Only used when \code{accession_column} is set to \code{"guess"}. In this
case, the function will find columns containing strings that match the regular expression in \code{accession_pattern}.
Defaults to standard naming convention patterns for UniProt IDs.}
\item{compute_razor_protein}{If set to \code{TRUE}, then the razor protein for each peptide will be computed.
See details for information about razor proteins.}
\item{pep2pro}{A character value for the filename or a \code{data.frame}/\code{data.table} with the peptide to protein table.
Optional if \code{accession_column} is provided.}
\item{pep2pro_peptide_column}{A character value for the peptide column in \code{pep2pro}.
By default, the function will guess the columns similarly to \code{peptide_column_PTMs} or
\code{peptide_column_no_PTMs}, depending on \code{annotate_by}.}
\item{pep2pro_accession_column}{A character value for the protein accession IDs column in \code{pep2pro}.
By default, the function will guess the column similarly to how \code{accession_column} is guessed.}
\item{pep2pro_accession_pattern}{See \code{accession_pattern}.}
\item{pep2taxon}{A character value for the filename or a \code{data.frame}/\code{data.table} with the peptide to taxon table.}
\item{pep2taxon_peptide_column}{A character value for the peptide column in \code{pep2taxon}.
By default, the function will guess the columns similarly to \code{peptide_column_PTMs} or
\code{peptide_column_no_PTMs}, depending on \code{annotate_by}.}
\item{rank_columns}{A character vector for the phylogenetic rank columns in \code{pep2taxon}. By default,
the function will look for columns with the names \code{superkingdom}, \code{kingdom}, \code{subkingdom}, \code{superphylum},
\code{phylum}, \code{subphylum}, \code{superclass}, \code{class}, \code{subclass}, \code{infraclass}, \code{superorder}, \code{order}, \code{suborder},
\code{infraorder}, \code{parvorder}, \code{superfamily}, \code{family}, \code{subfamily}, \code{tribe}, \code{subtribe}, \code{genus}, \code{subgenus},
\code{species group}, \code{species subgroup}, \code{species}, \code{subspecies}, \code{varietas}, and \code{forma}.}
\item{pro2func}{A character value for the filename or a \code{data.frame}/\code{data.table} with the protein to function table.}
\item{pro2func_accession_column}{A character value for the protein accession IDs column in \code{pro2func}.
By default, the function will guess the column similarly to how \code{accession_column} is guessed.}
\item{pro2func_accession_pattern}{See \code{accession_pattern}.}
\item{function_columns}{A character vector for the protein function columns in \code{pro2func}.
By default, the function will guess the columns by looking for columns starting with common functional annotation databases such as
KEGG, BRITE, GO, COG, and NOG.}
\item{feature_type_column}{A character value specifying the name of the column containing the type of feature
the heavy peptide was identified/quantified from. If using results from MetaProSIP, the reference used for heavy
peptide identification can be either from a feature (i.e. the group of peaks in the retention time and mass to
charge ratio dimension belonging to a single peptide entity) or from a pseudo-feature (i.e. the theoretical position
of the unlabeled feature using sequence information only).}
\item{feature_type}{A character vector for the types of feature contained in \code{feature_type_column}.
Defaults to \code{"feature"} and \code{"id"} (i.e. the pseudo-feature).}
\item{progress}{If \code{TRUE}, then progress is printed.}
\item{trace}{If \code{TRUE}, then the log is printed.}
\item{annotate_by}{A character value, either \code{"unmodified"} or \code{"modified"}, that specifies whether
functional and taxonomic annotation is done using peptide sequences without PTMs (\code{"unmodified"}) or with
PTMs (\code{"modified"}). Only used when \code{peptide_centric} is set to \code{TRUE}.}
}
\value{
Returns an object of class MetaProfiler.
}
\description{
Converts the results obtained from protein-SIP experiment into a MetaProfiler class. If using result
files, then the file extension must be either csv or tsv.
}
\details{
\subsection{Labeling Ratio}{
\code{labeling_ratio_name} denotes the relative ratio between the light peptide and the estimated
intensity of the heavy peptide. When using an unlabeled protein spike-in, the labeling ratio (LR) measures the proportion of proteins
that are produced using the heavy stable isotope relative to the protein at \code{time_zero}. By taking this
measure over time, the rate of newly synthesized proteins that incorporate the stable isotope can be estimated.
Labeling ratio is calculated using the equation:
\deqn{(IH)/(IL+IH),}
where \eqn{IL} is the sum of the intensities of the unlabeled peptides or proteins and \eqn{IH} is the sum of the
intensities of the heavy peptides or proteins.
}
\subsection{Elemental Flux}{
\code{incorporation_name} describes the elemental flux of the isotope, which is measured using the average proportion of the stable isotope
incorporated in a peptide of interest. By characterizing the functional and taxonomic origin of the peptide,
it gives insight into how and where the stable isotopic substrate is being converted into biomass. Thus, measuring
the elemental flux can predict where this substrate is limited. Incorporation is calculated using the equation:
\deqn{(H-M)/(F-M),}
where \eqn{H} is the m/z position at the center of the predicted isotopic distribution of the heavy peptide,
\eqn{M} is the monoisotopic peak of the light peptide, and \eqn{F} is the m/z position of the fully labeled peptide.
}
}
\examples{
}