v3.16.2 - 2019.08.15

jpfeuffer · Aug 15, 2019 · 0ecfeca · 0ecfeca
1 parent 7cce48c
commit 0ecfeca
Show file tree

Hide file tree

Showing 4 changed files with 47 additions and 17 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: MSstats
 Title: Protein Significance Analysis in DDA, SRM and DIA for Label-free
         or Label-based Proteomics Experiments
 Version: 3.16.2
-Date: 2019-08-08
+Date: 2019-08-15
 Description: A set of tools for statistical relative protein significance analysis in DDA, SRM and DIA experiments.
 Authors@R: c(
   person("Meena","Choi", , "[email protected]", c("aut", "cre")),

diff --git a/R/DataProcess.R b/R/DataProcess.R
@@ -2370,17 +2370,29 @@ dataProcess  <-  function(raw,
   		## how to decide top3 for DIA?
         work$remove <- FALSE
 
-		temp1 <- aggregate(INTENSITY~PROTEIN+FEATURE,data=work, function(x) mean(x, na.rm=TRUE))
-
-		temp2 <- split(temp1, temp1$PROTEIN)
+        worktemp <- work[!is.na(work$ABUNDANCE) & work$ABUNDANCE != 0, ]
+
+        ## updated on 2019.08.09, to reduce the high memory consumption of lapply and unlist
+		#temp1 <- aggregate(INTENSITY~PROTEIN+FEATURE, data=work, function(x) mean(x, na.rm=TRUE))
+
+        #temp2 <- split(temp1, temp1$PROTEIN)
 
-		temp3 <- lapply(temp2, function(x) { 
-			x <- x[order(x$INTENSITY, decreasing=TRUE), ]
-			x <- x$FEATURE[1:3]
-			})
+		#temp3 <- lapply(temp2, function(x) { 
+		#	x <- x[order(x$INTENSITY, decreasing=TRUE), ]
+		#	x <- x$FEATURE[1:3]
+		#	})
 
-		selectfeature <- unlist(temp3, use.names=FALSE)
+		#selectfeature <- unlist(temp3, use.names=FALSE)
+
+		temp1 <- worktemp %>% group_by(PROTEIN, FEATURE) %>%
+		    summarize(mean = mean(INTENSITY, na.rm = TRUE)) %>%
+		    group_by(PROTEIN) %>%
+		    filter(row_number(desc(mean)) <= 3)  ## updated on 2019.08.15, in order to get first row if there are ties.
+		    #top_n(3)
+
+		selectfeature <- temp1$FEATURE
 		selectfeature <- selectfeature[!is.na(selectfeature)]
+		## end 2019.08.09
 
 		## get subset
 		work[-which(work$FEATURE %in% selectfeature), 'remove']	<- TRUE
@@ -2403,17 +2415,28 @@ dataProcess  <-  function(raw,
 	    work$remove <- FALSE
 
         worktemp <- work[!is.na(work$ABUNDANCE) & work$ABUNDANCE != 0, ]
-	    temp1 <- aggregate(INTENSITY ~ PROTEIN+FEATURE, data=worktemp, function(x) mean(x, na.rm=TRUE))
+
+        ## updated on 2019.08.09, to reduce the high memory consumption of lapply and unlist
+	    #temp1 <- aggregate(INTENSITY ~ PROTEIN+FEATURE, data=worktemp, function(x) mean(x, na.rm=TRUE))
 
-	    temp2 <- split(temp1, temp1$PROTEIN)
+	    #temp2 <- split(temp1, temp1$PROTEIN)
 
-	    temp3 <- lapply(temp2, function(x) { 
-	        x <- x[order(x$INTENSITY, decreasing=TRUE), ]
-	        x <- x$FEATURE[1:n_top_feature]
-	    })
+	    #temp3 <- lapply(temp2, function(x) { 
+	    #    x <- x[order(x$INTENSITY, decreasing=TRUE), ]
+	    #    x <- x$FEATURE[1:n_top_feature]
+	    #})
 
-	    selectfeature <- unlist(temp3, use.names=FALSE)
+	    #selectfeature <- unlist(temp3, use.names=FALSE)
+
+	    temp1 <- worktemp %>% group_by(PROTEIN, FEATURE) %>%
+	        summarize(mean = mean(INTENSITY, na.rm = TRUE)) %>%
+	        group_by(PROTEIN) %>%
+	        filter(row_number(desc(mean)) <= n_top_feature) ## updated on 2019.08.15, in order to get first row if there are ties.
+	    #top_n(n_top_feature)
+
+	    selectfeature <- temp1$FEATURE
 	    selectfeature <- selectfeature[!is.na(selectfeature)]
+	    ## end 2019.08.09
 
 	    ## get subset
 	    work[-which(work$FEATURE %in% selectfeature), 'remove']	<- TRUE

diff --git a/R/MaxQtoMSstatsFormat.R b/R/MaxQtoMSstatsFormat.R
@@ -58,7 +58,7 @@ MaxQtoMSstatsFormat <- function(evidence,
     annotinfo <- unique(annot[, c("Raw.file", "Condition", 'BioReplicate')])	
 
 	## Each Run should has unique information about condition and bioreplicate
-	check.annot <- xtabs(~Run, annotinfo)
+	check.annot <- xtabs(~Raw.file, annotinfo)
 	if ( any(check.annot > 1) ) {
 	    stop('** Please check annotation. Each MS run (Raw.file) can\'t have multiple conditions or BioReplicates.' )
 	}

diff --git a/inst/NEWS b/inst/NEWS
@@ -1,3 +1,10 @@
+CHANGES IN VERSION 3.16.2 [2019-08-15]
+-------------------------
+    BUG FIXES
+	 - dataProcess, featureSubset='top3' or 'topN' with n_top_feature : fix a bug in the featureSubset = 'top3' and 'topN' options and reduce memory consumption.
+	 - groupComparison : remove the warning message for the singularity issue.
+
+
 CHANGES IN VERSION 3.16.1 [2019-05-07]
 -------------------------
     BUG FIXES