diff xcms_get_alignment_eic.r @ 49:f772a5caa86a

Added more options and better documentation. Added MsClust support for parsing XCMS alignment results. Improved output reports for XCMS wrappers. New tools.
author pieter.lukasse@wur.nl
date Wed, 10 Dec 2014 22:03:27 +0100
parents
children 35f506f30ae4
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xcms_get_alignment_eic.r	Wed Dec 10 22:03:27 2014 +0100
@@ -0,0 +1,153 @@
+# shows all alignment results in a rt region
+## read args:
+args <- commandArgs(TRUE)
+# xset data:
+args.xsetData <- args[1]
+
+args.rtStart  <- strtoi(args[2])
+args.rtEnd <- strtoi(args[3])
+
+# limit max diff to 600 and minNrSamples to at least 2 :
+if (args.rtEnd - args.rtStart > 600)
+	stop("The RT window should be <= 600")
+
+args.minNrSamples <- strtoi(args[4]) #min nr samples
+if (args.minNrSamples < 2)
+	stop("Set 'Minimum number of samples' to 2 or higher")
+
+
+args.sampleNames <- strsplit(args[5], ",")[[1]]
+# trim leading and trailing spaces:
+args.sampleNames <- gsub("^\\s+|\\s+$", "", args.sampleNames)
+
+## report files
+args.htmlReportFile <- args[6]
+args.htmlReportFile.files_path <- args[7]
+
+
+if (length(args) == 8) 
+{
+	args.outLogFile <- args[8]
+	# suppress messages:
+	# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888
+	msg <- file(args.outLogFile, open="wt")
+	sink(msg, type="message") 
+	sink(msg, type="output")
+}
+
+
+
+tryCatch(
+        {
+	        library(metaMS)
+	
+			# load the xset data :
+			xsetData <- readRDS(args.xsetData)
+			# if here to support both scenarios:
+			if ("xcmsSet" %in% slotNames(xsetData) )
+			{
+				xsetData <- xsetData@xcmsSet
+			}
+			
+			# report
+			dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE)
+			message(paste("\nGenerating report.........in ", args.htmlReportFile.files_path))
+			
+			write(paste("<html><body><h1>Extracted Ion Chromatograms (EIC) of alignments with peaks in ",args.minNrSamples, " or more samples</h1>"),
+			      file=args.htmlReportFile)
+			
+			gt <- groups(xsetData)
+			message("\nParsed groups... ")
+			groupidx1 <- which(gt[,"rtmed"] > args.rtStart & gt[,"rtmed"] < args.rtEnd & gt[,"npeaks"] >= args.minNrSamples)  # this should be only on samples selected
+			
+			if (length(groupidx1) > 0)
+			{
+				message("\nGetting EIC... ")
+				eiccor <- getEIC(xsetData, groupidx = c(groupidx1), sampleidx=args.sampleNames)
+				#eicraw <- getEIC(xsetData, groupidx = c(groupidx1), sampleidx=args.sampleNames, rt = "raw")
+				
+				#sampleNamesIdx <- which(sampnames(LC$xset@xcmsSet) %in% args.sampleNames, arr.ind = TRUE)
+				#or (from bioconductor code for getEIC):  NB: this is assuming sample indexes used in data are ordered after the order of sample names!!
+				sampNames <- sampnames(xsetData)
+				sampleNamesIdx <- match( args.sampleNames, sampNames)
+				message(paste("Samples: ", sampleNamesIdx))
+				
+				#TODO html <- paste(html, "<table><tbody>")
+				message(paste("\nPlotting figures... "))
+				
+				#get the mz list (interestingly, this [,"mz"] is relatively slow):
+				peakMzList <- xsetData@peaks[,"mz"]
+				peakSampleList <- xsetData@peaks[,"sample"]
+				#signal to noise list:
+				peakSnList <- xsetData@peaks[,"sn"]
+				
+				message(paste("Total nr of peaks: ", length(peakMzList)))
+				
+				for (i in 1:length(groupidx1)) 
+				{
+					groupMembers <- xsetData@groupidx[[groupidx1[i]]]
+					
+					groupMzList <- ""
+					groupSampleList <- ""
+					finalGroupSize <- 0
+					
+					for (j in 1:length(groupMembers))
+					{
+						#get only the peaks from the selected samples:
+						memberSample <- peakSampleList[groupMembers[j]]
+						memberSn <- peakSnList[groupMembers[j]]
+						if (!is.na(memberSn) && memberSample %in% sampleNamesIdx)
+						{
+							message(paste("Group: ", groupidx1[i], " / Member sample: ", memberSample))
+							memberMz <- peakMzList[groupMembers[j]]
+							
+							
+							if (finalGroupSize == 0)
+							{
+								groupMzList <- memberMz
+								groupSampleList <- sampNames[memberSample]
+							} else {
+								groupMzList <- paste(groupMzList,",",memberMz, sep="")
+								groupSampleList <- paste(groupSampleList,",",sampNames[memberSample], sep="")
+							}
+							
+							finalGroupSize <- finalGroupSize +1	
+						}
+					}
+					# here we do the real check on group size and only the groups that have enough
+					# members with signal to noise > 0 will be plotted here:
+					if (finalGroupSize >= args.minNrSamples)
+					{
+						message(paste("Plotting figure ",i, " of max ", length(groupidx1)," figures... "))
+						
+						figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="")
+						write(paste("<img src='", "figure", i,".png' />", sep="") ,
+			      				file=args.htmlReportFile, append=TRUE)
+			
+						png( figureName, type="cairo" ) 
+						plot(eiccor, xsetData, groupidx = i)
+						devname = dev.off()
+						
+						write(paste("<p>Alignment id: [", groupidx1[i], "]. m/z list of peaks in alignment with signal/noise <> NA:<br/>", groupMzList,"</p>", sep="") ,
+			      				file=args.htmlReportFile, append=TRUE)
+			      		write(paste("<p>m/z values found in the following samples respectively: <br/>", groupSampleList,"</p>", sep="") ,
+			      				file=args.htmlReportFile, append=TRUE)
+					}
+				}
+				
+			}
+			
+			write("</body><html>",
+			      	file=args.htmlReportFile, append=TRUE)
+			message("finished generating report")
+			# unlink(args.htmlReportFile)
+			cat("\nWarnings================:\n")
+			str( warnings() ) 
+		},
+        error=function(cond) {
+            sink(NULL, type="message") # default setting
+			sink(stderr(), type="output")
+            message("\nERROR: ===========\n")
+            print(cond)
+        }
+    )