view xcms_get_alignment_eic.r @ 49:f772a5caa86a

Added more options and better documentation. Added MsClust support for parsing XCMS alignment results. Improved output reports for XCMS wrappers. New tools.
author pieter.lukasse@wur.nl
date Wed, 10 Dec 2014 22:03:27 +0100
parents
children 35f506f30ae4
line wrap: on
line source

# shows all alignment results in a rt region
## read args:
args <- commandArgs(TRUE)
# xset data:
args.xsetData <- args[1]

args.rtStart  <- strtoi(args[2])
args.rtEnd <- strtoi(args[3])

# limit max diff to 600 and minNrSamples to at least 2 :
if (args.rtEnd - args.rtStart > 600)
	stop("The RT window should be <= 600")

args.minNrSamples <- strtoi(args[4]) #min nr samples
if (args.minNrSamples < 2)
	stop("Set 'Minimum number of samples' to 2 or higher")


args.sampleNames <- strsplit(args[5], ",")[[1]]
# trim leading and trailing spaces:
args.sampleNames <- gsub("^\\s+|\\s+$", "", args.sampleNames)

## report files
args.htmlReportFile <- args[6]
args.htmlReportFile.files_path <- args[7]


if (length(args) == 8) 
{
	args.outLogFile <- args[8]
	# suppress messages:
	# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888
	msg <- file(args.outLogFile, open="wt")
	sink(msg, type="message") 
	sink(msg, type="output")
}



tryCatch(
        {
	        library(metaMS)
	
			# load the xset data :
			xsetData <- readRDS(args.xsetData)
			# if here to support both scenarios:
			if ("xcmsSet" %in% slotNames(xsetData) )
			{
				xsetData <- xsetData@xcmsSet
			}
			
			# report
			dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE)
			message(paste("\nGenerating report.........in ", args.htmlReportFile.files_path))
			
			write(paste("<html><body><h1>Extracted Ion Chromatograms (EIC) of alignments with peaks in ",args.minNrSamples, " or more samples</h1>"),
			      file=args.htmlReportFile)
			
			gt <- groups(xsetData)
			message("\nParsed groups... ")
			groupidx1 <- which(gt[,"rtmed"] > args.rtStart & gt[,"rtmed"] < args.rtEnd & gt[,"npeaks"] >= args.minNrSamples)  # this should be only on samples selected
			
			if (length(groupidx1) > 0)
			{
				message("\nGetting EIC... ")
				eiccor <- getEIC(xsetData, groupidx = c(groupidx1), sampleidx=args.sampleNames)
				#eicraw <- getEIC(xsetData, groupidx = c(groupidx1), sampleidx=args.sampleNames, rt = "raw")
				
				#sampleNamesIdx <- which(sampnames(LC$xset@xcmsSet) %in% args.sampleNames, arr.ind = TRUE)
				#or (from bioconductor code for getEIC):  NB: this is assuming sample indexes used in data are ordered after the order of sample names!!
				sampNames <- sampnames(xsetData)
				sampleNamesIdx <- match( args.sampleNames, sampNames)
				message(paste("Samples: ", sampleNamesIdx))
				
				#TODO html <- paste(html, "<table><tbody>")
				message(paste("\nPlotting figures... "))
				
				#get the mz list (interestingly, this [,"mz"] is relatively slow):
				peakMzList <- xsetData@peaks[,"mz"]
				peakSampleList <- xsetData@peaks[,"sample"]
				#signal to noise list:
				peakSnList <- xsetData@peaks[,"sn"]
				
				message(paste("Total nr of peaks: ", length(peakMzList)))
				
				for (i in 1:length(groupidx1)) 
				{
					groupMembers <- xsetData@groupidx[[groupidx1[i]]]
					
					groupMzList <- ""
					groupSampleList <- ""
					finalGroupSize <- 0
					
					for (j in 1:length(groupMembers))
					{
						#get only the peaks from the selected samples:
						memberSample <- peakSampleList[groupMembers[j]]
						memberSn <- peakSnList[groupMembers[j]]
						if (!is.na(memberSn) && memberSample %in% sampleNamesIdx)
						{
							message(paste("Group: ", groupidx1[i], " / Member sample: ", memberSample))
							memberMz <- peakMzList[groupMembers[j]]
							
							
							if (finalGroupSize == 0)
							{
								groupMzList <- memberMz
								groupSampleList <- sampNames[memberSample]
							} else {
								groupMzList <- paste(groupMzList,",",memberMz, sep="")
								groupSampleList <- paste(groupSampleList,",",sampNames[memberSample], sep="")
							}
							
							finalGroupSize <- finalGroupSize +1	
						}
					}
					# here we do the real check on group size and only the groups that have enough
					# members with signal to noise > 0 will be plotted here:
					if (finalGroupSize >= args.minNrSamples)
					{
						message(paste("Plotting figure ",i, " of max ", length(groupidx1)," figures... "))
						
						figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="")
						write(paste("<img src='", "figure", i,".png' />", sep="") ,
			      				file=args.htmlReportFile, append=TRUE)
			
						png( figureName, type="cairo" ) 
						plot(eiccor, xsetData, groupidx = i)
						devname = dev.off()
						
						write(paste("<p>Alignment id: [", groupidx1[i], "]. m/z list of peaks in alignment with signal/noise <> NA:<br/>", groupMzList,"</p>", sep="") ,
			      				file=args.htmlReportFile, append=TRUE)
			      		write(paste("<p>m/z values found in the following samples respectively: <br/>", groupSampleList,"</p>", sep="") ,
			      				file=args.htmlReportFile, append=TRUE)
					}
				}
				
			}
			
			write("</body><html>",
			      	file=args.htmlReportFile, append=TRUE)
			message("finished generating report")
			# unlink(args.htmlReportFile)
			cat("\nWarnings================:\n")
			str( warnings() ) 
		},
        error=function(cond) {
            sink(NULL, type="message") # default setting
			sink(stderr(), type="output")
            message("\nERROR: ===========\n")
            print(cond)
        }
    )