Mercurial > repos > pieterlukasse > prims_metabolomics
changeset 49:f772a5caa86a
Added more options and better documentation.
Added MsClust support for parsing XCMS alignment results.
Improved output reports for XCMS wrappers.
New tools.
author | pieter.lukasse@wur.nl |
---|---|
date | Wed, 10 Dec 2014 22:03:27 +0100 |
parents | 26b93438f30e |
children | 93102202ab79 |
files | MsClust.jar README.rst metaMS_cmd_annotate.r metaMS_cmd_interface.r metaMS_cmd_pick_and_group.r metams_lcms_annotate.xml metams_lcms_pick_and_group.xml msclust.xml next.r static/images/CAMERA_results.png static/images/massEIC.png xcms_differential_analysis.r xcms_differential_analysis.xml xcms_get_alignment_eic.r xcms_get_alignment_eic.xml xcms_get_mass_eic.r xcms_get_mass_eic.xml |
diffstat | 17 files changed, 1036 insertions(+), 188 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Wed Nov 19 15:11:45 2014 +0100 +++ b/README.rst Wed Dec 10 22:03:27 2014 +0100 @@ -19,6 +19,8 @@ ============== ====================================================================== Date Changes -------------- ---------------------------------------------------------------------- +December 2014 * Added MsClust support for parsing XCMS alignment results. + * Improved output reports for XCMS wrappers. November 2014 * Added XCMS related tool wrappers (for metaMS and diffreport) September 2014 * Added new membership cutoff option for final clusters in MsClust * Improved MsClust memory usage for large datasets
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/metaMS_cmd_annotate.r Wed Dec 10 22:03:27 2014 +0100 @@ -0,0 +1,83 @@ +## read args: +args <- commandArgs(TRUE) +## the constructed DB, e.g. "E:/Rworkspace/metaMS/data/LCDBtest.RData" +args.constructedDB <- args[1] +## data file in xset format: +args.xsetData <- args[2] +## settings file, e.g. "E:/Rworkspace/metaMS/data/settings.r", should contain assignment to an object named "customMetaMSsettings" +args.settings <- args[3] + +## output file names, e.g. "E:/Rworkspace/metaMS/data/out.txt" +args.outAnnotationTable <- args[4] + +## report files +args.htmlReportFile <- args[5] +args.htmlReportFile.files_path <- args[6] + +if (length(args) == 7) +{ + args.outLogFile <- args[7] + # suppress messages: + # Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888 + msg <- file(args.outLogFile, open="wt") + sink(msg, type="message") + sink(msg, type="output") +} + +cat("\nSettings used===============:\n") +cat(readChar(args.settings, 1e5)) + + +tryCatch( + { + library(metaMS) + + ## load the constructed DB : + tempEnv <- new.env() + testDB <- load(args.constructedDB, envir=tempEnv) + xsetData <- readRDS(args.xsetData) + + ## load settings "script" into "customMetaMSsettings" + source(args.settings, local=tempEnv) + message(paste(" loaded : ", args.settings)) + + # Just to highlight: if you want to use more than one + # trigger runLC: + LC <- runLC(xset=xsetData, settings = tempEnv[["customMetaMSsettings"]], DB = tempEnv[[testDB[1]]]$DB, nSlaves=20, returnXset = TRUE) + + # write out runLC annotation results: + write.table(LC$PeakTable, args.outAnnotationTable, sep="\t", row.names=FALSE) + + # the used constructed DB (write to log): + cat("\nConstructed DB info===============:\n") + str(tempEnv[[testDB[1]]]$Info) + cat("\nConstructed DB table===============:\n") + if (length(args) == 7) + { + write.table(tempEnv[[testDB[1]]]$DB, args.outLogFile, append=TRUE, row.names=FALSE) + write.table(tempEnv[[testDB[1]]]$Reftable, args.outLogFile, sep="\t", append=TRUE, row.names=FALSE) + } + + message("\nGenerating report.........") + # report + dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE) + html <- "<html><body><h1>Summary of annotation results:</h1>" + nrTotalFeatures <- nrow(LC$PeakTable) + nrAnnotatedFeatures <- nrow(LC$Annotation$annotation.table) + html <- paste(html,"<p>Total nr of features: ", nrTotalFeatures,"</p>", sep="") + html <- paste(html,"<p>Total nr of annotated features: ", nrAnnotatedFeatures,"</p>", sep="") + + html <- paste(html,"</body><html>") + message("finished generating report") + write(html,file=args.htmlReportFile) + # unlink(args.htmlReportFile) + cat("\nWarnings================:\n") + str( warnings() ) + }, + error=function(cond) { + sink(NULL, type="message") # default setting + sink(stderr(), type="output") + message("\nERROR: ===========\n") + print(cond) + } + )
--- a/metaMS_cmd_interface.r Wed Nov 19 15:11:45 2014 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,99 +0,0 @@ -## read args: -args <- commandArgs(TRUE) -## the constructed DB, e.g. "E:/Rworkspace/metaMS/data/LCDBtest.RData" -args.constructedDB <- args[1] -## data files, e.g. "E:/Rworkspace/metaMS/data/data.zip" (with e.g. .CDF files) and unzip output dir, e.g. "E:/" -args.dataZip <- args[2] -args.zipExtrDir <- paste(args[2],"dir/") -## settings file, e.g. "E:/Rworkspace/metaMS/data/settings.r", should contain assignment to an object named "customMetaMSsettings" -args.settings <- args[3] - -## output file names, e.g. "E:/Rworkspace/metaMS/data/out.txt" -args.outAnnotationTable <- args[4] -args.outLogFile <- args[5] -args.xsetOut <- args[6] - -## report files -args.htmlReportFile <- args[7] -args.htmlReportFile.files_path <- args[8] - -# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888 -msg <- file(args.outLogFile, open="wt") -sink(msg, type="message") -sink(msg, type="output") - -cat("\nSettings used===============:\n") -cat(readChar(args.settings, 1e5)) - - -tryCatch( - { - library(metaMS) - - ## load the constructed DB : - tempEnv <- new.env() - testDB <- load(args.constructedDB, envir=tempEnv) - - ## load the data files from a zip file - files <- unzip(args.dataZip, exdir=args.zipExtrDir) - - ## load settings "script" into "customMetaMSsettings" - source(args.settings, local=tempEnv) - message(paste(" loaded : ", args.settings)) - - # Just to highlight: if you want to use more than one - # trigger runLC: - LC <- runLC(files, settings = tempEnv[["customMetaMSsettings"]], DB = tempEnv[[testDB[1]]]$DB, nSlaves=20, returnXset = TRUE) - - # write out runLC annotation results: - write.table(LC$Annotation$annotation.table, args.outAnnotationTable, sep="\t", row.names=FALSE) - - # the used constructed DB (write to log): - cat("\nConstructed DB info===============:\n") - str(tempEnv[[testDB[1]]]$Info) - cat("\nConstructed DB table===============:\n") - write.table(tempEnv[[testDB[1]]]$DB, args.outLogFile, append=TRUE, row.names=FALSE) - write.table(tempEnv[[testDB[1]]]$Reftable, args.outLogFile, sep="\t", append=TRUE, row.names=FALSE) - # save xset as rdata: - xsetData <- LC$xset@xcmsSet - saveRDS(xsetData, file=args.xsetOut) - - message("\nGenerating report.........") - # report - dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE) - setwd(file.path(args.htmlReportFile.files_path)) - html <- "<html><body><h1>Extracted Ion Chromatograms of groups with more than 3 peaks</h1>" - - LC$xset@xcmsSet - gt <- groups(LC$xset@xcmsSet) - colnames(gt) - groupidx1 <- which(gt[,"rtmed"] > 0 & gt[,"rtmed"] < 3000 & gt[,"npeaks"] > 3) - if (length(groupidx1) > 0) - { - eiccor <- getEIC(LC$xset@xcmsSet, groupidx = c(groupidx1)) - eicraw <- getEIC(LC$xset@xcmsSet, groupidx = c(groupidx1), rt = "raw") - for (i in 1:length(groupidx1)) - { - figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="") - html <- paste(html,"<img src='", "figure", i,".png' />", sep="") - png( figureName, type="cairo" ) - plot(eiccor, LC$xset@xcmsSet, groupidx = i) - devname = dev.off() - } - } - - - html <- paste(html,"</body><html>") - message("finished generating report") - write(html,file=args.htmlReportFile) - # unlink(args.htmlReportFile) - cat("\nWarnings================:\n") - str( warnings() ) - }, - error=function(cond) { - sink(NULL, type="message") # default setting - sink(stderr(), type="output") - message("\nERROR: ===========\n") - print(cond) - } - )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/metaMS_cmd_pick_and_group.r Wed Dec 10 22:03:27 2014 +0100 @@ -0,0 +1,89 @@ +## read args: +args <- commandArgs(TRUE) +## data files, e.g. "E:/Rworkspace/metaMS/data/data.zip" (with e.g. .CDF files) and unzip output dir, e.g. "E:/" +args.dataZip <- args[1] +args.zipExtrDir <- sub("\\.","_",paste(args[1],"dir", sep="")) +dir.create(file.path(args.zipExtrDir), showWarnings = FALSE, recursive = TRUE) +## settings file, e.g. "E:/Rworkspace/metaMS/data/settings.r", should contain assignment to an object named "customMetaMSsettings" +args.settings <- args[2] + +## output file names, e.g. "E:/Rworkspace/metaMS/data/out.txt" +args.outPeakTable <- args[3] +args.xsetOut <- args[4] + +## report files +args.htmlReportFile <- args[5] +args.htmlReportFile.files_path <- args[6] + + +if (length(args) == 7) +{ + args.outLogFile <- args[7] + # suppress messages: + # Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888 + msg <- file(args.outLogFile, open="wt") + sink(msg, type="message") + sink(msg, type="output") +} + +cat("\nSettings used===============:\n") +cat(readChar(args.settings, 1e5)) + + +tryCatch( + { + library(metaMS) + + ## load the data files from a zip file + files <- unzip(args.dataZip, exdir=args.zipExtrDir) + + ## load settings "script" into "customMetaMSsettings" + tempEnv <- new.env() + source(args.settings, local=tempEnv) + message(paste(" loaded : ", args.settings)) + allSettings <- tempEnv[["customMetaMSsettings"]] + + # trigger runLC: + LC <- runLC(files, settings = allSettings, nSlaves=20, returnXset = TRUE) + + # write out runLC annotation results: + write.table(LC$PeakTable, args.outPeakTable, sep="\t", row.names=FALSE) + + # save xset as rdata: + xsAnnotatePreparedData <- LC$xset + saveRDS(xsAnnotatePreparedData, file=args.xsetOut) + + message("\nGenerating report.........") + # report + dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE) + html <- "<html><body><h1>Info on alignment quality </h1>" + # TODO add (nr and mass error) and group size + + message("\nPlotting figures... ") + figureName <- paste(args.htmlReportFile.files_path, "/figure_retcor.png", sep="") + html <- paste(html,"<img src='figure_retcor.png' /><br/>", sep="") + png( figureName, type="cairo", width=1100,height=600 ) + retcor(LC$xset@xcmsSet, method="peakgroups", plottype = "mdevden") + html <- paste(html,"<a>*NB: retention time correction plot based on 'peakgroups' option with default settings. This is not the plot matching the exact settings used in the run, + but just intended to give a rough estimate of the retention time shifts present in the data. A more accurate plot will be available once + this option is added in metaMS API. </a><br/>", sep="") + devname = dev.off() + + + gt <- groups(LC$xset@xcmsSet) + groupidx1 <- which(gt[,"rtmed"] > 0 & gt[,"rtmed"] < 3000 & gt[,"npeaks"] > 3) + + html <- paste(html,"</body><html>") + message("finished generating report") + write(html,file=args.htmlReportFile) + # unlink(args.htmlReportFile) + cat("\nWarnings================:\n") + str( warnings() ) + }, + error=function(cond) { + sink(NULL, type="message") # default setting + sink(stderr(), type="output") + message("\nERROR: ===========\n") + print(cond) + } + )
--- a/metams_lcms_annotate.xml Wed Nov 19 15:11:45 2014 +0100 +++ b/metams_lcms_annotate.xml Wed Dec 10 22:03:27 2014 +0100 @@ -1,58 +1,28 @@ -<tool id="metams_lcms_annotate" name="METAMS-LC/MS Annotate" version="0.0.3"> - <description> Runs metaMS process for LC/MS feature grouping and annotation</description> +<tool id="metams_lcms_annotate" name="METAMS-LC/MS Annotate" version="0.0.4"> + <description> Runs metaMS process for LC/MS feature annotation</description> <requirements> <requirement type="package" version="3.1.1">R_bioc_metams</requirement> </requirements> <command interpreter="Rscript"> - metaMS_cmd_interface.r + metaMS_cmd_annotate.r $constructed_db - $data_files + $xsetData $customMetaMSsettings $outputFile - $outputLog - $xsetOut $htmlReportFile $htmlReportFile.files_path + $outputLog </command> <inputs> <param name="constructed_db" type="select" label="Constructed DB" help="Reference annotation database generated from matching measurements of a mixture of chemical standards against a manually validated reference table which contains the key analytical information for each standard." dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/metaMS")'/> - <param name="data_files" type="data" format="prims.fileset.zip" label="Data files (.zip file with CDFs)" help=".zip file containing the CDF files of the new measurements"/> - - + <param name="xsetData" type="data" format="rdata" label="xcmsSet data file (xset RDATA)" help="E.g. output data file resulting from METAMS 'feature picking, aligning and grouping' run"/> <param name="protocolName" type="text" size="30" label="protocolName" value="Synapt.QTOF.RP" help="protocolName"/> - <param name="method" type="select" size="30" label="PEAK PICKING method ====================================================="> - <option value="matchedFilter" selected="true">matchedFilter</option> - </param> - <param name="step" type="float" size="10" value="0.05" label="step" help="step"/> - <param name="fwhm" type="integer" size="10" value="20" label="fwhm" help="fwhm" /> - <param name="snthresh" type="integer" size="10" value="4" label="snthresh" help="snthresh" /> - <param name="max" type="integer" size="10" value="50" label="max" help="max" /> - - <param name="min_class_fraction" type="float" size="10" value="0.3" label="ALIGNMENT min.class.fraction =====================================================" help="min.class.fraction"/> - <param name="min_class_size" type="integer" size="10" value="3" label="min.class.size" help="min.class.size" /> - <param name="mzwid" type="float" size="10" value="0.1" label="mzwid" help="mzwid"/> - <param name="bws" type="text" size="10" value="30,10" label="bws" help="bws"/> - <param name="missingratio" type="float" size="10" value="0.2" label="missingratio" help="missingratio"/> - <param name="extraratio" type="float" size="10" value="0.1" label="extraratio" help="extraratio"/> - <param name="retcormethod" type="select" size="30" label="retcormethod" help="retcormethod"> - <option value="linear" selected="true">linear</option> - </param> - <param name="retcorfamily" type="select" size="30" label="retcorfamily" help="retcorfamily"> - <option value="symmetric" selected="true">symmetric</option> - </param> - <param name="fillPeaks" type="select" size="30" label="fillPeaks" help="fillPeaks"> - <option value="TRUE" selected="true">Yes</option> - <option value="FALSE">No</option> - </param> - <param name="perfwhm" type="float" size="10" value="0.6" label="CAMERA perfwhm =====================================================" help="perfwhm"/> - <param name="cor_eic_th" type="float" size="10" value="0.7" label="cor_eic_th" help="cor_eic_th" /> - <param name="ppm" type="float" size="10" value="5.0" label="ppm" help="ppm" /> - <param name="rtdiff" type="float" size="10" value="1.5" label="MATCH2DB rtdiff =====================================================" help="rtdiff"/> + <param name="rtdiff" type="float" size="10" value="1.5" label="rtdiff" help="rtdiff"/> <param name="rtval" type="float" size="10" value="0.1" label="rtval" help="rtval" /> <param name="mzdiff" type="float" size="10" value="0.005" label="mzdiff" help="mzdiff" /> <param name="match2DB_ppm" type="float" size="10" value="5.0" label="ppm" help="ppm" /> @@ -64,27 +34,7 @@ <configfile name="customMetaMSsettings">## start comment ## metaMS process settings customMetaMSsettings <- metaMSsettings(protocolName = "${protocolName}", - chrom = "LC", - PeakPicking = list( - method = "${method}", - step = ${step}, - fwhm = ${fwhm}, - snthresh = ${snthresh}, - max = ${max}), - Alignment = list( - min.class.fraction = ${min_class_fraction}, - min.class.size = ${min_class_size}, - mzwid = ${mzwid}, - bws = c(${bws}), - missingratio = ${missingratio}, - extraratio = ${extraratio}, - retcormethod = "${retcormethod}", - retcorfamily = "${retcorfamily}", - fillPeaks = ${fillPeaks}), - CAMERA = list( - perfwhm = ${perfwhm}, - cor_eic_th = ${cor_eic_th}, - ppm= ${ppm})) + chrom = "LC") metaSetting(customMetaMSsettings, "match2DB") <- list( rtdiff = ${rtdiff}, rtval = ${rtval}, @@ -96,8 +46,7 @@ <outputs> <data name="outputFile" format="tabular" label="${tool.name} on ${on_string} - metaMS annotated file (TSV)"/> - <data name="outputLog" format="txt" label="${tool.name} on ${on_string} - metaMS LOG"/> - <data name="xsetOut" format="rdata" label="${tool.name} on ${on_string} - metaMS xcmsSet (RDATA)"/> + <data name="outputLog" format="txt" label="${tool.name} on ${on_string} - metaMS LOG" hidden="True"/> <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - metaMS report (HTML)"/> </outputs> <tests> @@ -109,12 +58,17 @@ .. class:: infomark -Runs metaMS process for LC/MS feature grouping and annotation. Parts of the metaMS process also make use of the XCMS and CAMERA tools and algorithms. -The figure below shows the main parts of the metaMS process. +Runs metaMS process for LC/MS feature annotation. +The figure below shows the main parts of the metaMS process. +This tool related to the last step of this process. .. image:: $PATH_TO_IMAGES/metaMS.png +From CAMERA documentation: + +.. image:: $PATH_TO_IMAGES/CAMERA_results.png + **References** If you use this Galaxy tool in work leading to a scientific publication please
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/metams_lcms_pick_and_group.xml Wed Dec 10 22:03:27 2014 +0100 @@ -0,0 +1,296 @@ +<tool id="metams_lcms_pick_and_group" name="METAMS-LC/MS Pick, Align and Group" version="0.0.4"> + <description> Runs metaMS process for LC/MS feature picking, aligning and grouping</description> + <requirements> + <requirement type="package" version="3.1.1">R_bioc_metams</requirement> + </requirements> + <command interpreter="Rscript"> + metaMS_cmd_pick_and_group.r + $data_files + $customMetaMSsettings + $outputFile + $xsetOut + $htmlReportFile + $htmlReportFile.files_path + $outputLog + </command> +<inputs> + <param name="data_files" type="data" format="prims.fileset.zip" label="Data files (.zip file with CDF, mzML or mzXML files)" help=".zip file containing the CDF, mzML or mzXML files of the new measurements"/> + + <param name="protocolName" type="text" size="30" label="protocolName" value="e.g. Synapt.QTOF.RP" + help="Choose a name to give for the specific settings in the parameters below"/><!-- TODO - let user choose this --> + + + <!-- ===========NB : if peak picking, alignment OR CAMERA settings have to be reused for runGC wrapper in the future, we can use Galaxy macro expansions here + to avoid defining these parameters again in the runGC wrapper ========================= --> + <conditional name="peakPicking"> + <param name="method" type="select" size="30" label="PEAK PICKING method =====================================================" + help="matchedFilter=Feature detection in the chromatographic time domain ; centWave=Feature detection for high resolution LC/MS data"> + <option value="matchedFilter" selected="true">matchedFilter</option> + <option value="centWave" >centWave</option> + </param> + <when value="matchedFilter"> + <param name="fwhm" type="integer" size="10" value="20" label="fwhm" + help="full width at half maximum of matched filtration gaussian model peak. Only used to calculate the actual sigma" /> + <param name="sigma_denom" type="float" size="10" value="2.3548" label="sigma_denominator" + help="denominator for standard deviation (width) of matched filtration model peak (e.g. sigma = fwhm/2.3548)" /> + <param name="max" type="integer" size="10" value="50" label="max" + help="maximum number of peaks per extracted ion chromatogram" /> + <param name="snthresh" type="integer" size="10" value="4" label="snthresh" + help="signal to noise ratio cutoff" /> + <param name="step" type="float" size="10" value="0.05" label="step" + help="step size to use for profile generation"/> + <param name="steps" type="integer" size="10" value="2" label="steps" + help="number of steps to merge prior to filtration"/> + <param name="mzdiff" type="float" size="10" value="0.8" label="mzdiff" + help="minimum difference in m/z for peaks with overlapping retention times"/> + </when> + <when value="centWave"> + <param name="ppm" type="integer" size="10" value="25" label="ppm" + help="maxmial tolerated m/z deviation in consecutive scans, in ppm" /> + <param name="peakwidth" type="text" size="10" value="20,50" label="peakwidth" + help="Chromatographic peak width, given as range (min,max) in seconds" /> + <param name="snthresh" type="integer" size="10" value="10" label="snthresh" + help="signal to noise ratio cutoff" /> + <param name="prefilter" type="text" size="10" value="3,100" label="prefilter=c(k,I)" + help="Prefilter step for the first phase. Mass traces are only retained if + they contain at least k peaks with intensity > = I" /> + <param name="mzCenterFun" type="select" size="30" label="mzCenterFun" + help="Function to calculate the m/z center of the feature: wMean intensity weighted mean of the + feature m/z values, mean mean of the feature m/z values, apex use m/z value at peak apex, + wMeanApex3 intensity weighted mean of the m/z value at peak apex and the m/z value left and + right of it, meanApex3 mean of the m/z value at peak apex and the m/z value left and right of it"> + <option value="wMean" selected="true">wMean</option> + <option value="mean" >mean</option> + <option value="apex" >apex</option> + <option value="wMeanApex3" >wMeanApex3</option> + <option value="meanApex3" >meanApex3</option> + </param> + <param name="integrate" type="select" size="30" label="integrate" + help="Integration method. If =1 peak limits are found through descent + on the mexican hat filtered data, if =2 the descent is done on the real data. + Method 2 is very accurate but prone to noise, while method 1 is more robust to noise but less exact"> + <option value="1" selected="true">1</option> + <option value="2" >2</option> + </param> + <param name="mzdiff" type="float" size="10" value="-0.001" label="mzdiff" + help="minimum difference in m/z for peaks with overlapping retention times, can be negative to allow overlap" /> + <param name="fitgauss" type="integer" size="10" value="20" label="fitgauss" + help="logical, if Yes: a Gaussian is fitted to each peak" > + <option value="TRUE" selected="true">Yes</option> + <option value="FALSE" >No</option> + </param> + <param name="noise" type="integer" size="10" value="0" label="noise" + help="optional argument which is useful for data that was centroided without any intensity + threshold, centroids with intensity < noise are omitted from ROI detection" /> + </when> + </conditional> + + + <param name="min_class_fraction" type="float" size="10" value="0.3" label="ALIGNMENT min.class.fraction =====================================================" + help="Minimum fraction of samples necessary in the alignment to make it a valid alignment/group"/> + <param name="min_class_size" type="integer" size="10" value="3" label="min.class.size" + help="Minimum number of samples necessary in the alignment to make it a valid alignment/group. The lowest criteria + between this and min.class.fraction will be used." /> + <param name="mzwid" type="float" size="10" value="0.1" label="mzwid" + help="width of overlapping m/z slices to use for creating peak density chromatograms and grouping peaks across samples"/> + <param name="bws" type="text" size="10" value="30,10" label="bws" + help="bandwidth (standard deviation or half width at half maximum) of gaussian smoothing kernel + to apply to the peak density chromatogram. Fill in two values separated by comma. First value is used for + first grouping round, seccond value is used for last grouping/alignment round after retention time + correction. "/> + + <conditional name="retcor"> + <param name="retcormethod" type="select" size="30" label="retcormethod" + help="retention time correction method. 'peakgroups' is the default density based approach, 'obiwarp' is + alignment data by Ordered Bijective Interpolated Warping "> + <option value="peakgroups" selected="true">peakgroups</option> + <option value="obiwarp" >obiwarp</option> + </param> + <when value="peakgroups"> + <param name="retcorfamily" type="select" size="30" label="retcorfamily" + help="retention time correction method type/family"> + <option value="symmetric" selected="true">symmetric</option> + <option value="gaussian">gaussian</option> + </param> + <param name="smooth" type="select" size="30" label="smooth" + help="either 'loess' for non-linear alignment or 'linear' for linear alignment"> + <option value="linear" selected="true">linear</option> + <option value="loess">loess (TODO - waiting for metaMS to add/parse this option)</option> + </param> + <param name="missingratio" type="float" size="10" value="0.2" label="missingratio" + help="ratio of missing samples to allow in retention time correction groups"/> + <param name="extraratio" type="float" size="10" value="0.1" label="extraratio" + help="ratio of extra peaks to allow in retention time correction correction groups"/> + </when> + <when value="obiwarp"> + <param name="profStep" type="integer" size="10" value="1" label="profStep" + help="step size (in m/z) to use for profile generation from the raw data files" /> + </when> + </conditional> + + <param name="fillPeaks" type="select" size="30" label="fillPeaks" + help="Fill missing peaks in peak groups/alignments that do not include peaks from every sample. + This method produces intensity values for those missing samples by integrating raw data in peak group region."> + <option value="TRUE" selected="true">Yes</option> + <option value="FALSE">No</option> + </param> + <param name="perfwhm" type="float" size="10" value="0.6" label="CAMERA perfwhm =====================================================" + help="percentage of FWHM width"/> + <param name="cor_eic_th" type="float" size="10" value="0.7" label="cor_eic_th" + help="correlation threshold (0..1)" /> + <param name="ppm" type="float" size="10" value="5.0" label="ppm" + help="General ppm error" /> + + <param name="groupCorr_graphMethod" type="select" size="30" label="(groupCorr)graphMethod" + help="Method selection for grouping peaks after correlation analysis into pseudospectra."> + <option value="hcs" selected="true">hcs</option> + </param> + + <param name="groupCorr_pval" type="float" size="10" value="0.05" label="(groupCorr)pval" + help="significant correlation threshold" /> + + <param name="groupCorr_calcCiS" type="select" size="30" label="(groupCorr)calcCiS" + help="Use correlation inside samples for peak grouping"> + <option value="TRUE" selected="true">Yes</option> + <option value="FALSE">No</option> + </param> + + <param name="groupCorr_calcIso" type="select" size="30" label="(groupCorr)calcIso" + help="Use isotopic relationship for peak grouping"> + <option value="TRUE" >Yes</option> + <option value="FALSE" selected="true">No</option> + </param> + + <param name="groupCorr_calcCaS" type="select" size="30" label="(groupCorr)calcCaS" + help="Use correlation across samples for peak grouping"> + <option value="TRUE" >Yes</option> + <option value="FALSE" selected="true">No</option> + </param> + + + <param name="findIsotopes_maxcharge" type="integer" size="10" value="3" label="(findIsotopes)maxcharge" + help="max. ion charge" /> + + <param name="findIsotopes_maxiso" type="integer" size="10" value="4" label="(findIsotopes)maxiso" + help="max. number of expected isotopes" /> + + <param name="findIsotopes_minfrac" type="float" size="10" value="0.5" label="(findIsotopes)minfrac" + help="The ratio for the number of samples, which must satisfy the C12/C13 rule for isotope annotation" /> + + <param name="findAdducts_polarity" type="select" size="30" label="(findAdducts)polarity" + help="Which polarity mode was used for measuring of the ms sample"> + <option value="positive" selected="true">positive</option> + <option value="negative" >negative</option> + </param> + + <param name="findAdducts_multiplier" type="integer" size="10" value="3" label="(findAdducts)multiplier" + help="If no ruleset is provided, calculate ruleset with max. number n of [nM+x] clusterions" /> + + + +</inputs> +<configfiles> + +<configfile name="customMetaMSsettings">## ==================================== + ## metaMS process settings + customMetaMSsettings <- metaMSsettings(protocolName = "${protocolName}", + chrom = "LC", + PeakPicking = list( + method = "${peakPicking.method}", + #if $peakPicking.method == "matchedFilter" + fwhm = ${peakPicking.fwhm}, + sigma = ${peakPicking.fwhm}/${peakPicking.sigma_denom}, + max = ${peakPicking.max}, + snthresh = ${peakPicking.snthresh}, + step = ${peakPicking.step}, + steps = ${peakPicking.steps}, + mzdiff = ${peakPicking.mzdiff}), + #else + ppm = ${peakPicking.ppm}, + peakwidth = c(${peakPicking.peakwidth}), + snthresh = ${peakPicking.snthresh}, + prefilter = c(${peakPicking.prefilter}), + mzCenterFun = "${peakPicking.mzCenterFun}", + integrate = ${peakPicking.integrate}, + mzdiff = ${peakPicking.mzdiff}, + fitgauss = ${peakPicking.fitgauss}, + noise = ${peakPicking.noise}), + #end if + Alignment = list( + min.class.fraction = ${min_class_fraction}, + min.class.size = ${min_class_size}, + mzwid = ${mzwid}, + bws = c(${bws}), + retcormethod = "${retcor.retcormethod}", + #if $retcor.retcormethod == "peakgroups" + smooth = "${retcor.smooth}", + missingratio = ${retcor.missingratio}, + extraratio = ${retcor.extraratio}, + retcorfamily = "${retcor.retcorfamily}", + #else + ##repeating the method as workaround/ backwards compatibility (can remove this one after fix from metaMS): + method = "${retcor.retcormethod}", + profStep = ${retcor.profStep}, + #end if + fillPeaks = ${fillPeaks}), + CAMERA = list( + perfwhm = ${perfwhm}, + cor_eic_th = ${cor_eic_th}, + ppm= ${ppm}, + graphMethod= "${groupCorr_graphMethod}", + pval= ${groupCorr_pval}, + calcCiS= ${groupCorr_calcCiS}, + calcIso= ${groupCorr_calcIso}, + calcCaS= ${groupCorr_calcCaS}, + maxcharge= ${findIsotopes_maxcharge}, + maxiso= ${findIsotopes_maxiso}, + minfrac= ${findIsotopes_minfrac}, + polarity= "${findAdducts_polarity}", + multiplier= ${findAdducts_multiplier} + ))</configfile> + +</configfiles> + +<outputs> + <data name="outputFile" format="tabular" label="${tool.name} on ${on_string} - peaks table (TSV)"/> + <data name="outputLog" format="txt" label="${tool.name} on ${on_string} - LOG" hidden="True"/> + <data name="xsetOut" format="rdata" label="${tool.name} on ${on_string} - xcmsSet (RDATA)"/> + <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - report (HTML)"/> +</outputs> +<tests> + <test> + </test> +</tests> +<help> + +.. class:: infomark + +Runs metaMS process for LC/MS feature feature picking, aligning and grouping. +This part of the metaMS process makes use of the XCMS and CAMERA tools and algorithms. +CAMERA is used for automatic deconvolution/annotation of LC/ESI-MS data. +The figure below shows the main parts of the metaMS process. + +.. image:: $PATH_TO_IMAGES/metaMS.png + + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +Wehrens, R.; Weingart, G.; Mattivi, F. (2014). +metaMS: an open-source pipeline for GC-MS-based untargeted metabolomics. +Journal of chromatography B: biomedical sciences and applications, 996 (1): 109-116. +doi: 10.1016/j.jchromb.2014.02.051 +handle: http://hdl.handle.net/10449/24012 + +Wrapper by Pieter Lukasse. + + + </help> + <citations> + <citation type="doi">10.1016/j.jchromb.2014.02.051</citation> <!-- example + see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set + --> + </citations> +</tool> \ No newline at end of file
--- a/msclust.xml Wed Nov 19 15:11:45 2014 +0100 +++ b/msclust.xml Wed Dec 10 22:03:27 2014 +0100 @@ -1,4 +1,4 @@ -<tool name="MsClust" id="msclust2" version="2.0.6"> +<tool name="MsClust" id="msclust2" version="2.0.7"> <description>Extracts fragmentation spectra from aligned data</description> <!-- For remote debugging start you listener on port 8000 and use the following as command interpreter: @@ -15,7 +15,7 @@ #if $imputationMethod.type == "valueRange" -rangeUpperLimit $imputationMethod.rangeUpperLimit #end if - -plInputFormat "metalign" + -plInputFormat $plInputFormat -potDensFuncType $potDensFuncType.type -centerSelectionType $centerSelectionType.type -clusteringType $clusteringType.type @@ -68,11 +68,15 @@ </command> <inputs> - <param name="inputPeaks" type="data" format="txt" label="Ion-wise aligned data (e.g. MetAlign output data)" /> + <param name="inputPeaks" type="data" format="txt" label="Ion-wise aligned data (e.g. MetAlign or XCMS/metaMS output data)" /> + <param name="plInputFormat" type="select" size="30" label="Data format"> + <option value="metalign" selected="true">MetAlign</option> + <option value="xcms">XCMS/metaMS (beta)</option> + </param> <param name="dataType" type="select" size="30" label="Data type"> - <option value="gcms" selected="true">GC-MS</option> - <option value="lcms">LC-MS</option> - </param> + <option value="gcms" selected="true">GC-MS</option> + <option value="lcms">LC-MS</option> + </param> <conditional name="imputationMethod"> <param name="type" type="select" size="30" label="Select the approach used for imputing missing values (optional)" help="select how you generated the values to fill in the data gaps"> <option value="none" >none</option> @@ -95,8 +99,9 @@ <param name="pdf_neighborhoodWindowSize" type="integer" size="10" value="200" label="Effective Peaks" /> <conditional name="rt_dist_tol_unit"> <param name="type" type="select" size="30" label="Peak time unit"> - <option value="1" selected="true">scan nr</option> - <option value="2" >(average) micro minutes</option> + <option value="1" selected="true">scan nr (MetAlign)</option> + <option value="2" >(average) micro minutes (MetAlign)</option> + <option value="3" >(average) minutes (XCMS)</option> </param> <when value="1"> <param name="pdf_rt_toler" type="float" size="10" value="10" label="Peak Width, in scans" /> @@ -104,6 +109,9 @@ <when value="2"> <param name="pdf_rt_toler" type="float" size="10" value="100000" label="Peak Width, in micro minutes" help="e.g. 100,000=6 seconds" /> </when> + <when value="3"> + <param name="pdf_rt_toler" type="float" size="10" value="0.1" label="Peak Width, in minutes" help="e.g. 0.1=6 seconds" /> + </when> </conditional> <param name="pdf_scan_conf" type="float" size="10" value="80" label="Peak Width confidence (0.0 to 99.99)" help="example: 0[no confidence]...50[good guess]...99.9[quite certain])" /> <param name="pdf_pears_treshold" type="float" size="10" value="0.8" label="Correlation threshold (0.0 - 1.0)" />
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/next.r Wed Dec 10 22:03:27 2014 +0100 @@ -0,0 +1,4 @@ +# next script, useful for plotting the CAMERA groups pseudo-spectra: +plotPsSpectrum( LC$xset,320,maxlabel=10) +320 is group id +plotEICs( LC$xset,35,maxlabel=5) \ No newline at end of file
--- a/xcms_differential_analysis.r Wed Nov 19 15:11:45 2014 +0100 +++ b/xcms_differential_analysis.r Wed Dec 10 22:03:27 2014 +0100 @@ -14,19 +14,23 @@ #cat(paste("args.topcount <- ", args[4], "\n", sep="")) args.outTable <- args[5] -args.outLogFile <- args[6] -#cat(paste("args.outLogFile <- \"", args[6], "\"\n", sep="")) ## report files -args.htmlReportFile <- args[7] -args.htmlReportFile.files_path <- args[8] -#cat(paste("args.htmlReportFile <- \"", args[7], "\"\n", sep="")) -#cat(paste("args.htmlReportFile.files_path <- \"", args[8], "\"\n", sep="")) +args.htmlReportFile <- args[6] +args.htmlReportFile.files_path <- args[7] +#cat(paste("args.htmlReportFile <- \"", args[6], "\"\n", sep="")) +#cat(paste("args.htmlReportFile.files_path <- \"", args[7], "\"\n", sep="")) + -# Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888 -msg <- file(args.outLogFile, open="wt") -sink(msg, type="message") -sink(msg, type="output") +if (length(args) == 8) +{ + args.outLogFile <- args[8] + # suppress messages: + # Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888 + msg <- file(args.outLogFile, open="wt") + sink(msg, type="message") + sink(msg, type="output") +} tryCatch( { @@ -34,12 +38,18 @@ library(xcms) #library("R2HTML") - ## load the constructed DB : - xcmsSet <- readRDS(args.xsetData) + # load the xset data : + xsetData <- readRDS(args.xsetData) + # if here to support both scenarios: + if ("xcmsSet" %in% slotNames(xsetData) ) + { + xsetData <- xsetData@xcmsSet + } + # info: levels(xcmsSet@phenoData$class) also gives access to the class names - dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE) - reporttab <- diffreport(xcmsSet, args.class1, args.class2, paste(args.htmlReportFile.files_path,"/fig", sep=""), args.topcount, metlin = 0.15, h=480, w=640) + dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE) + reporttab <- diffreport(xsetData, args.class1, args.class2, paste(args.htmlReportFile.files_path,"/fig", sep=""), args.topcount, metlin = 0.15, h=480, w=640) # write out tsv table: write.table(reporttab, args.outTable, sep="\t", row.names=FALSE)
--- a/xcms_differential_analysis.xml Wed Nov 19 15:11:45 2014 +0100 +++ b/xcms_differential_analysis.xml Wed Dec 10 22:03:27 2014 +0100 @@ -1,4 +1,4 @@ -<tool id="xcms_differential_analysis" name="XCMS Differential Analsysis" version="0.0.1"> +<tool id="xcms_differential_analysis" name="XCMS Differential Analsysis" version="0.0.4"> <description> Runs xcms diffreport function for differential Analsysis</description> <requirements> <requirement type="package" version="3.1.1">R_bioc_metams</requirement> @@ -6,13 +6,13 @@ <command interpreter="Rscript"> xcms_differential_analysis.r $xsetData - $class1 - $class2 + "$class1" + "$class2" $topcount $outTable - $outLogFile $htmlReportFile $htmlReportFile.files_path + $outLogFile </command> <inputs> @@ -27,7 +27,7 @@ </inputs> <outputs> <data name="outTable" format="tabular" label="${tool.name} on ${on_string} - Top differential items (TSV)"/> - <data name="outLogFile" format="txt" label="${tool.name} on ${on_string} - differential log (LOG)"/> + <data name="outLogFile" format="txt" label="${tool.name} on ${on_string} - log (LOG)" hidden="True"/> <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - differential report (HTML)"/> </outputs> <tests>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xcms_get_alignment_eic.r Wed Dec 10 22:03:27 2014 +0100 @@ -0,0 +1,153 @@ +# shows all alignment results in a rt region +## read args: +args <- commandArgs(TRUE) +# xset data: +args.xsetData <- args[1] + +args.rtStart <- strtoi(args[2]) +args.rtEnd <- strtoi(args[3]) + +# limit max diff to 600 and minNrSamples to at least 2 : +if (args.rtEnd - args.rtStart > 600) + stop("The RT window should be <= 600") + +args.minNrSamples <- strtoi(args[4]) #min nr samples +if (args.minNrSamples < 2) + stop("Set 'Minimum number of samples' to 2 or higher") + + +args.sampleNames <- strsplit(args[5], ",")[[1]] +# trim leading and trailing spaces: +args.sampleNames <- gsub("^\\s+|\\s+$", "", args.sampleNames) + +## report files +args.htmlReportFile <- args[6] +args.htmlReportFile.files_path <- args[7] + + +if (length(args) == 8) +{ + args.outLogFile <- args[8] + # suppress messages: + # Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888 + msg <- file(args.outLogFile, open="wt") + sink(msg, type="message") + sink(msg, type="output") +} + + + +tryCatch( + { + library(metaMS) + + # load the xset data : + xsetData <- readRDS(args.xsetData) + # if here to support both scenarios: + if ("xcmsSet" %in% slotNames(xsetData) ) + { + xsetData <- xsetData@xcmsSet + } + + # report + dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE) + message(paste("\nGenerating report.........in ", args.htmlReportFile.files_path)) + + write(paste("<html><body><h1>Extracted Ion Chromatograms (EIC) of alignments with peaks in ",args.minNrSamples, " or more samples</h1>"), + file=args.htmlReportFile) + + gt <- groups(xsetData) + message("\nParsed groups... ") + groupidx1 <- which(gt[,"rtmed"] > args.rtStart & gt[,"rtmed"] < args.rtEnd & gt[,"npeaks"] >= args.minNrSamples) # this should be only on samples selected + + if (length(groupidx1) > 0) + { + message("\nGetting EIC... ") + eiccor <- getEIC(xsetData, groupidx = c(groupidx1), sampleidx=args.sampleNames) + #eicraw <- getEIC(xsetData, groupidx = c(groupidx1), sampleidx=args.sampleNames, rt = "raw") + + #sampleNamesIdx <- which(sampnames(LC$xset@xcmsSet) %in% args.sampleNames, arr.ind = TRUE) + #or (from bioconductor code for getEIC): NB: this is assuming sample indexes used in data are ordered after the order of sample names!! + sampNames <- sampnames(xsetData) + sampleNamesIdx <- match( args.sampleNames, sampNames) + message(paste("Samples: ", sampleNamesIdx)) + + #TODO html <- paste(html, "<table><tbody>") + message(paste("\nPlotting figures... ")) + + #get the mz list (interestingly, this [,"mz"] is relatively slow): + peakMzList <- xsetData@peaks[,"mz"] + peakSampleList <- xsetData@peaks[,"sample"] + #signal to noise list: + peakSnList <- xsetData@peaks[,"sn"] + + message(paste("Total nr of peaks: ", length(peakMzList))) + + for (i in 1:length(groupidx1)) + { + groupMembers <- xsetData@groupidx[[groupidx1[i]]] + + groupMzList <- "" + groupSampleList <- "" + finalGroupSize <- 0 + + for (j in 1:length(groupMembers)) + { + #get only the peaks from the selected samples: + memberSample <- peakSampleList[groupMembers[j]] + memberSn <- peakSnList[groupMembers[j]] + if (!is.na(memberSn) && memberSample %in% sampleNamesIdx) + { + message(paste("Group: ", groupidx1[i], " / Member sample: ", memberSample)) + memberMz <- peakMzList[groupMembers[j]] + + + if (finalGroupSize == 0) + { + groupMzList <- memberMz + groupSampleList <- sampNames[memberSample] + } else { + groupMzList <- paste(groupMzList,",",memberMz, sep="") + groupSampleList <- paste(groupSampleList,",",sampNames[memberSample], sep="") + } + + finalGroupSize <- finalGroupSize +1 + } + } + # here we do the real check on group size and only the groups that have enough + # members with signal to noise > 0 will be plotted here: + if (finalGroupSize >= args.minNrSamples) + { + message(paste("Plotting figure ",i, " of max ", length(groupidx1)," figures... ")) + + figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="") + write(paste("<img src='", "figure", i,".png' />", sep="") , + file=args.htmlReportFile, append=TRUE) + + png( figureName, type="cairo" ) + plot(eiccor, xsetData, groupidx = i) + devname = dev.off() + + write(paste("<p>Alignment id: [", groupidx1[i], "]. m/z list of peaks in alignment with signal/noise <> NA:<br/>", groupMzList,"</p>", sep="") , + file=args.htmlReportFile, append=TRUE) + write(paste("<p>m/z values found in the following samples respectively: <br/>", groupSampleList,"</p>", sep="") , + file=args.htmlReportFile, append=TRUE) + } + } + + } + + write("</body><html>", + file=args.htmlReportFile, append=TRUE) + message("finished generating report") + # unlink(args.htmlReportFile) + cat("\nWarnings================:\n") + str( warnings() ) + }, + error=function(cond) { + sink(NULL, type="message") # default setting + sink(stderr(), type="output") + message("\nERROR: ===========\n") + print(cond) + } + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xcms_get_alignment_eic.xml Wed Dec 10 22:03:27 2014 +0100 @@ -0,0 +1,72 @@ +<tool id="xcms_get_alignment_eic" name="XCMS Get Alignment EICs" version="0.0.4"> + <description> Extracts alignment EICs from feature alignment data</description> + <requirements> + <requirement type="package" version="3.1.1">R_bioc_metams</requirement> + </requirements> + <command interpreter="Rscript"> + xcms_get_alignment_eic.r + $xsetData + $rtStart + $rtEnd + $minNrSamples + "$sampleNames" + $htmlReportFile + $htmlReportFile.files_path + $outLogFile + </command> +<inputs> + + <param name="xsetData" type="data" format="rdata" label="xset xcms data file" help="E.g. output data file resulting from METAMS run"/> + + + <param name="rtStart" type="integer" value="" size="10" label="RT start" help="Start of Retention Time region to plot" /> + <param name="rtEnd" type="integer" value="" size="10" label="RT end" help="End of Retention Time region to plot" /> + + <param name="minNrSamples" type="integer" size="10" value="10" label="Minimum number of samples in which aligned feature should be found" help="Can also read this as: Minimum + number of features in alignment. E.g. if set to 10, it means the alignment should consist of at least 10 peaks from 10 different samples aligned together." /> + + <param name="sampleNames" type="text" area="true" size="10x70" label="List of sample names" + value="sampleName1,sampleName2,etc" + help="Comma or line-separated list of sample names. Optional field where you can specify the subset of samples + to use for the EIC plots. NB: if your dataset has many samples, specifying a subset here can significantly speed up the processing time." > + <sanitizer> + <!-- this translates from line-separated to comma separated list --> + <valid/> + <mapping initial="none"> + <add source=" " target=","/> + <add source=" " target=""/> + </mapping> + </sanitizer> + </param> + + +</inputs> +<outputs> + <data name="outLogFile" format="txt" label="${tool.name} on ${on_string} - log (LOG)" hidden="True"/> + <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - EIC(s) report (HTML)"/> +</outputs> +<tests> + <test> + </test> +</tests> +<help> + +.. class:: infomark + +This tool finds the alignments in the specified RT window and extracts alignment EICs from feature alignment data using XCMS's getEIC method. +It produces a HTML report showing the EIC plots and the mass list of each alignment. The figure below shows an example of such an EIC plot, showing also the difference between +two classes, with extra alignment information beneath it. + +.. image:: $PATH_TO_IMAGES/diffreport.png + +Alignment id: 1709. m/z list of peaks in alignment: +614.002922098482,613.998019830021,614.000382307519,613.998229980469,613.998229980469 + + + </help> + <citations> + <citation type="doi">10.1021/ac051437y</citation> <!-- example + see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set + --> + </citations> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xcms_get_mass_eic.r Wed Dec 10 22:03:27 2014 +0100 @@ -0,0 +1,162 @@ +## read args: +args <- commandArgs(TRUE) +# xset data: +args.xsetData <- args[1] + +args.rtStart <- strtoi(args[2]) +args.rtEnd <- strtoi(args[3]) + +args.mzStart <- as.double(args[4]) +args.mzEnd <- as.double(args[5]) +# there are 2 options: specify a mz range or a mz list: +if (args.mzStart < 0) +{ + args.mzList <- as.double(strsplit(args[6], ",")[[1]]) + cat(typeof(as.double(strsplit(args[6], ",")[[1]]))) + args.mzTolPpm <- as.double(args[7]) + # calculate mzends based on ppm tol: + mzListEnd <- c() + mzListStart <- c() + for (i in 1:length(args.mzList)) + { + mzEnd <- args.mzList[i] + args.mzList[i]*args.mzTolPpm/1000000.0 + mzStart <- args.mzList[i] - args.mzList[i]*args.mzTolPpm/1000000.0 + mzListEnd <- c(mzListEnd, mzEnd) + mzListStart <- c(mzListStart, mzStart) + } + str(mzListStart) + str(mzListEnd) +} else { + mzListEnd <- c(args.mzEnd) + mzListStart <- c(args.mzStart) +} + +args.sampleNames <- strsplit(args[8], ",")[[1]] +# trim leading and trailing spaces: +args.sampleNames <- gsub("^\\s+|\\s+$", "", args.sampleNames) + +args.combineSamples <- args[9] +args.rtPlotMode <- args[10] + +## report files +args.htmlReportFile <- args[11] +args.htmlReportFile.files_path <- args[12] + + +if (length(args) == 13) +{ + args.outLogFile <- args[13] + # suppress messages: + # Send all STDERR to STDOUT using sink() see http://mazamascience.com/WorkingWithData/?p=888 + msg <- file(args.outLogFile, open="wt") + sink(msg, type="message") + sink(msg, type="output") +} + +# TODO - add option to do masses in same plot (if given in same line oid) or in separate plots +# TODO2 - let it run in parallel + +tryCatch( + { + library(metaMS) + + # load the xset data : + xsetData <- readRDS(args.xsetData) + # if here to support both scenarios: + if ("xcmsSet" %in% slotNames(xsetData) ) + { + xsetData <- xsetData@xcmsSet + } + + # report + dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE, recursive = TRUE) + message(paste("\nGenerating report.........in ", args.htmlReportFile.files_path)) + + html <- "<html><body><h1>Extracted Ion Chromatograms (EIC) matching criteria</h1>" + + if (args.combineSamples == "No") + { + if (length(args.sampleNames) > 1 && length(mzListStart) > 1 && length(args.sampleNames) != length(mzListStart)) + stop(paste("The number of sample names should match the number of m/z values in the list. Found ", length(mzListStart), + " masses while ", length(args.sampleNames), " sample names were given.")) + + iterSize <- length(args.sampleNames) + # these can be set to 1 or 0 just as a trick to iterate OR not over the items. If the respective list is of length 1, only the first item should be used + fixSampleIdx <- 1 + fixMzListIdx <- 1 + if (length(args.sampleNames) == 1) + { + fixSampleIdx <- 0 + iterSize <- length(mzListStart) + } + if (length(mzListStart) == 1) + { + fixMzListIdx <- 0 + } + lineColors <- rainbow(iterSize) + for (i in 0:(iterSize-1)) + { + message("\nGetting EIC... ") + eiccor <- getEIC(xsetData, + mzrange=matrix(c(mzListStart[i*fixMzListIdx+1],mzListEnd[i*fixMzListIdx+1]),nrow=1,ncol=2,byrow=TRUE), + rtrange=matrix(c(args.rtStart,args.rtEnd),nrow=1,ncol=2,byrow=TRUE), + sampleidx=c(args.sampleNames[i*fixSampleIdx+1]), rt=args.rtPlotMode) + + message("\nPlotting figures... ") + figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="") + html <- paste(html,"<img src='", "figure", i,".png' /><br/>", sep="") + png( figureName, type="cairo", width=1100,height=250 ) + #plot(eiccor, col=lineColors[i+1]) + # black is better in this case: + plot(eiccor) + legend('topright', # places a legend at the appropriate place + legend=c(args.sampleNames[i*fixSampleIdx+1]), # puts text in the legend + lty=c(1,1), # gives the legend appropriate symbols (lines) + lwd=c(2.5,2.5)) + + devname = dev.off() + } + + } else { + for (i in 1:length(mzListStart)) + { + message("\nGetting EIC... ") + eiccor <- getEIC(xsetData, + mzrange=matrix(c(mzListStart[i],mzListEnd[i]),nrow=1,ncol=2,byrow=TRUE), + rtrange=matrix(c(args.rtStart,args.rtEnd),nrow=1,ncol=2,byrow=TRUE), + sampleidx=args.sampleNames, rt = args.rtPlotMode) + + #set size, set option (plot per sample, plot per mass) + + message("\nPlotting figures... ") + figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="") + html <- paste(html,"<img src='", "figure", i,".png' />", sep="") + png( figureName, type="cairo", width=1100,height=450 ) + lineColors <- rainbow(length(args.sampleNames)) + plot(eiccor, col=lineColors) + legend('topright', # places a legend at the appropriate place + legend=args.sampleNames, # puts text in the legend + lty=c(1,1), # gives the legend appropriate symbols (lines) + lwd=c(2.5,2.5), + col=lineColors) + devname = dev.off() + } + } + if (args.rtPlotMode == "corrected") + { + html <- paste(html,"<p>*rt values are corrected ones</p></body><html>") + } + html <- paste(html,"</body><html>") + message("finished generating report") + write(html,file=args.htmlReportFile) + # unlink(args.htmlReportFile) + cat("\nWarnings================:\n") + str( warnings() ) + }, + error=function(cond) { + sink(NULL, type="message") # default setting + sink(stderr(), type="output") + message("\nERROR: ===========\n") + print(cond) + } + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xcms_get_mass_eic.xml Wed Dec 10 22:03:27 2014 +0100 @@ -0,0 +1,114 @@ +<tool id="xcms_get_mass_eic" name="XCMS Get EICs" version="0.0.4"> + <description> Extracts EICs for a given list of masses</description> + <requirements> + <requirement type="package" version="3.1.1">R_bioc_metams</requirement> + </requirements> + <command interpreter="Rscript"> + xcms_get_mass_eic.r + $xsetData + $rtStart + $rtEnd + #if $massParameters.massParametersType == "window" + $massParameters.mzStart + $massParameters.mzEnd + -1 + "." + #else + -1 + -1 + $massParameters.mzList + $massParameters.mzTolPpm + #end if + "$sampleNames" + $combineSamples + $rtPlotMode + $htmlReportFile + $htmlReportFile.files_path + $outLogFile + </command> +<inputs> + + <param name="xsetData" type="data" format="rdata" label="xset xcms data file" help="E.g. output data file resulting from METAMS run"/> + + + <param name="rtStart" type="integer" value="" size="10" label="RT start" help="Start of Retention Time region to plot" /> + <param name="rtEnd" type="integer" value="" size="10" label="RT end" help="End of Retention Time region to plot" /> + + <conditional name="massParameters"> + <param name="massParametersType" type="select" size="50" label="Give masses as" > + <option value="list" selected="true">m/z list</option> + <option value="window" >m/z window</option> + </param> + <when value="list"> + <param name="mzList" type="text" area="true" size="7x70" label="m/z list" + help="Comma or line-separated list of m/z values for which to plot an EIC. One EIC will be plotted for each m/z given here."> + <sanitizer> + <!-- this translates from line-separated to comma separated list --> + <valid/> + <mapping initial="none"> + <add source=" " target=","/> + <add source=" " target=""/> + </mapping> + </sanitizer> + </param> + <param name="mzTolPpm" type="integer" size="10" value="5" label="m/z tolerance (ppm)" /> + </when> + <when value="window"> + <param name="mzStart" type="float" value="" size="10" label="m/z start" help="Start of m/z window" /> + <param name="mzEnd" type="float" value="" size="10" label="m/z end" help="End of m/z window" /> + </when> + </conditional> + + + <param name="sampleNames" type="text" area="true" size="10x70" label="List of sample names" + value="sampleName1,sampleName2,etc" + help="Comma or line-separated list of sample names. Here you can specify the subset of samples + to use for the EIC plots. NB: if your dataset has many samples, specifying a subset here can significantly speed up the processing time." > + <sanitizer> + <valid/> + <mapping initial="none"> + <add source=" " target=","/> + <add source=" " target=""/> + </mapping> + </sanitizer> + </param> + + <param name="combineSamples" type="select" size="50" label="Combine samples in EIC" + help="Combining samples means plot contains the combined EIC of a m/z in the different samples. When NOT combining, each plot + only contains the EIC of the m/z in the respectively given sample (the sample name from the sample list in the same position as the + m/z in the m/z list.). Tip: use Yes for visualizing EIC of grouped masses (MsClust or CAMERA groups), use No for visualizing EICs of the same mass in + the different samples."> + <option value="No" selected="true">No</option> + <option value="Yes" >Yes</option> + </param> + <param name="rtPlotMode" type="select" size="50" label="RT plot mode" + help="Select whether EIC should be on original or raw Retention Time"> + <option value="raw" selected="true">Raw</option> + <option value="corrected" >Corrected</option> + </param> +</inputs> +<outputs> + <data name="outLogFile" format="txt" label="${tool.name} on ${on_string} - log (LOG)" hidden="True"/> + <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - EIC(s) report (HTML)"/> +</outputs> +<tests> + <test> + </test> +</tests> +<help> + +.. class:: infomark + +This tool will plot EICs for a given list of masses (or a mass window) using XCMS's getEIC method. +It produces a HTML report showing the EIC plots, one for each given mass. The figure below shows an example of such an EIC plot. + +.. image:: $PATH_TO_IMAGES/massEIC.png + + + </help> + <citations> + <citation type="doi">10.1021/ac051437y</citation> <!-- example + see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set + --> + </citations> +</tool> \ No newline at end of file