Mercurial > repos > pieterlukasse > prims_metabolomics
changeset 41:e67149fbff20
small changes/improvements;
new metams and xcms tools
author | pieter.lukasse@wur.nl |
---|---|
date | Thu, 06 Nov 2014 16:14:44 +0100 |
parents | a7b609941846 |
children | 664ccd5f7cf8 |
files | combine_output.xml library_lookup.xml match_library.py metaMS_cmd_interface.r metams_lcms_annotate.xml msclust.xml static/images/diffreport.png static/images/metaMS.png tool_dependencies.xml xcms_differential_analysis.r xcms_differential_analysis.xml |
diffstat | 11 files changed, 397 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/combine_output.xml Fri Sep 19 16:14:58 2014 +0200 +++ b/combine_output.xml Thu Nov 06 16:14:44 2014 +0100 @@ -4,10 +4,11 @@ combine_output.py $rankfilter_in $caslookup_in $out_single $out_multi </command> <inputs> - <param format="tabular" name="caslookup_in" type="data" label="RIQC-Lookup RI for CAS output" + <param format="tabular" name="rankfilter_in" type="data" label="RIQC-RankFilter output (Estimated RI)" + help="Select the output file from the RankFilter tool"/> + <param format="tabular" name="caslookup_in" type="data" label="RIQC-Lookup RI for CAS output ('Known' RI)" help="Select the output file from the CasLookup tool"/> - <param format="tabular" name="rankfilter_in" type="data" label="RIQC-RankFilter output" - help="Select the output file from the RankFilter tool"/> + <!-- <param TODO : could add "tolerance for ERI-KRI"(Estimated RI-Known RI)--> </inputs> <outputs> <data format="tabular" label="${tool.name} (Single) on ${on_string}" name="out_single" />
--- a/library_lookup.xml Fri Sep 19 16:14:58 2014 +0200 +++ b/library_lookup.xml Thu Nov 06 16:14:44 2014 +0100 @@ -13,16 +13,23 @@ $regression.model </command> <inputs> + <!-- Regarding the <page> items: this blocks the use of this tool in Galaxy workflows. However, + alternatives like wrapping this in conditionals, repeats (to force a refresh_on_change as this option + is not working on its own) failed since the workflow editor does not support refreshes... nor does the + workflow runtime support conditionals or repeats to be set at runtime. See also + galaxy-dev mail thread "when else" in <conditional> ? RE: refresh_on_change : is this a valid attribute? Any other ideas/options??" --> <page> <param format="tabular" name="input" type="data" label="NIST identifications as tabular file" help="Select a tab delimited NIST metabolite identifications file (converted from PDF)" /> <param name="library_file" type="select" label="CAS x RI Library file" help="Select a library/lookup file containing RI values for CAS numbers on various chromatography columns " dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RI_DB_libraries")'/> + </page> + <page> <param name="col_type" type="select" label="Select column type" refresh_on_change="true" display="radio" dynamic_options='get_column_type(library_file)' help="" /> - </page> + </page> <page> <param name="polarity" type="select" label="Select polarity" refresh_on_change="true" display="radio" dynamic_options='filter_column(library_file,col_type)'
--- a/match_library.py Fri Sep 19 16:14:58 2014 +0200 +++ b/match_library.py Thu Nov 06 16:14:44 2014 +0100 @@ -108,7 +108,7 @@ fill a Galaxy drop-down combo box. ''' - files = glob.glob(dir_name + "/*.txt") + files = glob.glob(dir_name + "/*.*") if len(files) == 0: # Configuration error: no library files found in <galaxy-home-dir>/" + dir_name : galaxy_output = [("Configuration error: expected file not found in <galaxy-home-dir>/" + dir_name, "", False)]
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS_cmd_interface.r Thu Nov 06 16:14:44 2014 +0100
@@ -0,0 +1,99 @@
+## Command-line wrapper around metaMS runLC; reads 8 positional args:
+args <- commandArgs(TRUE)
+## the constructed DB, e.g. "E:/Rworkspace/metaMS/data/LCDBtest.RData"
+args.constructedDB <- args[1]
+## data files, e.g. "E:/Rworkspace/metaMS/data/data.zip" (with e.g. .CDF files) and unzip output dir, e.g. "E:/"
+args.dataZip <- args[2]
+args.zipExtrDir <- paste0(args.dataZip, "dir/") # paste0: plain paste() would insert a space into the path
+## settings file, e.g. "E:/Rworkspace/metaMS/data/settings.r", should contain assignment to an object named "customMetaMSsettings"
+args.settings <- args[3]
+
+## output file names, e.g. "E:/Rworkspace/metaMS/data/out.txt"
+args.outAnnotationTable <- args[4]
+args.outLogFile <- args[5]
+args.xsetOut <- args[6]
+
+## report files
+args.htmlReportFile <- args[7]
+args.htmlReportFile.files_path <- args[8]
+
+# Divert both messages (STDERR) and regular output (STDOUT) to the log file using sink() see http://mazamascience.com/WorkingWithData/?p=888
+msg <- file(args.outLogFile, open="wt")
+sink(msg, type="message")
+sink(msg, type="output")
+
+cat("\nSettings used===============:\n")
+cat(readChar(args.settings, 1e5))
+
+
+tryCatch(
+    {
+        library(metaMS)
+
+        ## load the constructed DB into its own environment; testDB holds the loaded object name(s)
+        tempEnv <- new.env()
+        testDB <- load(args.constructedDB, envir=tempEnv)
+
+        ## load the data files from a zip file
+        files <- unzip(args.dataZip, exdir=args.zipExtrDir)
+
+        ## load settings "script" into "customMetaMSsettings"
+        source(args.settings, local=tempEnv)
+        message(paste(" loaded : ", args.settings))
+
+        # NOTE: the nSlaves value passed to runLC is hard-coded to 20 below
+        # trigger runLC:
+        LC <- runLC(files, settings = tempEnv[["customMetaMSsettings"]], DB = tempEnv[[testDB[1]]]$DB, nSlaves=20, returnXset = TRUE)
+
+        # write out runLC annotation results:
+        write.table(LC$Annotation$annotation.table, args.outAnnotationTable, sep="\t", row.names=FALSE)
+
+        # the used constructed DB (write to log):
+        cat("\nConstructed DB info===============:\n")
+        str(tempEnv[[testDB[1]]]$Info)
+        cat("\nConstructed DB table===============:\n")
+        write.table(tempEnv[[testDB[1]]]$DB, file="", row.names=FALSE) # "" = console, i.e. the sink()ed log; avoids a 2nd handle on the log file
+        write.table(tempEnv[[testDB[1]]]$Reftable, file="", sep="\t", row.names=FALSE)
+        # save xset as rdata:
+        xsetData <- LC$xset@xcmsSet
+        saveRDS(xsetData, file=args.xsetOut)
+
+        message("\nGenerating report.........")
+        # report: figures are written into the files_path directory, the HTML page references them by relative name
+        dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE)
+        setwd(file.path(args.htmlReportFile.files_path))
+        html <- "<html><body><h1>Extracted Ion Chromatograms of groups with more than 3 peaks</h1>"
+
+        LC$xset@xcmsSet # no effect here: values are not auto-printed inside the tryCatch block
+        gt <- groups(LC$xset@xcmsSet)
+        colnames(gt) # no effect here either (see above)
+        groupidx1 <- which(gt[,"rtmed"] > 0 & gt[,"rtmed"] < 3000 & gt[,"npeaks"] > 3)
+        if (length(groupidx1) > 0)
+        {
+            eiccor <- getEIC(LC$xset@xcmsSet, groupidx = c(groupidx1))
+            # (the raw-RT EICs formerly computed here as 'eicraw' were never used; call dropped)
+            for (i in seq_along(groupidx1))
+            {
+                figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="")
+                html <- paste(html,"<img src='", "figure", i,".png' />", sep="")
+                png( figureName )
+                plot(eiccor, LC$xset@xcmsSet, groupidx = i)
+                devname <- dev.off()
+            }
+        }
+
+
+        html <- paste(html,"</body></html>")
+        message("finished generating report")
+        write(html,file=args.htmlReportFile)
+        # unlink(args.htmlReportFile)
+        cat("\nWarnings================:\n")
+        str( warnings() )
+    },
+    error=function(cond) {
+        sink(NULL, type="message") # default setting
+        sink(stderr(), type="output") # so that print(cond) below ends up on STDERR as well
+        message("\nERROR: ===========\n")
+        print(cond)
+    }
+  )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/metams_lcms_annotate.xml Thu Nov 06 16:14:44 2014 +0100 @@ -0,0 +1,136 @@ +<tool id="metams_lcms_annotate" name="METAMS-LC/MS Annotate" version="0.0.3"> + <description> Runs metaMS process for LC/MS feature grouping and annotation</description> + <requirements> + <requirement type="package" version="3.1.1">R_bioc_metams</requirement> + </requirements> + <command interpreter="Rscript"> + metaMS_cmd_interface.r + $constructed_db + $data_files + $customMetaMSsettings + $outputFile + $outputLog + $xsetOut + $htmlReportFile + $htmlReportFile.files_path + </command> +<inputs> + <param name="constructed_db" type="select" label="Constructed DB" help="Reference annotation database generated from matching measurements of a mixture of chemical standards + against a manually validated reference table which contains the key analytical information for each standard." + dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/metaMS")'/> + + <param name="data_files" type="data" format="prims.fileset.zip" label="Data files (.zip file with CDFs)" help=".zip file containing the CDF files of the new measurements"/> + + + + <param name="protocolName" type="text" size="30" label="protocolName" value="Synapt.QTOF.RP" help="protocolName"/> + + <param name="method" type="select" size="30" label="PEAK PICKING method ====================================================="> + <option value="matchedFilter" selected="true">matchedFilter</option> + </param> + <param name="step" type="float" size="10" value="0.05" label="step" help="step"/> + <param name="fwhm" type="integer" size="10" value="20" label="fwhm" help="fwhm" /> + <param name="snthresh" type="integer" size="10" value="4" label="snthresh" help="snthresh" /> + <param name="max" type="integer" size="10" value="50" label="max" help="max" /> + + <param name="min_class_fraction" type="float" size="10" value="0.3" label="ALIGNMENT min.class.fraction 
=====================================================" help="min.class.fraction"/> + <param name="min_class_size" type="integer" size="10" value="3" label="min.class.size" help="min.class.size" /> + <param name="mzwid" type="float" size="10" value="0.1" label="mzwid" help="mzwid"/> + <param name="bws" type="text" size="10" value="30,10" label="bws" help="bws"/> + <param name="missingratio" type="float" size="10" value="0.2" label="missingratio" help="missingratio"/> + <param name="extraratio" type="float" size="10" value="0.1" label="extraratio" help="extraratio"/> + <param name="retcormethod" type="select" size="30" label="retcormethod" help="retcormethod"> + <option value="linear" selected="true">linear</option> + </param> + <param name="retcorfamily" type="select" size="30" label="retcorfamily" help="retcorfamily"> + <option value="symmetric" selected="true">symmetric</option> + </param> + <param name="fillPeaks" type="select" size="30" label="fillPeaks" help="fillPeaks"> + <option value="TRUE" selected="true">Yes</option> + <option value="FALSE">No</option> + </param> + <param name="perfwhm" type="float" size="10" value="0.6" label="CAMERA perfwhm =====================================================" help="perfwhm"/> + <param name="cor_eic_th" type="float" size="10" value="0.7" label="cor_eic_th" help="cor_eic_th" /> + <param name="ppm" type="float" size="10" value="5.0" label="ppm" help="ppm" /> + <param name="rtdiff" type="float" size="10" value="1.5" label="MATCH2DB rtdiff =====================================================" help="rtdiff"/> + <param name="rtval" type="float" size="10" value="0.1" label="rtval" help="rtval" /> + <param name="mzdiff" type="float" size="10" value="0.005" label="mzdiff" help="mzdiff" /> + <param name="match2DB_ppm" type="float" size="10" value="5.0" label="ppm" help="ppm" /> + <param name="minfeat" type="integer" size="10" value="2" label="minfeat" help="minfeat" /> + +</inputs> +<configfiles> + +<configfile 
name="customMetaMSsettings">## start comment + ## metaMS process settings + customMetaMSsettings <- metaMSsettings(protocolName = "${protocolName}", + chrom = "LC", + PeakPicking = list( + method = "${method}", + step = ${step}, + fwhm = ${fwhm}, + snthresh = ${snthresh}, + max = ${max}), + Alignment = list( + min.class.fraction = ${min_class_fraction}, + min.class.size = ${min_class_size}, + mzwid = ${mzwid}, + bws = c(${bws}), + missingratio = ${missingratio}, + extraratio = ${extraratio}, + retcormethod = "${retcormethod}", + retcorfamily = "${retcorfamily}", + fillPeaks = ${fillPeaks}), + CAMERA = list( + perfwhm = ${perfwhm}, + cor_eic_th = ${cor_eic_th}, + ppm= ${ppm})) +metaSetting(customMetaMSsettings, "match2DB") <- list( + rtdiff = ${rtdiff}, + rtval = ${rtval}, + mzdiff = ${mzdiff}, + ppm = ${match2DB_ppm}, + minfeat = ${minfeat})</configfile> + +</configfiles> + +<outputs> + <data name="outputFile" format="tabular" label="${tool.name} on ${on_string} - metaMS annotated file (TSV)"/> + <data name="outputLog" format="txt" label="${tool.name} on ${on_string} - metaMS LOG"/> + <data name="xsetOut" format="rdata" label="${tool.name} on ${on_string} - metaMS xcmsSet (RDATA)"/> + <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - metaMS report (HTML)"/> +</outputs> +<tests> + <test> + </test> +</tests> +<code file="match_library.py" /> <!-- file containing get_directory_files function used above--> +<help> + +.. class:: infomark + +Runs metaMS process for LC/MS feature grouping and annotation. Parts of the metaMS process also make use of the XCMS and CAMERA tools and algorithms. +The figure below shows the main parts of the metaMS process. + +.. image:: $PATH_TO_IMAGES/metaMS.png + + +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +Wehrens, R.; Weingart, G.; Mattivi, F. (2014). +metaMS: an open-source pipeline for GC-MS-based untargeted metabolomics. 
+Journal of chromatography B: biomedical sciences and applications, 966 (1): 109-116. +doi: 10.1016/j.jchromb.2014.02.051 +handle: http://hdl.handle.net/10449/24012 + + + </help> + <citations> + <citation type="doi">10.1016/j.jchromb.2014.02.051</citation> <!-- example + see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set + --> + </citations> +</tool> \ No newline at end of file
--- a/msclust.xml Fri Sep 19 16:14:58 2014 +0200 +++ b/msclust.xml Thu Nov 06 16:14:44 2014 +0100 @@ -327,6 +327,15 @@ .. _Click here for more details on the SIM output file: javascript:window.open('.. image:: sample_SIM.png'.replace('.. image:: ', ''),'popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes') +**References** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +Y. M. Tikunov, S. Laptenok, R. D. Hall, A. Bovy, and R. C. H. de Vos (2012). +MSClust: a tool for unsupervised mass spectra extraction of +chromatography-mass spectrometry ion-wise aligned data +http://dx.doi.org/10.1007%2Fs11306-011-0368-2 </help> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Nov 06 16:14:44 2014 +0100 @@ -0,0 +1,13 @@ +<?xml version="1.0"?> +<tool_dependency> +<!-- see also http://wiki.galaxyproject.org/ToolShedToolFeatures for syntax help + --> + <package name="R_bioc_metams" version="3.1.1"> + <repository changeset_revision="e6171a39dd04" name="prims_metabolomics_r_dependencies" owner="pieterlukasse" prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> + <readme> + This dependency: + Ensures R 3.1.1 installation is triggered (via dependency). + Ensures Bioconductor 3.0 and package metaMS, multtest and snow are installed. + </readme> +</tool_dependency> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xcms_differential_analysis.r Thu Nov 06 16:14:44 2014 +0100
@@ -0,0 +1,72 @@
+## Command-line wrapper around xcms diffreport; reads 8 positional args:
+args <- commandArgs(TRUE)
+#cat("args <- \"\"\n")
+## a xcms xset serialized to file (it is read back with readRDS below)
+args.xsetData <- args[1]
+#cat(paste("args.xsetData <- \"", args[1], "\"\n", sep=""))
+
+args.class1 <- args[2]
+args.class2 <- args[3]
+#cat(paste("args.class1 <- \"", args[2], "\"\n", sep=""))
+#cat(paste("args.class2 <- \"", args[3], "\"\n", sep=""))
+
+args.topcount <- strtoi(args[4])
+#cat(paste("args.topcount <- ", args[4], "\n", sep=""))
+
+args.outTable <- args[5]
+args.outLogFile <- args[6]
+#cat(paste("args.outLogFile <- \"", args[6], "\"\n", sep=""))
+
+## report files
+args.htmlReportFile <- args[7]
+args.htmlReportFile.files_path <- args[8]
+#cat(paste("args.htmlReportFile <- \"", args[7], "\"\n", sep=""))
+#cat(paste("args.htmlReportFile.files_path <- \"", args[8], "\"\n", sep=""))
+
+# Divert both messages (STDERR) and regular output (STDOUT) to the log file using sink() see http://mazamascience.com/WorkingWithData/?p=888
+msg <- file(args.outLogFile, open="wt")
+sink(msg, type="message")
+sink(msg, type="output")
+
+tryCatch(
+    {
+        library(metaMS)
+        library(xcms)
+        #library("R2HTML")
+
+        ## load the xcmsSet object from the RDS file:
+        xcmsSet <- readRDS(args.xsetData)
+
+        # info: levels(xcmsSet@phenoData$class) also gives access to the class names
+        dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE)
+        reporttab <- diffreport(xcmsSet, args.class1, args.class2, paste(args.htmlReportFile.files_path,"/fig", sep=""), args.topcount, metlin = 0.15, h=480, w=640)
+
+        # write out tsv table:
+        write.table(reporttab, args.outTable, sep="\t", row.names=FALSE)
+
+        message("\nGenerating report.........")
+
+        cat("<html><body><h1>Differential analysis report</h1>", file= args.htmlReportFile)
+        #HTML(reporttab[1:args.topcount,], file= args.htmlReportFile)
+        figuresPath <- paste(args.htmlReportFile.files_path, "/fig_eic", sep="")
+        message(figuresPath)
+        listOfFiles <- list.files(path = figuresPath)
+        for (i in seq_along(listOfFiles)) # seq_along: no iterations (and no bogus <img> tags) when no figure exists
+        {
+            figureName <- listOfFiles[i]
+            # maybe we still need to copy the figures to the args.htmlReportFile.files_path
+            cat(paste("<img src='fig_eic/", figureName,"' />", sep=""), file= args.htmlReportFile, append=TRUE)
+            cat(paste("<img src='fig_box/", figureName,"' />", sep=""), file= args.htmlReportFile, append=TRUE)
+        }
+        cat("</body></html>", file= args.htmlReportFile, append=TRUE) # close the page opened above
+        message("finished generating report")
+        cat("\nWarnings================:\n")
+        str( warnings() )
+    },
+    error=function(cond) {
+        sink(NULL, type="message") # default setting
+        sink(stderr(), type="output") # so that print(cond) below ends up on STDERR as well
+        message("\nERROR: ===========\n")
+        print(cond)
+    }
+  )
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xcms_differential_analysis.xml Thu Nov 06 16:14:44 2014 +0100 @@ -0,0 +1,55 @@ +<tool id="xcms_differential_analysis" name="XCMS Differential Analysis" version="0.0.1"> + <description> Runs xcms diffreport function for differential analysis</description> + <requirements> + <requirement type="package" version="3.1.1">R_bioc_metams</requirement> + </requirements> + <command interpreter="Rscript"> + xcms_differential_analysis.r + $xsetData + $class1 + $class2 + $topcount + $outTable + $outLogFile + $htmlReportFile + $htmlReportFile.files_path + </command> +<inputs> + + <param name="xsetData" type="data" format="rdata" label="xset xcms data file" help="E.g. output data file resulting from METAMS run"/> + + + <param name="class1" type="text" size="30" label="Class1 name" value="" help="Name of first class for the comparison"/> + <param name="class2" type="text" size="30" label="Class2 name" value="" help="Name of second class for the comparison"/> + + <param name="topcount" type="integer" size="10" value="10" label="Number of items to return" help="Top X differential items. E.g. if 10, it will return top 10 differential items." /> + +</inputs> +<outputs> + <data name="outTable" format="tabular" label="${tool.name} on ${on_string} - Top differential items (TSV)"/> + <data name="outLogFile" format="txt" label="${tool.name} on ${on_string} - differential log (LOG)"/> + <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - differential report (HTML)"/> +</outputs> +<tests> + <test> + </test> +</tests> +<help> + +.. class:: infomark + +Runs xcms diffreport for showing the most significant differences between two sets/classes of samples. This tool also creates extracted ion chromatograms (EICs) for +the most significant differences. The figure below shows an example of such an EIC. + +.. 
image:: $PATH_TO_IMAGES/diffreport.png + + + + + </help> + <citations> + <citation type="doi">10.1021/ac051437y</citation> <!-- example + see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set + --> + </citations> +</tool> \ No newline at end of file