changeset 41:e67149fbff20

small changes/improvements; new metams and xcms tools
author pieter.lukasse@wur.nl
date Thu, 06 Nov 2014 16:14:44 +0100
parents a7b609941846
children 664ccd5f7cf8
files combine_output.xml library_lookup.xml match_library.py metaMS_cmd_interface.r metams_lcms_annotate.xml msclust.xml static/images/diffreport.png static/images/metaMS.png tool_dependencies.xml xcms_differential_analysis.r xcms_differential_analysis.xml
diffstat 11 files changed, 397 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/combine_output.xml	Fri Sep 19 16:14:58 2014 +0200
+++ b/combine_output.xml	Thu Nov 06 16:14:44 2014 +0100
@@ -4,10 +4,11 @@
     combine_output.py $rankfilter_in $caslookup_in $out_single $out_multi
   </command>
   <inputs>
-    <param format="tabular" name="caslookup_in" type="data" label="RIQC-Lookup RI for CAS output"
+    <param format="tabular" name="rankfilter_in" type="data" label="RIQC-RankFilter output (Estimated RI)" 
+    	help="Select the output file from the RankFilter tool"/>
+    <param format="tabular" name="caslookup_in" type="data" label="RIQC-Lookup RI for CAS output ('Known' RI)"
     	help="Select the output file from the CasLookup tool"/>
-    <param format="tabular" name="rankfilter_in" type="data" label="RIQC-RankFilter output" 
-    	help="Select the output file from the RankFilter tool"/>
+    <!--  <param TODO : could add "tolerance for ERI-KRI"(Estimated RI-Known RI)--> 
   </inputs>
   <outputs>
     <data format="tabular" label="${tool.name} (Single) on ${on_string}" name="out_single" />
--- a/library_lookup.xml	Fri Sep 19 16:14:58 2014 +0200
+++ b/library_lookup.xml	Thu Nov 06 16:14:44 2014 +0100
@@ -13,16 +13,23 @@
     $regression.model	
   </command>
   <inputs>
+  <!-- Regarding the <page> items: this blocks the use of this tool in Galaxy workflows. However, 
+       alternatives like wrapping this in conditionals, repeats (to force a refresh_on_change as this option 
+       is not working on its own) failed since the workflow editor does not support refreshes...nor does the 
+       workflow runtime support conditionals or repeats to be set at runtime. See also 
+       galaxy-dev mail thread "when else" in <conditional> ? RE: refresh_on_change : is this a valid attribute? Any other ideas/options??"  -->
     <page>
       <param format="tabular" name="input" type="data" label="NIST identifications as tabular file" 
       		 help="Select a tab delimited NIST metabolite identifications file (converted from PDF)" />
       <param name="library_file" type="select" label="CAS x RI Library file" 
       		 help="Select a library/lookup file containing RI values for CAS numbers on various chromatography columns " 
       		 dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RI_DB_libraries")'/>
+    </page>
+    <page>
       <param name="col_type" type="select" label="Select column type" refresh_on_change="true"
 	         display="radio" dynamic_options='get_column_type(library_file)'
 	         help="" />
-    </page>
+	</page>
     <page>
       <param name="polarity" type="select" label="Select polarity" refresh_on_change="true"
              display="radio" dynamic_options='filter_column(library_file,col_type)'
--- a/match_library.py	Fri Sep 19 16:14:58 2014 +0200
+++ b/match_library.py	Thu Nov 06 16:14:44 2014 +0100
@@ -108,7 +108,7 @@
     fill a Galaxy drop-down combo box.
     
     '''
-    files = glob.glob(dir_name + "/*.txt")
+    files = glob.glob(dir_name + "/*.*")
     if len(files) == 0:
         # Configuration error: no library files found in <galaxy-home-dir>/" + dir_name :
         galaxy_output = [("Configuration error: expected file not found in <galaxy-home-dir>/" + dir_name, "", False)]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metaMS_cmd_interface.r	Thu Nov 06 16:14:44 2014 +0100
@@ -0,0 +1,99 @@
+## Command-line wrapper around metaMS runLC (LC/MS feature grouping and annotation).
+## Positional arguments (see the assignments below): constructed DB, data zip, settings file,
+## annotation table output, log output, xcmsSet output, HTML report and its files directory.
+args <- commandArgs(TRUE)
+## the constructed DB, e.g. "E:/Rworkspace/metaMS/data/LCDBtest.RData"
+args.constructedDB <- args[1]
+## data files, e.g. "E:/Rworkspace/metaMS/data/data.zip" (with e.g. .CDF files); extracted into "<zip path>dir/"
+args.dataZip <- args[2]
+args.zipExtrDir <- paste(args[2], "dir/", sep="")  # sep="": the default sep=" " put a space into the path
+## settings file, e.g. "E:/Rworkspace/metaMS/data/settings.r", should contain assignment to an object named "customMetaMSsettings"
+args.settings <- args[3]
+## output file names, e.g. "E:/Rworkspace/metaMS/data/out.txt"
+args.outAnnotationTable <- args[4]
+args.outLogFile <- args[5]
+args.xsetOut <- args[6]
+## report files
+args.htmlReportFile <- args[7]
+args.htmlReportFile.files_path <- args[8]
+
+# Divert both messages (STDERR) and regular output (STDOUT) into the log file using sink(),
+# see http://mazamascience.com/WorkingWithData/?p=888
+msg <- file(args.outLogFile, open="wt")
+sink(msg, type="message")
+sink(msg, type="output")
+
+cat("\nSettings used===============:\n")
+cat(readChar(args.settings, 1e5))
+
+tryCatch(
+    {
+        library(metaMS)
+
+        ## load the constructed DB into its own environment; load() returns the names of the loaded objects
+        tempEnv <- new.env()
+        testDB <- load(args.constructedDB, envir=tempEnv)
+        constructedDB <- tempEnv[[testDB[1]]]
+
+        ## extract the data files from the zip file
+        files <- unzip(args.dataZip, exdir=args.zipExtrDir)
+
+        ## run the settings "script", which assigns "customMetaMSsettings" inside tempEnv
+        source(args.settings, local=tempEnv)
+        message(paste(" loaded : ", args.settings))
+
+        # trigger runLC:
+        LC <- runLC(files, settings = tempEnv[["customMetaMSsettings"]], DB = constructedDB$DB, nSlaves=20, returnXset = TRUE)
+        xsetData <- LC$xset@xcmsSet
+
+        # write out runLC annotation results:
+        write.table(LC$Annotation$annotation.table, args.outAnnotationTable, sep="\t", row.names=FALSE)
+
+        # the used constructed DB (write to log). The tables go to standard output, which sink()
+        # diverts to the log; appending to the log file by name while the sink connection is
+        # open on that same file risks the two writers overwriting each other's output.
+        cat("\nConstructed DB info===============:\n")
+        str(constructedDB$Info)
+        cat("\nConstructed DB table===============:\n")
+        write.table(constructedDB$DB, row.names=FALSE)
+        write.table(constructedDB$Reftable, sep="\t", row.names=FALSE)
+        # save xset as rdata:
+        saveRDS(xsetData, file=args.xsetOut)
+
+        message("\nGenerating report.........")
+        dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE)
+        setwd(file.path(args.htmlReportFile.files_path))
+        html <- "<html><body><h1>Extracted Ion Chromatograms of groups with more than 3 peaks</h1>"
+
+        # select the groups to plot: "rtmed" strictly between 0 and 3000 and "npeaks" above 3
+        gt <- groups(xsetData)
+        groupidx1 <- which(gt[,"rtmed"] > 0 & gt[,"rtmed"] < 3000 & gt[,"npeaks"] > 3)
+        if (length(groupidx1) > 0)
+        {
+            # EICs for the selected groups (the unused rt = "raw" variant is no longer computed)
+            eiccor <- getEIC(xsetData, groupidx = c(groupidx1))
+            for (i in seq_along(groupidx1))
+            {
+                figureName <- paste(args.htmlReportFile.files_path, "/figure", i,".png", sep="")
+                html <- paste(html,"<img src='", "figure", i,".png' />", sep="")
+                png( figureName )
+                plot(eiccor, xsetData, groupidx = i)
+                devname = dev.off()
+            }
+        }
+
+        # close the document with a proper closing tag (it used to end in "</body><html>")
+        html <- paste(html,"</body></html>")
+        message("finished generating report")
+        write(html,file=args.htmlReportFile)
+        cat("\nWarnings================:\n")
+        str( warnings() )
+    },
+    error=function(cond) {
+        # restore the default message stream and point output at STDERR, so the error is printed there
+        sink(NULL, type="message")
+        sink(stderr(), type="output")
+        message("\nERROR: ===========\n")
+        print(cond)
+    }
+)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metams_lcms_annotate.xml	Thu Nov 06 16:14:44 2014 +0100
@@ -0,0 +1,136 @@
+<tool id="metams_lcms_annotate" name="METAMS-LC/MS Annotate"  version="0.0.3">
+	<description> Runs metaMS process for LC/MS feature grouping and annotation</description>
+	<requirements>
+		<requirement type="package" version="3.1.1">R_bioc_metams</requirement>
+	</requirements>	
+	<command interpreter="Rscript">
+		metaMS_cmd_interface.r 
+	    $constructed_db
+	    $data_files
+	    $customMetaMSsettings
+	    $outputFile 
+	    $outputLog
+	    $xsetOut
+	    $htmlReportFile
+	    $htmlReportFile.files_path
+	</command>
+<inputs>
+	<param name="constructed_db" type="select" label="Constructed DB" help="Reference annotation database generated from matching measurements of a mixture of chemical standards
+	against a manually validated reference table which contains the key analytical information for each standard." 
+      		 dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/metaMS")'/>
+	
+	<param name="data_files" type="data" format="prims.fileset.zip" label="Data files (.zip file with CDFs)" help=".zip file containing the CDF files of the new measurements"/>
+	
+	
+	
+	<param name="protocolName" type="text" size="30" label="protocolName" value="Synapt.QTOF.RP" help="protocolName"/>
+	
+	<param name="method" type="select" size="30" label="PEAK PICKING method =====================================================">
+		<option value="matchedFilter" selected="true">matchedFilter</option>
+	</param>
+	<param name="step" type="float" size="10" value="0.05" label="step" help="step"/>
+	<param name="fwhm" type="integer" size="10" value="20" label="fwhm" help="fwhm" />
+	<param name="snthresh" type="integer" size="10" value="4" label="snthresh" help="snthresh" />
+	<param name="max" type="integer" size="10" value="50" label="max" help="max" />
+	
+	<param name="min_class_fraction" type="float" size="10" value="0.3" label="ALIGNMENT min.class.fraction =====================================================" help="min.class.fraction"/>
+	<param name="min_class_size" type="integer" size="10" value="3" label="min.class.size" help="min.class.size" />
+	<param name="mzwid" type="float" size="10" value="0.1" label="mzwid" help="mzwid"/>
+	<param name="bws" type="text" size="10" value="30,10" label="bws" help="bws"/>
+	<param name="missingratio" type="float" size="10" value="0.2" label="missingratio" help="missingratio"/>
+	<param name="extraratio" type="float" size="10" value="0.1" label="extraratio" help="extraratio"/>
+	<param name="retcormethod" type="select" size="30" label="retcormethod" help="retcormethod">
+		<option value="linear" selected="true">linear</option>
+	</param>
+	<param name="retcorfamily" type="select" size="30" label="retcorfamily" help="retcorfamily">
+		<option value="symmetric" selected="true">symmetric</option>
+	</param>
+	<param name="fillPeaks" type="select" size="30" label="fillPeaks" help="fillPeaks">
+		<option value="TRUE" selected="true">Yes</option>
+		<option value="FALSE">No</option>
+	</param>
+	<param name="perfwhm" type="float" size="10" value="0.6" label="CAMERA perfwhm =====================================================" help="perfwhm"/>
+	<param name="cor_eic_th" type="float" size="10" value="0.7" label="cor_eic_th" help="cor_eic_th" />
+	<param name="ppm" type="float" size="10" value="5.0" label="ppm" help="ppm" />
+	<param name="rtdiff" type="float" size="10" value="1.5" label="MATCH2DB rtdiff =====================================================" help="rtdiff"/>
+	<param name="rtval" type="float" size="10" value="0.1" label="rtval" help="rtval" />
+	<param name="mzdiff" type="float" size="10" value="0.005" label="mzdiff" help="mzdiff" />
+	<param name="match2DB_ppm" type="float" size="10" value="5.0" label="ppm" help="ppm" />
+	<param name="minfeat" type="integer" size="10" value="2" label="minfeat" help="minfeat" />
+	
+</inputs>
+<configfiles>
+
+<configfile name="customMetaMSsettings">## start comment
+		## metaMS process settings
+		customMetaMSsettings &lt;- metaMSsettings(protocolName = "${protocolName}",
+                            chrom = "LC",
+                            PeakPicking = list(
+                              method = "${method}",
+                              step = ${step},
+                              fwhm = ${fwhm},
+                              snthresh = ${snthresh},
+                              max = ${max}),
+                            Alignment = list(
+                              min.class.fraction = ${min_class_fraction},
+                              min.class.size = ${min_class_size},
+                              mzwid = ${mzwid},
+                              bws = c(${bws}),
+                              missingratio = ${missingratio},
+                              extraratio = ${extraratio},
+                              retcormethod = "${retcormethod}",
+                              retcorfamily = "${retcorfamily}",            
+                              fillPeaks = ${fillPeaks}),
+                            CAMERA = list(
+                              perfwhm = ${perfwhm},
+                              cor_eic_th = ${cor_eic_th},
+                              ppm= ${ppm}))
+metaSetting(customMetaMSsettings, "match2DB") &lt;- list(
+            rtdiff = ${rtdiff},
+            rtval = ${rtval},
+            mzdiff = ${mzdiff},
+            ppm = ${match2DB_ppm},
+            minfeat = ${minfeat})</configfile>
+
+</configfiles>
+
+<outputs>
+	<data name="outputFile" format="tabular" label="${tool.name} on ${on_string} - metaMS annotated file (TSV)"/>
+	<data name="outputLog" format="txt" label="${tool.name} on ${on_string} - metaMS LOG"/>
+	<data name="xsetOut" format="rdata" label="${tool.name} on ${on_string} - metaMS xcmsSet (RDATA)"/>
+	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - metaMS report (HTML)"/>
+</outputs>
+<tests>
+	<test>
+	</test>
+</tests>
+<code file="match_library.py" /> <!-- file containing get_directory_files function used above-->
+<help>
+
+.. class:: infomark
+  
+Runs metaMS process for LC/MS feature grouping and annotation. Parts of the metaMS process also make use of the XCMS and CAMERA tools and algorithms.
+The figure below shows the main parts of the metaMS process. 
+
+.. image:: $PATH_TO_IMAGES/metaMS.png 
+
+
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Wehrens, R.; Weingart, G.; Mattivi, F. (2014). 
+metaMS: an open-source pipeline for GC-MS-based untargeted metabolomics. 
+Journal of chromatography B: biomedical sciences and applications, 966 (1): 109-116. 
+doi: 10.1016/j.jchromb.2014.02.051 
+handle: http://hdl.handle.net/10449/24012
+
+
+  </help>
+  <citations>
+        <citation type="doi">10.1016/j.jchromb.2014.02.051</citation> <!-- example 
+        see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
+        -->
+   </citations>
+</tool>
\ No newline at end of file
--- a/msclust.xml	Fri Sep 19 16:14:58 2014 +0200
+++ b/msclust.xml	Thu Nov 06 16:14:44 2014 +0100
@@ -327,6 +327,15 @@
 .. _Click here for more details on the SIM output file: javascript:window.open('.. image:: sample_SIM.png'.replace('.. image:: ', ''),'popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
 
 
+**References**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Y. M. Tikunov, S. Laptenok, R. D. Hall, A. Bovy, and R. C. H. de Vos (2012).
+MSClust: a tool for unsupervised mass spectra extraction of 
+chromatography-mass spectrometry ion-wise aligned data
+http://dx.doi.org/10.1007%2Fs11306-011-0368-2
 
   </help>
 </tool>
Binary file static/images/diffreport.png has changed
Binary file static/images/metaMS.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Nov 06 16:14:44 2014 +0100
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<tool_dependency>
+<!-- see also http://wiki.galaxyproject.org/ToolShedToolFeatures for syntax help
+   -->
+  	<package name="R_bioc_metams" version="3.1.1">
+		<repository changeset_revision="e6171a39dd04" name="prims_metabolomics_r_dependencies" owner="pieterlukasse" prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu" />
+	</package>
+	<readme>
+				This dependency:
+				Ensures R 3.1.1 installation is triggered (via dependency). 
+				Ensures Bioconductor 3.0 and package metaMS, multtest and snow are installed. 
+      </readme>
+</tool_dependency>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xcms_differential_analysis.r	Thu Nov 06 16:14:44 2014 +0100
@@ -0,0 +1,72 @@
+## Command-line wrapper around the xcms diffreport function (differential analysis of two classes).
+## Positional arguments: xcmsSet file, class1, class2, topcount, table output, log output,
+## HTML report file and the directory holding the report's figures.
+args <- commandArgs(TRUE)
+
+## an xcms xset file (read with readRDS below)
+args.xsetData <- args[1]
+## names of the two classes passed to diffreport
+args.class1 <- args[2]
+args.class2 <- args[3]
+## number of top differential items, parsed as an integer
+args.topcount <- strtoi(args[4])
+## output file names
+args.outTable <- args[5]
+args.outLogFile <- args[6]
+## report files
+args.htmlReportFile <- args[7]
+args.htmlReportFile.files_path <- args[8]
+
+# Divert both messages (STDERR) and regular output (STDOUT) into the log file using sink(),
+# see http://mazamascience.com/WorkingWithData/?p=888
+msg <- file(args.outLogFile, open="wt")
+sink(msg, type="message")
+sink(msg, type="output")
+
+tryCatch(
+    {
+        library(metaMS)
+        library(xcms)
+
+        ## load the xcmsSet object:
+        xcmsSet <- readRDS(args.xsetData)
+
+        # info: levels(xcmsSet@phenoData$class) also gives access to the class names
+        dir.create(file.path(args.htmlReportFile.files_path), showWarnings = FALSE)
+        figuresBase <- paste(args.htmlReportFile.files_path, "/fig", sep="")
+        reporttab <- diffreport(xcmsSet, args.class1, args.class2, figuresBase, args.topcount, metlin = 0.15, h=480, w=640)
+
+        # write out tsv table:
+        write.table(reporttab, args.outTable, sep="\t", row.names=FALSE)
+
+        message("\nGenerating report.........")
+
+        cat("<html><body><h1>Differential analysis report</h1>", file= args.htmlReportFile)
+        # the figure file names are listed from "<files_path>/fig_eic"; the report references
+        # the same names under both fig_eic/ and fig_box/
+        figuresPath <- paste(figuresBase, "_eic", sep="")
+        message(figuresPath)
+        listOfFiles <- list.files(path = figuresPath)
+        # seq_along(): 1:length(x) would iterate over c(1, 0) when no figures were found
+        for (i in seq_along(listOfFiles))
+        {
+            figureName <- listOfFiles[i]
+            # maybe we still need to copy the figures to the args.htmlReportFile.files_path
+            cat(paste("<img src='fig_eic/", figureName,"' />", sep=""), file= args.htmlReportFile, append=TRUE)
+            cat(paste("<img src='fig_box/", figureName,"' />", sep=""), file= args.htmlReportFile, append=TRUE)
+        }
+        # close the document, which was previously left without closing tags
+        cat("</body></html>", file= args.htmlReportFile, append=TRUE)
+
+        message("finished generating report")
+        cat("\nWarnings================:\n")
+        str( warnings() )
+    },
+    error=function(cond) {
+        # restore the default message stream and point output at STDERR, so the error is printed there
+        sink(NULL, type="message")
+        sink(stderr(), type="output")
+        message("\nERROR: ===========\n")
+        print(cond)
+    }
+)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xcms_differential_analysis.xml	Thu Nov 06 16:14:44 2014 +0100
@@ -0,0 +1,55 @@
+<tool id="xcms_differential_analysis" name="XCMS Differential Analysis"  version="0.0.1">
+	<description> Runs xcms diffreport function for differential Analysis</description>
+	<requirements>
+		<requirement type="package" version="3.1.1">R_bioc_metams</requirement>
+	</requirements>	
+	<command interpreter="Rscript">
+		xcms_differential_analysis.r 
+	    $xsetData
+		$class1
+		$class2
+		$topcount
+		$outTable 
+		$outLogFile
+		$htmlReportFile
+		$htmlReportFile.files_path
+	</command>
+<inputs>
+	
+	<param name="xsetData" type="data" format="rdata" label="xset xcms data file" help="E.g. output data file resulting from METAMS run"/>
+	
+	
+	<param name="class1" type="text" size="30" label="Class1 name" value="" help="Name of first class for the comparison"/>
+	<param name="class2" type="text" size="30" label="Class2 name" value="" help="Name of second class for the comparison"/>
+	
+	<param name="topcount" type="integer" size="10" value="10" label="Number of items to return" help="Top X differential items. E.g. if 10, it will return top 10 differential items." />
+	
+</inputs>
+<outputs>
+	<data name="outTable" format="tabular" label="${tool.name} on ${on_string} - Top differential items (TSV)"/>
+	<data name="outLogFile" format="txt" label="${tool.name} on ${on_string} - differential log (LOG)"/>
+	<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - differential report (HTML)"/>
+</outputs>
+<tests>
+	<test>
+	</test>
+</tests>
+<help>
+
+.. class:: infomark
+  
+Runs xcms diffreport for showing the most significant differences between two sets/classes of samples. This tool also creates extracted ion chromatograms (EICs) for 
+the most significant differences. The figure below shows an example of such an EIC.
+
+.. image:: $PATH_TO_IMAGES/diffreport.png 
+
+
+
+
+  </help>
+  <citations>
+        <citation type="doi">10.1021/ac051437y</citation> <!-- example 
+        see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
+        -->
+   </citations>
+</tool>
\ No newline at end of file