changeset 0:054e4681667c draft

planemo upload
author lecorguille
date Fri, 07 Aug 2015 10:49:35 -0400
parents
children 1592925c2e82
files Makefile abims_xcms_xcmsSet.xml planemo.sh repository_dependencies.xml static/images/XCMS_Galaxy_workflow.png static/images/xcms_tics.png test-data/sacuri.zip test-data/sampleMetadata.tsv test-data/xset.BPCs_raw.pdf test-data/xset.RData test-data/xset.TICs_raw.pdf test-data/xset.log.txt tool_dependencies.xml xcms_set.tgz
diffstat 14 files changed, 510 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Makefile	Fri Aug 07 10:49:35 2015 -0400
@@ -0,0 +1,23 @@
+# USAGE: make [all|install|clean]
+
+# -------- VARIABLE --------
+
+OBJ=xcms_set.tgz
+DEP=abims_xcms_xcmsSet.xml tool_dependencies.xml repository_dependencies.xml static test-data
+
+# None of these goals names a file, so never let a stray file shadow them.
+.PHONY: all install clean
+
+all: $(OBJ)
+
+# Pack the tool files; tar -h follows symlinks and .svn entries are left out.
+$(OBJ): $(DEP)
+	tar --exclude=".svn" -zchf $@ $^
+
+# Move the freshly built archive (and only it) to the home directory.
+install: $(OBJ)
+	mv $(OBJ) ~
+
+# Delete only the archive built here; -f keeps clean from failing when it is absent.
+clean:
+	rm -f $(OBJ)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abims_xcms_xcmsSet.xml	Fri Aug 07 10:49:35 2015 -0400
@@ -0,0 +1,403 @@
+<tool id="abims_xcms_xcmsSet" name="xcms.xcmsSet" version="2.0.2">
+    
+    <description>Filtration and Peak Identification using xcmsSet function from xcms R package to preprocess LC/MS data for relative quantification and statistical analysis </description>
+    <!-- Package names and versions mirror the <package> entries declared in tool_dependencies.xml. -->
+    <requirements>
+        <requirement type="package" version="3.1.2">R</requirement>
+        <requirement type="binary">Rscript</requirement>
+        <requirement type="package" version="1.44.0">xcms</requirement>
+        <requirement type="package" version="2.1">xcms_w4m_script</requirement>
+    </requirements>
+    <!-- Exit codes in the range "1:" are reported at level "fatal". -->
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+    
+    <command>
+        xcms.r
+        #if $inputs.input == "lib":
+            library $__app__.config.user_library_import_dir/$__user_email__/$inputs.library
+        #elif $inputs.input == "zip_file":
+            zipfile $inputs.zip_file
+        #end if
+        xfunction xcmsSet
+        ## profmethod $profmethod
+        nSlaves \${GALAXY_SLOTS:-1} method $methods.method
+        #if $methods.method == "centWave":
+            ppm $methods.ppm
+            peakwidth "c($methods.peakwidth)"
+            #if $methods.options_scanrange.option == "show":
+                scanrange "c($methods.options_scanrange.scanrange)"
+            #end if
+            #if $methods.options_c.option == "show":
+                mzdiff $methods.options_c.mzdiff
+                snthresh $methods.options_c.snthresh
+                integrate $methods.options_c.integrate
+                noise $methods.options_c.noise
+                prefilter "c($methods.options_c.prefilter)"
+            #end if
+        #elif $methods.method == "matchedFilter":
+            step $methods.step
+            fwhm $methods.fwhm
+            #if $methods.options_m.option == "show":
+                ## sigma "$methods.options_m.sigma"
+                max $methods.options_m.max
+                snthresh $methods.options_m.snthresh
+                ## mzdiff $methods.options_m.mzdiff
+                steps $methods.options_m.steps
+                ## sleep $methods.options_m.sleep
+            #end if
+        #elif $methods.method == "MSW":
+            snthr $methods.snthr
+            nearbyPeak $methods.nearbyPeak
+            winSize.noise $methods.winSize_noise
+            amp.Th $methods.amp_Th
+            scales "c($methods.scales)"
+            SNR.method "$methods.SNR_method"
+        #end if
+        &amp;&amp; (mv xcmsSet.RData $xsetRData;
+        mv sampleMetadata.tsv $sampleMetadata;
+        mv TICs_raw.pdf $ticsRawPdf;
+        mv BPCs_raw.pdf $bpcsRawPdf;
+        mv xset.log $log);
+        ## Save the exit status before cat replaces it, then re-raise it so the stdio exit_code rule still sees failures.
+        rc=\$?; cat $log; sh -c "exit \$rc"
+    </command>
+    
+    <inputs>
+        <!-- Data source: a zip dataset from the history, or a directory name that the command appends to the user's library import path. -->
+        <conditional name="inputs">
+            <param name="input" type="select" label="Choose your inputs method" >
+                <option value="zip_file" selected="true">Zip file from your history containing your chromatograms</option>
+                <option value="lib" >Library directory name</option>
+            </param>
+            <when value="zip_file">
+                <param name="zip_file" type="data" format="no_unzip.zip" label="Zip file" />
+            </when>
+            <when value="lib">
+                <param name="library" type="text" size="40" label="Library directory name" help="The name of your directory containing all your data" >
+                    <validator type="empty_field"/>
+                </param>
+            </when>
+        </conditional>
+
+<!--
+        <param name="profmethod" type="select" label="Method to use for profile generation (profmethod)" >
+            <option value="bin" selected="true">bin</option>
+            <option value="binlin">binlin</option>
+            <option value="binlinbase">binlinbase</option>
+            <option value="intlin">intlin</option>
+        </param>
+        <param name="nSlaves" type="integer" value="9" label="MPI-slaves CPU" help="number of MPI-slaves to use for parallel peak detection" />
+-->
+        <conditional name="methods">
+            <param name="method" type="select" label="Extraction method for peaks detection" help="[method] See the help section below">
+                <option value="centWave" >centWave</option>
+                <option value="matchedFilter" selected="true">matchedFilter</option>
+                <option value="MSW">MSW</option>
+            </param>
+
+            <!-- centWave Filter options -->
+            <when value="centWave">
+                <param name="ppm" type="integer" value="25" label="Max tolerated ppm m/z deviation in consecutive scans in ppm" help="[ppm]" />
+                <param name="peakwidth" type="text" value="20,50" label="Min,Max peak width in seconds" help="[peakwidth]" />
+                <conditional name="options_scanrange">
+                    <param name="option" type="select" label="Scan range option " >
+                        <option value="show">show</option>
+                        <option value="hide" selected="true">hide</option>
+                    </param>
+                    <when value="show">
+                        <param name="scanrange" type="text" value="" label="scanrange" help="scan range to process, entered as first,last without brackets, for example 16,365" >
+                            <validator type="empty_field"/>
+                        </param>
+                    </when>
+                    <when value="hide" />
+                </conditional>
+                <conditional name="options_c">
+                    <param name="option" type="select" label="Advanced options" >
+                        <option value="show">show</option>
+                        <option value="hide" selected="true">hide</option>
+                    </param>
+                    <when value="show">
+                        <param name="snthresh" type="integer" value="10" label="Signal/Noise threshold" help="[snthresh] Signal to noise ratio cutoff" />
+                        <param name="mzdiff" type="float" value="-0.001" label="Min m/z difference" help="[mzdiff] Min m/z difference for peaks with overlapping RT " />
+                        <param name="integrate" type="select" label="peak limits method" help="[integrate]" >
+                            <option value="1">peak limits based on smoothed 2nd derivative (less precise)</option>
+                            <option value="2">peak limits based on real data (more sensitive to noise)</option>
+                        </param>
+                        <param name="prefilter" type="text" value="3,100" label="Prefilter step for the first phase" help="[prefilter] Separate by comma: k,I. Mass traces are only retained if they contain at least ‘k’ peaks with intensity >= ‘I’"/>
+                        <param name="noise" type="integer" value="0" label="Noise filter" help="[noise] optional argument which is useful for data that was centroided without any intensity threshold, centroids with intensity smaller than ‘noise’ are omitted from ROI detection"/>
+                    </when>
+                    <when value="hide" />
+                </conditional>
+            </when>
+
+            <!-- matched Filter options -->
+            <when value="matchedFilter">
+                <param name="step" type="float" value="0.01" label="Step size to use for profile generation" help="[step] The peak detection algorithm creates extracted ion base peak chromatograms (EIBPC) on a fixed step size" />
+                <param name="fwhm" type="integer" value="30" label="Full width at half maximum of matched filtration gaussian model peak" help="[fwhm] Only used to calculate the actual sigma" />
+                <conditional name="options_m">
+                    <param name="option" type="select" label="Advanced options" >
+                        <option value="show">show</option>
+                        <option value="hide" selected="true">hide</option>
+                    </param>
+                    <when value="show">
+<!--
+                        <param name="sigma" type="hidden" value="fwhm/2.3548" label="sigma" help="standard deviation (fwhm/2.3548)" />
+-->
+                        <param name="max" type="integer" value="5" label="Maximum number of peaks per extracted ion chromatogram" help="[max]" />
+                        <param name="snthresh" type="integer" value="10" label="Signal to noise ratio cutoff" help="[snthresh]" />
+                        <param name="steps" type="integer" value="2" label="Number of steps to merge prior to filtration" help="[steps] The peak identification algorithm combines a given number of EIBPCs prior to filtration and peak detection, as defined by the steps argument" />
+<!--
+                        <param name="mzdiff" type="text" size="20" value="0.8-step*steps" label="m/z difference" help="min m/z difference for peaks with overlapping RT " />
+-->
+                    </when>
+                    <when value="hide" />
+                </conditional>
+            </when>
+
+            <!-- MSW Filter options -->
+            <when value="MSW">
+                <param name="nearbyPeak" type="select" label="Determine whether to include the nearby small peaks of major peaks" help="[nearbyPeak]" >
+                    <option value="TRUE">TRUE</option>
+                    <option value="FALSE">FALSE</option>
+                </param>
+                <param name="winSize_noise" type="integer" value="500" label="The local window size to estimate the noise level" help="[winSize.noise]" />
+                <param name="snthr" type="integer" value="3" label="SNR (Signal to Noise Ratio) threshold" help="[snthr]" />
+                <param name="amp_Th" type="float" value="0.002" label="Minimum required relative amplitude of the peak" help="[amp.Th] Ratio to the maximum of CWT coefficients" />
+                <param name="scales" type="text" value="seq(1,22,3)" label="Scales for the Continuous Wavelet Transform (CWT)" help="[scales] Scales are linked to the width of the peaks that are to be detected. Enter seq(from,to,by) for a regular sequence or c(n,n) for an explicit vector" />
+                <param name="SNR_method" type="text" value="data.mean" label="SNR (Signal to Noise Ratio) method" help="[SNR.method] Method to estimate noise level. Currently, only 95 percentage quantile is supported." />
+            </when>
+        </conditional>
+    </inputs>
+    
+    <outputs> <!-- One dataset per file that the command moves into place. -->
+        <data format="rdata.xcms.raw" name="xsetRData" label="xset.RData" />
+        <data format="tabular" name="sampleMetadata" label="sampleMetadata.tsv" />
+        <data format="pdf" name="ticsRawPdf" label="xset.TICs_raw.pdf" />
+        <data format="pdf" name="bpcsRawPdf" label="xset.BPCs_raw.pdf" />
+        <data format="txt" name="log" label="xset.log.txt" />
+    </outputs>
+    
+    <tests>
+        <test> <!-- matchedFilter run on sacuri.zip with the advanced options shown; same values as the "Working example" in the help -->
+            <param name="inputs.input" value="zip_file" />
+            <param name="inputs.zip_file" value="sacuri.zip" />
+            <param name="methods.method" value="matchedFilter" />
+            <param name="methods.step" value="0.01" />
+            <param name="methods.fwhm" value="4" />
+            <param name="methods.options_m.option" value="show" />
+            <param name="methods.options_m.max" value="50" />
+            <param name="methods.options_m.snthresh" value="1" />
+            <param name="methods.options_m.steps" value="2" />
+            <output name="xsetRData" file="xset.RData" />
+            <output name="sampleMetadata" file="sampleMetadata.tsv" />
+            <output name="ticsRawPdf" file="xset.TICs_raw.pdf" />
+            <output name="bpcsRawPdf" file="xset.BPCs_raw.pdf" />
+            <output name="log" file="xset.log.txt" /> <!-- NOTE(review): the reference log records an absolute dataset path, the nSlaves count and package versions, so an exact match is presumably host-dependent; confirm -->
+        </test>
+    </tests>
+    
+    <help>
+
+.. class:: infomark
+
+**Authors**  Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu 
+
+.. class:: infomark
+
+**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@univ-nantes.fr - part of Workflow4Metabolomics.org [W4M]
+
+ | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool.
+
+---------------------------------------------------
+
+============
+Xcms.xcmsSet
+============
+
+-----------
+Description
+-----------
+
+This tool preprocesses analyte data from multiple LC/MS files (formats NetCDF, mzXML and mzData). It extracts ions from each sample independently and, using a statistical model, filters and integrates the peaks.
+You can read a tutorial on how to perform xcms preprocessing which is available here_.
+
+.. _here: http://web11.sb-roscoff.fr/download/w4m/howto/w4m_HowToPerformXcmsPreprocessing_v02.pdf
+
+
+-----------------
+Workflow position
+-----------------
+
+**Upstream tools**
+
+========================= ================= ======= =========
+Name                      output file       format  parameter
+========================= ================= ======= =========
+NA                        NA                zip     NA       
+========================= ================= ======= =========
+
+
+**Downstream tools**
+
++---------------------------+--------------------+-----------------+
+| Name                      | Output file        | Format          |
++===========================+====================+=================+
+|xcms.group                 | xset.RData         | rdata.xcms.raw  |
++---------------------------+--------------------+-----------------+
+|PCA ellipsoid by factors   | sampleMetadata.tsv | Tabular         |
++---------------------------+--------------------+-----------------+
+|Anova                      | sampleMetadata.tsv | Tabular         |
++---------------------------+--------------------+-----------------+
+
+
+**Example of a metabolomic workflow**
+
+.. image:: XCMS_Galaxy_workflow.png
+
+
+------
+
+.. class:: infomark 
+
+The output file is an xset.RData file. You can continue your analysis using it in **xcms.group** tool.
+
+---------------------------------------------------
+
+
+
+-----------
+Input files
+-----------
+
++---------------------------+------------+
+| Parameter : num + label   |   Format   |
++===========================+============+
+| 1 : Choose your inputs    |   zip      |
++---------------------------+------------+
+
+**Choose your inputs**
+
+You have two methods for your inputs:
+
+    | Zip file (recommended): You can put a zip file containing your inputs: myinputs.zip (containing all your conditions as sub-directories).
+    | library folder: You must specify the name of your "library" (folder) created within your project space (for example: /projet/externe/institut/login/galaxylibrary/yourlibrary). Your library must contain all your conditions as sub-directories.
+
+----------
+Parameters
+----------
+
+Extraction method for peaks detection
+-------------------------------------
+
+**Matched Filter**
+
+    | One parameter to consider is the Gaussian model peak width used for matched filtration, an integral part of the peak detection algorithm.
+    | For a discussion of how model peak width affects the signal to noise ratio, see Danielsson et al. (2002).
+
+
+**cent Wave**
+
+    | This algorithm is most suitable for high resolution LC/{TOF,OrbiTrap,FTICR}-MS data in centroid mode.
+    | Due to the fact that peak centroids are used, a binning step is not necessary.
+    | The method is capable of detecting close-by-peaks and also overlapping peaks. Some efforts are made to detect the exact peak boundaries to get precise peak integrals.
+
+**MSW**
+
+    | Wavelet based, used for direct infusion data. Continuous wavelet transform (CWT) can be used to locate chromatographic peaks on different scales.
+    | If you wish to have more details about the other parameters, you can read the following documents:
+    | -Example of preprocessing data with XCMS : http://www.bioconductor.org/packages/2.12/bioc/vignettes/xcms/inst/doc/xcmsPreprocess.pdf
+    | -Details and explanations for all the parameters of XCMS package: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf
+
+
+------------
+Output files
+------------
+
+xset.TICs_raw.pdf
+
+    | "Total Ion Chromatograms" graph in pdf format.
+
+xset.BPCs_raw.pdf
+
+    | "Base Peak Chromatograms" graph in pdf format with each class samples opposed.
+
+sampleMetadata.tsv
+
+    | Tabular file that contains, for each sample, its associated class and polarity (positive, negative or mixed).
+    | This file is necessary in the Anova and PCA step of the workflow.
+
+xset.RData: rdata.xcms.raw format
+
+    | Rdata file that is necessary in the second step of the workflow "xcms.group".
+    
+------
+
+.. class:: infomark 
+
+The output file is an xset.RData file. You can continue your analysis using it in **xcms.group** tool.
+
+---------------------------------------------------
+
+---------------
+Working example
+---------------
+
+Input files
+-----------
+
+    | zip_file -> **sacuri.zip**
+
+Parameters
+----------
+
+    | Method -> **matchedFilter**
+    | step   -> **0.01**
+    | fwhm   -> **4** 
+    | Advanced option -> **show**
+    | max: -> **50**
+    | snthresh -> **1**
+    | steps -> **2**
+
+
+Output files
+------------
+
+    | **1) xset.RData: RData file**
+
+    | **2) Example of a sampleMetadata.tsv  :**
+
+
++---------------------------+------------+---------+
+| sampleMetadata            |   class    | polarity|
++===========================+============+=========+
+|HU_neg_017                 |   bio      |negative |
++---------------------------+------------+---------+
+|HU_neg_028                 |   bio      |negative |
++---------------------------+------------+---------+
+|HU_neg_034                 |   bio      |negative |
++---------------------------+------------+---------+
+|Blanc04                    |   blank    |negative |
++---------------------------+------------+---------+
+|Blanc06                    |   blank    |negative |
++---------------------------+------------+---------+
+|Blanc09                    |   blank    |negative |
++---------------------------+------------+---------+
+
+
+
+    | **3) Example of xset.TICs_raw.pdf (Total Ion Chromatograms) :**
+
+.. image:: xcms_tics.png
+
+
+    </help>
+
+
+    <citations>
+        <citation type="doi">10.1021/ac051437y</citation> <!-- NOTE(review): presumably the XCMS method paper; confirm -->
+        <citation type="doi">10.1093/bioinformatics/btu813</citation> <!-- NOTE(review): presumably the Workflow4Metabolomics paper; confirm -->
+    </citations>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/planemo.sh	Fri Aug 07 10:49:35 2015 -0400
@@ -0,0 +1,1 @@
+planemo shed_init -f --name=xcms_xcms_set --owner=lecorguille --description="[W4M][GC-MS] XCMS R Package - Preprocessing - peaks calling in NetCDF/mzXML files" --homepage_url="http://workflow4metabolomics.org" --long_description="Part of the W4M project: http://workflow4metabolomics.org\n\nXCMS: http://www.bioconductor.org/packages/release/bioc/html/xcms.html\n\nFiltration and Peak Identification using xcmsSet function from xcms R package to preprocess LC/MS data for relative quantification and statistical analysis\n\nBEWARE: this tool don't come with its script. You will need to install the dedicated package_xcms_w4m_script too" --category="Metabolomics" # Sets the repository name, owner, descriptions, homepage and category; NOTE(review): -f presumably overwrites existing shed metadata, confirm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Fri Aug 07 10:49:35 2015 -0400
@@ -0,0 +1,5 @@
+<?xml version="1.0"?>
+<repositories> <!-- NOTE(review): presumably these supply the no_unzip.zip and rdata.xcms.raw datatypes used by the tool; confirm -->
+    <repository name="no_unzip_datatype" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" changeset_revision="12afb0ecb55f" />
+    <repository name="rdata_xcms_datatypes" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" changeset_revision="d64562a4ebb3" />
+</repositories>
Binary file static/images/XCMS_Galaxy_workflow.png has changed
Binary file static/images/xcms_tics.png has changed
Binary file test-data/sacuri.zip has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sampleMetadata.tsv	Fri Aug 07 10:49:35 2015 -0400
@@ -0,0 +1,10 @@
+sampleMetadata	class	polarity
+HU_neg_028	bio	negative
+HU_neg_060	bio	negative
+HU_neg_051	bio	negative
+HU_neg_017	bio	negative
+HU_neg_034	bio	negative
+Blanc09	blank	negative
+Blanc06	blank	negative
+Blanc12	blank	negative
+Blanc04	blank	negative
Binary file test-data/xset.BPCs_raw.pdf has changed
Binary file test-data/xset.RData has changed
Binary file test-data/xset.TICs_raw.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/xset.log.txt	Fri Aug 07 10:49:35 2015 -0400
@@ -0,0 +1,56 @@
+	PACKAGE INFO
+parallel	3.1.1
+BiocGenerics	0.12.1
+Biobase	2.26.0
+Rcpp	0.11.5
+mzR	2.0.0
+igraph	0.7.1
+xcms	1.42.0
+snow	0.3.13
+batch	1.1.4
+
+
+	ARGUMENTS INFO
+zipfile	/w/galaxy/galaxy4metabolomics/galaxy-dist/database/files/041/dataset_41794.dat
+xfunction	xcmsSet
+nSlaves	8
+method	matchedFilter
+step	0.01
+fwhm	4
+max	50
+snthresh	1
+steps	2
+
+
+	INFILE PROCESSING INFO
+
+
+	MAIN PROCESSING INFO
+Starting snow cluster with 8 local sockets.
+Detecting features in file # 1 : HU_neg_028.mzXML 
+Detecting features in file # 2 : HU_neg_060.mzXML 
+Detecting features in file # 3 : HU_neg_051.mzXML 
+Detecting features in file # 4 : HU_neg_017.mzXML 
+Detecting features in file # 5 : HU_neg_034.mzXML 
+Detecting features in file # 6 : Blanc09.mzXML 
+Detecting features in file # 7 : Blanc06.mzXML 
+Detecting features in file # 8 : Blanc12.mzXML 
+Detecting features in file # 9 : Blanc04.mzXML 
+
+
+	XSET OBJECT INFO
+An "xcmsSet" object with 9 samples
+
+Time range: 0.7-1140 seconds (0-19 minutes)
+Mass range: 50.0019-999.9863 m/z
+Peaks: 135846 (about 15094 per sample)
+Peak Groups: 0 
+Sample classes: bio, blank 
+
+Profile settings: method = bin
+                  step = 0.01
+
+Memory usage: 13.8 MB
+
+
+	DONE
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Fri Aug 07 10:49:35 2015 -0400
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- One <package> per type="package" requirement of abims_xcms_xcmsSet.xml, with matching name and version. -->
+    <package name="R" version="3.1.2">
+        <repository changeset_revision="9f2fddb9d6e2" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="xcms" version="1.44.0">
+        <repository changeset_revision="a1c3fc774157" name="package_r_xcms_1_44_0" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="xcms_w4m_script" version="2.1">
+        <repository changeset_revision="c94c8ef520c8" name="package_xcms_w4m_script_2_1" owner="lecorguille" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
Binary file xcms_set.tgz has changed