view metams_runGC.xml @ 0:2066efbafd7c draft

planemo upload for repository https://github.com/workflow4metabolomics/metaMS commit 6384fcf4496a64affe0b8a173c3f7ea09a275ffb
author yguitton
date Wed, 13 Jul 2016 06:46:45 -0400
parents
children 142fbe102a9d
line wrap: on
line source

 <tool id="metams_runGC" name="metaMS.runGC" version="1.1">
       
    <description>GC-MS data preprocessing using metaMS package</description>
    
    <!-- Packages that must be resolved before the tool can run; each entry is pinned to an exact version. -->
    <requirements>
        <requirement version="1.8.0" type="package">bioconductor-metams</requirement>
        <requirement version="1.46.0" type="package">bioconductor-xcms</requirement>
        <requirement version="1.1_4" type="package">r-batch</requirement>
    </requirements>
    
    <!-- Any exit code of 1 or above is reported as a fatal job error. -->
    <stdio>
        <exit_code level="fatal" range="1:" />
    </stdio>
    
    <command><![CDATA[
        ## Launch the R wrapper script located in the tool directory.
        ## Every option below is emitted as a "name value" pair on the command line.
        Rscript '$__tool_directory__/metams.r'

        ## Input source: a zip archive of chromatograms or an RData file from xcms.xcmsSet.
        ## Both datasets are declared inside the "inputs" conditional, so both are
        ## referenced through it.
        #if $inputs.input == "zip_file":
            zipfile '$inputs.zip_file'
        #elif $inputs.input == "xset":
            xset '$inputs.xset'
        #end if

        ## Processing settings: the "default" preset, or the user-defined values.
        #if $settings.setting == "gcdefault":
            settings "default"
        #elif $settings.setting == "usersettings":
            settings "User_defined"
            fwhm $settings.fwhm
            rtdiff $settings.rtdiff
            minfeat $settings.minfeat
            simthreshold $settings.simthreshold
            minclassfraction $settings.minclassfraction
            minclasssize $settings.minclasssize
        #end if

        ## Optional retention-time range; the text value is wrapped as an R c(...) vector.
        ## The select only offers "show" and "hide", so the else branch is the "hide" case.
        #if $settings.options_rtrange.option == "show":
            rtrange "c($settings.options_rtrange.rtrange)"
        #else
            rtrange "NULL"
        #end if

        ## Optional RI file ("NULL" when the option is hidden).
        #if $settings.options_ri.option == "show":
            ri '$settings.options_ri.ri_input'
        #else
            ri "NULL"
        #end if

        ## Optional personal database file ("NULL" when the option is hidden).
        #if $settings.options_db.option == "show":
            db '$settings.options_db.db_input'
        #else
            db "NULL"
        #end if

        ## Use the number of slots exported in GALAXY_SLOTS, falling back to 1 when unset.
        nSlaves \${GALAXY_SLOTS:-1}

        ## Peaks whose EIC should be plotted, wrapped as an R c(...) vector.
        unkn "c($unkn)"
    ]]></command>
    
    <inputs>
        <!-- Input source: a zip archive of chromatograms, or an RData file produced by xcms.xcmsSet. -->
        <conditional name="inputs">
            <param name="input" type="select" label="Choose your inputs" help="Choose your input method">
                <option value="zip_file" selected="true">Zip file containing your chromatograms</option>
                <option value="xset">Rdata file from xcms.xcmsSet </option>
            </param>
            <when value="zip_file">
                <param name="zip_file"
                       type="data"
                       format="no_unzip.zip,zip"
                       label="Zip file"
                       help="Zip file containing your chromatograms for analysis" />
            </when>
            <when value="xset">
                <param name="xset"
                       type="data"
                       format="rdata.xcms.raw,rdata.xcms.group,rdata.xcms.retcor,rdata"
                       label="Rdata from xcms.xcmsSet"
                       help="Rdata file containing a xset object" />
            </when>
        </conditional>

        <!-- Processing settings: default preset or user-defined values.
             Both branches expose the same three optional blocks (RT range, personal DB, RI file). -->
        <conditional name="settings">
            <param name="setting" type="select" label="Settings" help="Choose the settings used for finding peaks">
                <option value="gcdefault" selected="true">GC_Default</option>
                <option value="usersettings">User_Defined</option>
            </param>

            <!-- Default preset: only the optional blocks are available. -->
            <when value="gcdefault">
                <conditional name="options_rtrange">
                    <param name="option" type="select" label="RT range option ">
                        <option value="show">show</option>
                        <option value="hide" selected="true">hide</option>
                    </param>
                    <when value="show">
                        <param name="rtrange"
                               type="text"
                               value=""
                               label="RTrange"
                               help="RT range to process in minutes, for example 5,25">
                            <validator type="empty_field" />
                        </param>
                    </when>
                    <when value="hide" />
                </conditional>
                <conditional name="options_db">
                    <param name="option" type="select" label="Use Personnal DataBase option ">
                        <option value="show">show</option>
                        <option value="hide" selected="true">hide</option>
                    </param>
                    <when value="show">
                        <param name="db_input"
                               type="data"
                               format="msp,txt"
                               label=" DB file"
                               help="A database file as needed for DB option in runGC" />
                    </when>
                    <when value="hide" />
                </conditional>
                <conditional name="options_ri">
                    <param name="option" type="select" label="Use RI option ">
                        <option value="show">show</option>
                        <option value="hide" selected="true">hide</option>
                    </param>
                    <when value="show">
                        <param name="ri_input"
                               type="data"
                               format="csv,tsv,gg"
                               label=" RI file"
                               help="A file with two column (rt and RI) with rt in minutes as needed for RI option in runGC" />
                    </when>
                    <when value="hide" />
                </conditional>
            </when>

            <!-- User-defined settings: numeric tuning parameters plus the same optional blocks. -->
            <when value="usersettings">
                <param name="fwhm"
                       type="integer"
                       value="5"
                       label="FWHM"
                       help="The FWHM of your peaks (matchedFilter method is used)" />
                <conditional name="options_rtrange">
                    <param name="option" type="select" label="RT range option ">
                        <option value="show">show</option>
                        <option value="hide" selected="true">hide</option>
                    </param>
                    <when value="show">
                        <param name="rtrange"
                               type="text"
                               value=""
                               label="RTrange"
                               help="RT range to process in minutes, for example 5,25">
                            <validator type="empty_field" />
                        </param>
                    </when>
                    <when value="hide" />
                </conditional>
                <param name="rtdiff"
                       type="float"
                       value="0.05"
                       label="RT_Diff"
                       help="The allowed RT shift between same molecule in different sample" />
                <param name="minfeat"
                       type="integer"
                       value="5"
                       label="Min_Features"
                       help="The minimum number of ion in a mass spectra to consider it a molecule" />
                <param name="simthreshold"
                       type="float"
                       value="0.70"
                       label="similarity_threshold"
                       help="The minimum similarity allowed between peaks mass spectra to be considered as equal" />
                <param name="minclassfraction"
                       type="float"
                       value="0.5"
                       label="min.class.fract"
                       help="The fraction of samples in which a pseudospectrum is present before it is regarded as an unknown" />
                <param name="minclasssize"
                       type="integer"
                       value="3"
                       label="min.class.size"
                       help="The absolute number of samples in which a pseudospectrum is present before it is regarded as an unknown" />
                <conditional name="options_db">
                    <param name="option" type="select" label="Use Personnal DataBase option ">
                        <option value="show">show</option>
                        <option value="hide" selected="true">hide</option>
                    </param>
                    <when value="show">
                        <param name="db_input"
                               type="data"
                               format="msp,txt"
                               label=" DB file"
                               help="A database file as needed for DB option in runGC" />
                    </when>
                    <when value="hide" />
                </conditional>
                <conditional name="options_ri">
                    <param name="option" type="select" label="Use RI option ">
                        <option value="show">show</option>
                        <option value="hide" selected="true">hide</option>
                    </param>
                    <when value="show">
                        <param name="ri_input"
                               type="data"
                               format="csv,tsv,gg"
                               label=" RI file"
                               help="A file with two column (rt and RI) with rt in minutes as needed for RI option in runGC" />
                    </when>
                    <when value="hide" />
                </conditional>
            </when>
        </conditional>

        <!-- Peaks to plot as EICs; the value is inserted into an R c(...) expression by the command template. -->
        <param name="unkn"
               type="text"
               value="1:5"
               label="EIC_Unknown"
               help="vector of peaks number to be plotted, for example 1:5 (mean 1 to 5) or 1,4,12  means 1 4 and 12). For all EIC use 0" />

        <!-- Original author note (translated): for !is.null(DB), a conditional with an input of type="text" is needed to load the DB in msp format, so a datatype has to be defined (cf. Misharl) -->
    </inputs>
    
    <outputs>
        <!-- Each output is collected from a file of the given name in the job working directory. -->

        <!-- Tabular results -->
        <data name="peaktable" format="tabular" label="peaktable.tsv" from_work_dir="peaktable.tsv" />
        <!-- Disabled output, kept for reference:
             <data name="peaktable_PCA" format="tabular" from_work_dir="peaktable_PCA.tsv" label="peaktable_PCA.tsv" />
        -->
        <data name="sampleMetadata" format="tabular" label="sampleMetadata.tsv" from_work_dir="sampleMetadata.tsv" />
        <data name="variableMetadata" format="tabular" label="variableMetadata.tsv" from_work_dir="variableMetadata.tsv" />
        <data name="dataMatrix" format="tabular" label="dataMatrix.tsv" from_work_dir="dataMatrix.tsv" />

        <!-- Spectra in MSP text form -->
        <data name="peakspectra" format="txt" label="peakspectra.msp" from_work_dir="peakspectra.msp" />

        <!-- PDF plots -->
        <data name="ticsRawPdf" format="pdf" label="TICs_raw.pdf" from_work_dir="TICs_raw.pdf" />
        <data name="bpcsRawPdf" format="pdf" label="BPCs_raw.pdf" from_work_dir="BPCs_raw.pdf" />
        <data name="unknownPdf" format="pdf" label="GCMS_EIC.pdf" from_work_dir="GCMS_EIC.pdf" />

        <!-- R session data, log and zipped results -->
        <data name="rungcRData" format="rdata" label="rungc.RData" from_work_dir="runGC.RData" />
        <data name="log" format="txt" label="rungc.log.txt" from_work_dir="metams.log" />
        <data name="zip" format="zip" label="rungc.zip" from_work_dir="rungc.zip" />
    </outputs>
    
    <tests>
        <!-- Default-settings run on a zip archive of chromatograms, with the RT range,
             personal database and RI options all hidden. Only parameters declared in
             <inputs> are set here. -->
        <test>
            <param name="inputs|input" value="zip_file" />
            <param name="inputs|zip_file" value="W4M_GCMS_Dataset.zip" ftype="zip" />
            <param name="settings|setting" value="gcdefault" />
            <param name="settings|options_rtrange|option" value="hide" />
            <param name="settings|options_db|option" value="hide" />
            <param name="settings|options_ri|option" value="hide" />
            <output name="sampleMetadata" file="sampleMetadata.tsv" />
            <output name="variableMetadata" file="variableMetadata.tsv" />
            <!-- Up to 6 differing lines are tolerated for the data matrix. -->
            <output name="dataMatrix" file="dataMatrix.tsv" lines_diff="6" />
        </test>
    </tests>
    
    <help>
	
	
.. class:: infomark

**Author(s)**  Ron Wehrens (ron.wehrens@gmail.com), Georg Weingart, Fulvio Mattivi

.. class:: infomark

**Galaxy wrapper and scripts developers** Guitton Yann LABERCA yann.guitton@oniris-nantes.fr

.. class:: infomark

**Please cite**
 
 metaMS : Wehrens, R.; Weingart, G.; Mattivi, F. Journal of Chromatography B.

 xcms :   Smith, C. A.; Want, E. J.; O’Maille, G.; Abagyan, R.; Siuzdak, G. Anal. Chem. 2006, 78, 779–787.

 CAMERA :	Kuhl, C.; Tautenhahn, R.; Böttcher, C.; Larson, T. R.; Neumann, S. Analytical Chemistry 2012, 84, 283–289.


---------------------------------------------------

====================
metaMS.runGC
====================

-----------
Description
-----------
metaMS.runGC is a function dedicated to GCMS data processing from converted files to the generation of pseudospectra (compounds) table.

**Process:** 
Each of the converted data (cdf, mzML...)  is profiled by a combination of xcms and CAMERA functions. Then all the mass spectra of detected peaks are compared and clustered.
For more details see metaMS : Wehrens, R.; Weingart, G.; Mattivi, F. Journal of Chromatography B (10.1016/j.jchromb.2014.02.051) link_

.. _link: http://www.sciencedirect.com/science/article/pii/S1570023214001548

**Main outputs:**
A PeakTable is generated with one line per "compound" and one column per sample.
A dataMatrix.tsv file is generated and can be used for PCA or for further analysis.
A peakspectra.msp file is generated that contains all the spectra of the detected compounds in MSP format. That file can be used for database searches online (Golm, MassBank) or locally (NIST MSSEARCH).
For NIST searches, a tutorial is available here_.

.. _here: http://web11.sb-roscoff.fr/download/w4m/howto/w4m_HowToUseNIST_V01.pdf

Three PDF files are generated, for the BPCs, TICs and EICs of detected compounds in all samples.

  

-----------------
Workflow position
-----------------


**Upstream tools**

You can start from here or use result file from :

========================= ==================== ======= ==========
Name                      output file          format  parameter
========================= ==================== ======= ==========
xcms.xcmsSet              xset.RData RData     RData   file            			
========================= ==================== ======= ==========



**Downstream tools**

+---------------------------+---------------------------------------+--------+
| Name                      | Output file                           | Format |
+===========================+=======================================+========+
|Determine Vdk or Lowess    |                        dataMatrix.tsv | Tabular|
+---------------------------+---------------------------------------+--------+
|Normalization Vdk/Lowess   |                        dataMatrix.tsv | Tabular|
+---------------------------+---------------------------------------+--------+
|Anova                      |                        dataMatrix.tsv | Tabular|
+---------------------------+---------------------------------------+--------+
|PCA                        |                        dataMatrix.tsv | Tabular|
+---------------------------+---------------------------------------+--------+
|Hierarchical Clustering    |                        dataMatrix.tsv | Tabular|
+---------------------------+---------------------------------------+--------+
|Golm Metabolome Search     |                       peakspectra.msp | Text   |
+---------------------------+---------------------------------------+--------+

**General schema of the metabolomic workflow for GCMS**

.. image:: ./static/images/gcms_workflow.png

-----------
Input files
-----------

If you choose to use results from xcms.xcmsSet

+---------------------------+------------+
| Parameter : num + label   |   Format   |
+===========================+============+
| 1 : RData file            |   RData    |
+---------------------------+------------+

Or converted GCMS files (mzML, CDF...) in your local library


----------
Parameters
----------

Parameters are described in metaMS R package and mainly correspond to those of xcms.xcmsSet

----------
Outputs
----------

The output file **dataMatrix.tsv** is a tabular file. You can continue your analysis using it in the statistical tools.
The output file **peakspectra.msp** is a text file. You can continue your analysis using it in the Golm search tool, or you can load it in your
personal NIST MSsearch program (c:/NISTXX/mssearch/nistms.exe); that tool is in general installed by default on GCMS apparatus. Tutorial available here_.

.. _here:  http://web11.sb-roscoff.fr/download/w4m/howto/w4m_HowToUseNIST_V01.pdf


----------------
Working Example
----------------
.. class:: warningmark  

**Reference Data for testing are taken from:**
Dittami,S.M. et al. (2012) Towards deciphering dynamic changes and evolutionary mechanisms involved in the adaptation to low salinities in Ectocarpus (brown algae): Adaptation to low salinities in Ectocarpus. The Plant Journal

Input files
-----------

    | **Zip file** -> GCMS_Idealg_FWS_SWS.zip

Parameters
----------

    | Settings -> **User_Defined**
    | RT range option -> **show**
        | RTrange: -> **4.5,45**
    | ...all default option values


Output files
------------

    | **1) rungc.RData: RData file**

    | **2) Example of BPCs_raw.pdf (Base Peak Chromatograms) :**

.. image:: ./static/images/metaMS_BPCs.png



---------------------------------------------------

Changelog/News
--------------

**Version 1.1 - 11/07/2016**

- TEST: refactoring to pass planemo test using conda dependencies


**Version 1.0 - 01/06/2015**

- NEW: new tool

    </help>

    <!-- References exported with the tool, given as DOIs. -->
    <citations>
        <!-- metaMS article (same DOI as quoted in the help text) -->
        <citation type="doi">10.1016/j.jchromb.2014.02.051</citation>
        <citation type="doi">10.1093/bioinformatics/btu813</citation>
    </citations>

</tool>