Mercurial > repos > lecorguille > xcms_xcmsset
changeset 0:054e4681667c draft
planemo upload
author | lecorguille |
---|---|
date | Fri, 07 Aug 2015 10:49:35 -0400 |
parents | |
children | 1592925c2e82 |
files | Makefile abims_xcms_xcmsSet.xml planemo.sh repository_dependencies.xml static/images/XCMS_Galaxy_workflow.png static/images/xcms_tics.png test-data/sacuri.zip test-data/sampleMetadata.tsv test-data/xset.BPCs_raw.pdf test-data/xset.RData test-data/xset.TICs_raw.pdf test-data/xset.log.txt tool_dependencies.xml xcms_set.tgz |
diffstat | 14 files changed, 510 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# Makefile: packages the xcms.xcmsSet Galaxy tool into a tarball.
#
# USAGE: make [install|clean]
#   make          build $(OBJ) from the tool definition, dependency files,
#                 static images and test data
#   make install  build the archive, then move it to the home directory
#   make clean    remove generated archives

# -------- VARIABLE --------

# Archive produced by the default target.
OBJ=xcms_set.tgz
# Files and directories bundled into the archive.
DEP=abims_xcms_xcmsSet.xml tool_dependencies.xml repository_dependencies.xml static test-data


# ------------------------

# These targets do not produce files named after themselves; declaring them
# phony keeps make from skipping them if such a file ever appears.
.PHONY: all install clean

all: $(OBJ)

# -h dereferences symlinks so the archive contains real files;
# .svn bookkeeping directories are left out.
$(OBJ): $(DEP)
	tar --exclude=".svn" -zchf $@ $^

# ------------------------

install: $(OBJ)
	mv *.tgz ~

# -f: succeed even when there is nothing to remove.
clean:
	rm -f *.tgz
<!--
    abims_xcms_xcmsSet.xml
    Galaxy tool wrapper around the xcms.r script: runs the xcmsSet function of
    the xcms R package on a set of chromatograms (zip file from the history or
    a library directory) and publishes the resulting RData object, the sample
    metadata table, the TIC/BPC plots and the run log.
-->
<tool id="abims_xcms_xcmsSet" name="xcms.xcmsSet" version="2.0.2">

    <description>Filtration and Peak Identification using xcmsSet function from xcms R package to preprocess LC/MS data for relative quantification and statistical analysis </description>

    <requirements>
        <requirement type="package" version="3.1.2">R</requirement>
        <requirement type="binary">Rscript</requirement>
        <requirement type="package" version="1.44.0">xcms</requirement>
        <requirement type="package" version="2.1">xcms_w4m_script</requirement>
    </requirements>

    <!-- Any non-zero exit status of the command below marks the job as failed. -->
    <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio>

    <!--
        Cheetah template for the command line. Arguments are passed to xcms.r
        as "name value" pairs. Only the parameters of the selected peak
        detection method (and of its advanced options, when shown) are emitted.
    -->
    <command>
        xcms.r
        ## Input source: a library directory on the server or a zip dataset from the history.
        #if $inputs.input == "lib":
            library '$__app__.config.user_library_import_dir/$__user_email__/$inputs.library'
        #elif $inputs.input == "zip_file":
            zipfile '$inputs.zip_file'
        #end if

        xfunction xcmsSet
        ## profmethod $profmethod
        nSlaves \${GALAXY_SLOTS:-1} method $methods.method
        #if $methods.method == "centWave":
            ppm $methods.ppm
            peakwidth "c($methods.peakwidth)"
            #if $methods.options_scanrange.option == "show":
                scanrange "c($methods.options_scanrange.scanrange)"
            #end if
            #if $methods.options_c.option == "show":
                mzdiff $methods.options_c.mzdiff
                snthresh $methods.options_c.snthresh
                integrate $methods.options_c.integrate
                noise $methods.options_c.noise
                prefilter "c($methods.options_c.prefilter)"
            #end if
        #elif $methods.method == "matchedFilter":
            step $methods.step
            fwhm $methods.fwhm
            #if $methods.options_m.option == "show":
                ## sigma "$methods.options_m.sigma"
                max $methods.options_m.max
                snthresh $methods.options_m.snthresh
                ## mzdiff $methods.options_m.mzdiff
                steps $methods.options_m.steps
                ## sleep $methods.options_m.sleep
            #end if
        #elif $methods.method == "MSW":
            snthr $methods.snthr
            nearbyPeak $methods.nearbyPeak
            winSize.noise $methods.winSize_noise
            amp.Th $methods.amp_Th
            scales "c($methods.scales)"
            SNR.method "$methods.SNR_method"
        #end if
        ## On success, move the files written by xcms.r onto the Galaxy output datasets.
        && (mv xcmsSet.RData '$xsetRData';
        mv sampleMetadata.tsv '$sampleMetadata';
        mv TICs_raw.pdf '$ticsRawPdf';
        mv BPCs_raw.pdf '$bpcsRawPdf';
        mv xset.log '$log');
        ## Remember the status of the run before printing the log: without this the
        ## job status would be the status of cat and a failing xcms.r would go unnoticed.
        status=\$?;
        cat '$log';
        sh -c "exit \$status"
    </command>

    <inputs>

        <!-- Where the chromatograms come from. -->
        <conditional name="inputs">
            <param name="input" type="select" label="Choose your inputs method" >
                <option value="zip_file" selected="true">Zip file from your history containing your chromatograms</option>
                <option value="lib" >Library directory name</option>
            </param>
            <when value="zip_file">
                <param name="zip_file" type="data" format="no_unzip.zip" label="Zip file" />
            </when>
            <when value="lib">
                <param name="library" type="text" size="40" label="Library directory name" help="The name of your directory containing all your data" >
                    <validator type="empty_field"/>
                </param>
            </when>

        </conditional>


<!--
        <param name="profmethod" type="select" label="Method to use for profile generation (profmethod)" >
            <option value="bin" selected="true">bin</option>
            <option value="binlin">binlin</option>
            <option value="binlinbase">binlinbase</option>
            <option value="intlin">intlin</option>
        </param>
        <param name="nSlaves" type="integer" value="9" label="MPI-slaves CPU" help="number of MPI-slaves to use for parallel peak detection" />
-->
        <!-- Peak detection method and its method-specific parameters. -->
        <conditional name="methods">
            <param name="method" type="select" label="Extraction method for peaks detection" help="[method] See the help section below">
                <option value="centWave" >centWave</option>
                <option value="matchedFilter" selected="true">matchedFilter</option>
                <option value="MSW">MSW</option>
            </param>

            <!-- centWave Filter options -->
            <when value="centWave">
                <param name="ppm" type="integer" value="25" label="Max tolerated ppm m/z deviation in consecutive scans in ppm" help="[ppm]" />
                <!-- Comma-separated pair; the command wraps it as c(min,max). -->
                <param name="peakwidth" type="text" value="20,50" label="Min,Max peak width in seconds" help="[peakwidth]" />
                <conditional name="options_scanrange">
                    <param name="option" type="select" label="Scan range option " >
                        <option value="show">show</option>
                        <option value="hide" selected="true">hide</option>
                    </param>
                    <when value="show">
                        <param name="scanrange" type="text" value="" label="scanrange" help="scan range to process, for example (16,365)" >
                            <validator type="empty_field"/>
                        </param>
                    </when>
                </conditional>

                <conditional name="options_c">
                    <param name="option" type="select" label="Advanced options" >
                        <option value="show">show</option>
                        <option value="hide" selected="true">hide</option>
                    </param>
                    <when value="show">
                        <param name="snthresh" type="integer" value="10" label="Signal/Noise threshold" help="[snthresh] Signal to noise ratio cutoff" />
                        <param name="mzdiff" type="float" value="-0.001" label="Min m/z difference" help="[mzdiff] Min m/z difference for peaks with overlapping RT " />
                        <param name="integrate" type="select" label="peak limits method" help="[integrate]" >
                            <option value="1">peak limits based on smoothed 2nd derivative (less precise)</option>
                            <option value="2">peak limits based on real data (more sensitive to noise)</option>
                        </param>
                        <param name="prefilter" type="text" value="3,100" label="Prefilter step for the first phase" help="[prefilter] Separate by comma k,I. Mass traces are only retained if they contain at least ‘k’ peaks with intensity >= ‘I’"/>
                        <param name="noise" type="integer" value="0" label="Noise filter" help="[noise] optional argument which is useful for data that was centroided without any intensity threshold, centroids with intensity smaller than ‘noise’ are omitted from ROI detection"/>
                    </when>
                </conditional>
            </when>

            <!-- matched Filter options -->
            <when value="matchedFilter">
                <param name="step" type="float" value="0.01" label="Step size to use for profile generation" help="[step] The peak detection algorithm creates extracted ion base peak chromatograms (EIBPC) on a fixed step size" />
                <param name="fwhm" type="integer" value="30" label="Full width at half maximum of matched filtration gaussian model peak" help="[fwhm] Only used to calculate the actual sigma" />
                <conditional name="options_m">
                    <param name="option" type="select" label="Advanced options" >
                        <option value="show">show</option>
                        <option value="hide" selected="true">hide</option>
                    </param>
                    <when value="show">
<!--
                        <param name="sigma" type="hidden" value="fwhm/2.3548" label="sigma" help="standard deviation (fwhm/2.3548)" />
-->
                        <param name="max" type="integer" value="5" label="Maximum number of peaks per extracted ion chromatogram" help="[max]" />
                        <param name="snthresh" type="integer" value="10" label="Signal to noise ratio cutoff" help="[snthresh]" />
                        <param name="steps" type="integer" value="2" label="Number of steps to merge prior to filtration" help="[steps] The peak identification algorithm combines a given number of EIBPCs prior to filtration and peak detection, as defined by the steps argument" />
<!--
                        <param name="mzdiff" type="text" size="20" value="0.8-step*steps" label="m/z difference" help="min m/z difference for peaks with overlapping RT " />
-->
                    </when>
                </conditional>
            </when>

            <!-- MSW Filter options -->
            <when value="MSW">
                <param name="nearbyPeak" type="select" label="Determine whether to include the nearby small peaks of major peaks" help="[nearbyPeak]" >
                    <option value="TRUE">TRUE</option>
                    <option value="FALSE">FALSE</option>
                </param>
                <param name="winSize_noise" type="integer" value="500" label="The local window size to estimate the noise level" help="[winSize.noise]" />
                <param name="snthr" type="integer" value="3" label="SNR (Signal to Noise Ratio) threshold" help="[snthr]" />
                <param name="amp_Th" type="float" value="0.002" label="Minimum required relative amplitude of the peak" help="[amp.Th] Ratio to the maximum of CWT coefficients" />
                <!-- The command wraps this value as c(...). -->
                <param name="scales" type="text" value="seq(1,22,3)" label="Scales for the Continuous Wavelet Transform (CWT)" help="[scales] Scales are linked to the width of the peaks that are to be detected. Type as indicated seq('n,n,n') or c(n,n) : seq(from, to, by steps), c - linear vector " />
                <param name="SNR_method" type="text" value="data.mean" label="SNR (Signal to Noise Ratio) method" help="[SNR.method] Method to estimate noise level. Currently, only 95 percentage quantile is supported." />
            </when>
        </conditional>
    </inputs>

    <!-- Each output is filled by one of the mv commands at the end of the command template. -->
    <outputs>
        <data name="xsetRData" format="rdata.xcms.raw" label="xset.RData" />
        <data name="sampleMetadata" format="tabular" label="sampleMetadata.tsv" />
        <data name="ticsRawPdf" format="pdf" label="xset.TICs_raw.pdf" />
        <data name="bpcsRawPdf" format="pdf" label="xset.BPCs_raw.pdf" />
        <data name="log" format="txt" label="xset.log.txt" />
    </outputs>

    <tests>
        <!-- Parameters nested in conditionals are addressed with pipe-qualified names. -->
        <test>
            <param name="inputs|input" value="zip_file" />
            <param name="inputs|zip_file" value="sacuri.zip" />
            <param name="methods|method" value="matchedFilter" />
            <param name="methods|step" value="0.01" />
            <param name="methods|fwhm" value="4" />
            <param name="methods|options_m|option" value="show" />
            <param name="methods|options_m|max" value="50" />
            <param name="methods|options_m|snthresh" value="1" />
            <param name="methods|options_m|steps" value="2" />
            <output name="xsetRData" file="xset.RData" />
            <output name="sampleMetadata" file="sampleMetadata.tsv" />
            <output name="ticsRawPdf" file="xset.TICs_raw.pdf" />
            <output name="bpcsRawPdf" file="xset.BPCs_raw.pdf" />
            <output name="log" file="xset.log.txt" />
        </test>
    </tests>

    <help>

.. class:: infomark

**Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu

.. class:: infomark

**Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@univ-nantes.fr - part of Workflow4Metabolomics.org [W4M]

    | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool.

---------------------------------------------------

============
Xcms.xcmsSet
============

-----------
Description
-----------

This tool is used for preprocessing analyte data from multiple LC/MS files (formats NetCDF, mzXML and mzData). It extracts ions from each sample independently and, using a statistical model, peaks are filtered and integrated.
You can read a tutorial on how to perform xcms preprocessing which is available here_.

.. _here: http://web11.sb-roscoff.fr/download/w4m/howto/w4m_HowToPerformXcmsPreprocessing_v02.pdf


-----------------
Workflow position
-----------------

**Upstream tools**

========================= ================= ======= =========
Name                      output file       format  parameter
========================= ================= ======= =========
NA                        NA                zip     NA
========================= ================= ======= =========


**Downstream tools**

+---------------------------+--------------------+-----------------+
| Name                      | Output file        | Format          |
+===========================+====================+=================+
|xcms.group                 | xset.RData         | rdata.xcms.raw  |
+---------------------------+--------------------+-----------------+
|PCA ellipsoid by factors   | sampleMetadata.tsv | Tabular         |
+---------------------------+--------------------+-----------------+
|Anova                      | sampleMetadata.tsv | Tabular         |
+---------------------------+--------------------+-----------------+


**Example of a metabolomic workflow**

.. image:: XCMS_Galaxy_workflow.png


------

.. class:: infomark

The output file is an xset.RData file. You can continue your analysis using it in **xcms.group** tool.

---------------------------------------------------



-----------
Input files
-----------

+---------------------------+------------+
| Parameter : num + label   | Format     |
+===========================+============+
| 1 : Choose your inputs    | zip        |
+---------------------------+------------+

**Choose your inputs**

You have two methods for your inputs:

    | Zip file (recommended): You can put a zip file containing your inputs: myinputs.zip (containing all your conditions as sub-directories).
    | library folder: You must specify the name of your "library" (folder) created within your space project (for example: /projet/externe/institut/login/galaxylibrary/yourlibrary). Your library must contain all your conditions as sub-directories.

----------
Parameters
----------

Extraction method for peaks detection
-------------------------------------

**Matched Filter**

    | One parameter to consider is the Gaussian model peak width used for matched filtration, an integral part of the peak detection algorithm.
    | For a discussion of how model peak width affects the signal to noise ratio, see Danielsson et al. (2002).


**cent Wave**

    | This algorithm is most suitable for high resolution LC/{TOF,OrbiTrap,FTICR}-MS data in centroid mode.
    | Due to the fact that peak centroids are used, a binning step is not necessary.
    | The method is capable of detecting close-by-peaks and also overlapping peaks. Some efforts are made to detect the exact peak boundaries to get precise peak integrals.

**MSW**

    | Wavelet based, used for direct infusion data. Continuous wavelet transform (CWT) can be used to locate chromatographic peaks on different scales.
    | If you wish to have more details about the other parameters, you can read the following documents:
    | -Example of preprocessing data with XCMS : http://www.bioconductor.org/packages/2.12/bioc/vignettes/xcms/inst/doc/xcmsPreprocess.pdf
    | -Details and explanations for all the parameters of XCMS package: http://www.bioconductor.org/packages/release/bioc/manuals/xcms/man/xcms.pdf


------------
Output files
------------

xset.TICs_raw.pdf

    | "Total Ion Chromatograms" graph in pdf format.

xset.BPCs_raw.pdf

    | "Base Peak Chromatograms" graph in pdf format with each class samples opposed.

sampleMetadata.tsv

    | Tabular file that contains, for each sample, its associated class and polarity (positive, negative and mixed).
    | This file is necessary in the Anova and PCA step of the workflow.

xset.RData: rdata.xcms.raw format

    | Rdata file that is necessary in the second step of the workflow "xcms.group".

------

.. class:: infomark

The output file is an xset.RData file. You can continue your analysis using it in **xcms.group** tool.

---------------------------------------------------

---------------
Working example
---------------

Input files
-----------

    | zip_file -> **sacuri.zip**

Parameters
----------

    | Method -> **matchedFilter**
    | step -> **0.01**
    | fwhm -> **4**
    | Advanced option -> **show**
    | max: -> **50**
    | snthresh -> **1**
    | steps -> **2**


Output files
------------

    | **1) xset.RData: RData file**

    | **2) Example of a sampleMetadata.tsv :**


+---------------------------+------------+---------+
| sampleMetadata            | class      | polarity|
+===========================+============+=========+
|HU_neg_017                 | bio        |negative |
+---------------------------+------------+---------+
|HU_neg_028                 | bio        |negative |
+---------------------------+------------+---------+
|HU_neg_034                 | bio        |negative |
+---------------------------+------------+---------+
|Blanc04                    | blank      |negative |
+---------------------------+------------+---------+
|Blanc06                    | blank      |negative |
+---------------------------+------------+---------+
|Blanc09                    | blank      |negative |
+---------------------------+------------+---------+



    | **3) Example of xset.TICs_raw.pdf (Total Ion Chromatograms) :**

.. image:: xcms_tics.png


    </help>


    <citations>
        <citation type="doi">10.1021/ac051437y</citation>
        <citation type="doi">10.1093/bioinformatics/btu813</citation>
    </citations>

</tool>
#!/bin/sh
# planemo.sh: writes the Tool Shed metadata for this repository by calling
# `planemo shed_init`. -f overwrites metadata that already exists.
#
# The short description is tagged LC-MS to agree with the long description
# and with the tool's own description, which both refer to LC/MS data.
# NOTE(review): the repository is listed as "xcms_xcmsset" on the Tool Shed
# while --name below is "xcms_xcms_set"; confirm which name is intended.
planemo shed_init -f \
    --name=xcms_xcms_set \
    --owner=lecorguille \
    --description="[W4M][LC-MS] XCMS R Package - Preprocessing - peaks calling in NetCDF/mzXML files" \
    --homepage_url="http://workflow4metabolomics.org" \
    --long_description="Part of the W4M project: http://workflow4metabolomics.org\n\nXCMS: http://www.bioconductor.org/packages/release/bioc/html/xcms.html\n\nFiltration and Peak Identification using xcmsSet function from xcms R package to preprocess LC/MS data for relative quantification and statistical analysis\n\nBEWARE: this tool doesn't come with its script. You will need to install the dedicated package_xcms_w4m_script too" \
    --category="Metabolomics"
<?xml version="1.0"?>
<!--
    repository_dependencies.xml
    Tool Shed repositories that must be installed together with this tool.
    Each entry is pinned to one changeset revision.
-->
<repositories>
    <!-- Repository named no_unzip_datatype (the tool's zip input uses format "no_unzip.zip"). -->
    <repository
        toolshed="https://toolshed.g2.bx.psu.edu"
        owner="lecorguille"
        name="no_unzip_datatype"
        changeset_revision="12afb0ecb55f" />
    <!-- Repository named rdata_xcms_datatypes (the tool's RData output uses format "rdata.xcms.raw"). -->
    <repository
        toolshed="https://toolshed.g2.bx.psu.edu"
        owner="lecorguille"
        name="rdata_xcms_datatypes"
        changeset_revision="d64562a4ebb3" />
</repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sampleMetadata.tsv Fri Aug 07 10:49:35 2015 -0400 @@ -0,0 +1,10 @@ +sampleMetadata class polarity +HU_neg_028 bio negative +HU_neg_060 bio negative +HU_neg_051 bio negative +HU_neg_017 bio negative +HU_neg_034 bio negative +Blanc09 blank negative +Blanc06 blank negative +Blanc12 blank negative +Blanc04 blank negative
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/xset.log.txt Fri Aug 07 10:49:35 2015 -0400 @@ -0,0 +1,56 @@ + PACKAGE INFO +parallel 3.1.1 +BiocGenerics 0.12.1 +Biobase 2.26.0 +Rcpp 0.11.5 +mzR 2.0.0 +igraph 0.7.1 +xcms 1.42.0 +snow 0.3.13 +batch 1.1.4 + + + ARGUMENTS INFO +zipfile /w/galaxy/galaxy4metabolomics/galaxy-dist/database/files/041/dataset_41794.dat +xfunction xcmsSet +nSlaves 8 +method matchedFilter +step 0.01 +fwhm 4 +max 50 +snthresh 1 +steps 2 + + + INFILE PROCESSING INFO + + + MAIN PROCESSING INFO +Starting snow cluster with 8 local sockets. +Detecting features in file # 1 : HU_neg_028.mzXML +Detecting features in file # 2 : HU_neg_060.mzXML +Detecting features in file # 3 : HU_neg_051.mzXML +Detecting features in file # 4 : HU_neg_017.mzXML +Detecting features in file # 5 : HU_neg_034.mzXML +Detecting features in file # 6 : Blanc09.mzXML +Detecting features in file # 7 : Blanc06.mzXML +Detecting features in file # 8 : Blanc12.mzXML +Detecting features in file # 9 : Blanc04.mzXML + + + XSET OBJECT INFO +An "xcmsSet" object with 9 samples + +Time range: 0.7-1140 seconds (0-19 minutes) +Mass range: 50.0019-999.9863 m/z +Peaks: 135846 (about 15094 per sample) +Peak Groups: 0 +Sample classes: bio, blank + +Profile settings: method = bin + step = 0.01 + +Memory usage: 13.8 MB + + + DONE
<?xml version="1.0"?>
<!--
    tool_dependencies.xml
    Maps each package named in the tool's requirements to the Tool Shed
    repository (pinned to one changeset revision) that installs it.
-->
<tool_dependency>
    <!-- R 3.1.2 -->
    <package name="R" version="3.1.2">
        <repository
            toolshed="https://toolshed.g2.bx.psu.edu"
            owner="iuc"
            name="package_r_3_1_2"
            changeset_revision="9f2fddb9d6e2" />
    </package>
    <!-- xcms 1.44.0 -->
    <package name="xcms" version="1.44.0">
        <repository
            toolshed="https://toolshed.g2.bx.psu.edu"
            owner="lecorguille"
            name="package_r_xcms_1_44_0"
            changeset_revision="a1c3fc774157" />
    </package>
    <!-- xcms_w4m_script 2.1 -->
    <package name="xcms_w4m_script" version="2.1">
        <repository
            toolshed="https://toolshed.g2.bx.psu.edu"
            owner="lecorguille"
            name="package_xcms_w4m_script_2_1"
            changeset_revision="c94c8ef520c8" />
    </package>
</tool_dependency>