diff abims_xcms_fillPeaks.xml @ 13:91c71f3808f3 draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 9f72e947d9c241d11221cad561f3525d27231857
author lecorguille
date Tue, 18 Sep 2018 16:13:36 -0400
parents dcb9041cb9ea
children 34fe699200ce
line wrap: on
line diff
--- a/abims_xcms_fillPeaks.xml	Wed Nov 29 09:47:20 2017 -0500
+++ b/abims_xcms_fillPeaks.xml	Tue Sep 18 16:13:36 2018 -0400
@@ -1,22 +1,24 @@
-<tool id="abims_xcms_fillPeaks" name="xcms.fillPeaks" version="2.1.1">
+<tool id="abims_xcms_fillPeaks" name="xcms fillChromPeaks (fillPeaks)" version="@WRAPPER_VERSION@.0">
 
-    <description>Integrate a sample's signal in regions where peak groups are not represented to create new peaks in missing areas</description>
+    <description>Integrate areas of missing peaks</description>
 
     <macros>
         <import>macros.xml</import>
+        <import>macros_xcms.xml</import>
     </macros>
 
     <expand macro="requirements"/>
     <expand macro="stdio"/>
 
     <command><![CDATA[
-        @COMMAND_XCMS_SCRIPT@
-        xfunction fillPeaks
+        @COMMAND_RSCRIPT@/xcms_fillpeaks.r
+
         image '$image'
 
-        xsetRdataOutput '$xsetRData'
-
-        method $method
+        ## Advanced
+        expandMz $Adv.expandMz
+        expandRt $Adv.expandRt
+        ppm $Adv.ppm
 
         @COMMAND_PEAKLIST@
 
@@ -27,11 +29,13 @@
     ]]></command>
 
     <inputs>
-        <param name="image" type="data" format="rdata.xcms.group,rdata" label="xset RData file" help="output file from another xcms function (group)" />
-        <param name="method" type="select" label="Filling method" help="[method] See the help section below">
-            <option value="chrom" selected="true">chrom</option>
-            <option value="MSW" >MSW</option>
-        </param>
+        <param name="image" type="data" format="rdata.xcms.group,rdata" label="@INPUT_IMAGE_LABEL@" help="@INPUT_IMAGE_HELP@ from groupChromPeaks" />
+
+        <section name="Adv" title="Advanced Options" expanded="False">
+            <param argument="expandMz" type="integer" value="0" label="Value by which the mz width of peaks should be expanded" help="Each peak is expanded in mz direction by ‘expandMz *’ their original mz width. A value of ‘0’ means no expansion, a value of ‘1’ grows each peak by 1 * the mz width of the peak resulting in peaks with twice their original size in mz direction (expansion by half mz width to both sides)." />
+            <param argument="expandRt" type="integer" value="0" label="Value by which the RT width of peaks should be expanded" help="Each peak is expanded in RT direction by ‘expandRt *’ their original RT width. A value of ‘0’ means no expansion, a value of ‘1’ grows each peak by 1 * the RT width of the peak resulting in peaks with twice their original size in RT direction (expansion by half RT width to both sides)."/>
+            <param argument="ppm" type="integer" value="0" label="ppm value by which the mz width of the peak region should be expanded" help="For peaks with an mz width smaller than ‘mean(c(mzmin, mzmax)) * ppm / 1e6’, the ‘mzmin’ will be replaced by ‘mean(c(mzmin, mzmax)) - (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)’ and ‘mzmax’ by ‘mean(c(mzmin, mzmax)) + (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)’. This is applied before any optional expansion of the mz width by the ‘expandMz’ parameter." />
+        </section>
 
         <expand macro="input_peaklist"/>
 
@@ -40,72 +44,111 @@
     </inputs>
 
     <outputs>
-        <data name="xsetRData" format="rdata.xcms.fillpeaks" label="${image.name[:-6]}.fillPeaks.RData" />
+        <data name="xsetRData" format="rdata.xcms.fillpeaks" label="${image.name[:-6]}.fillChromPeaks.RData" from_work_dir="fillpeaks.RData" />
         <expand macro="output_peaklist" function="fillpeaks" />
-        <data name="log" format="txt" label="xset.log.txt"  hidden="true" />
     </outputs>
 
     <tests>
         <!--<test>
-            <param name="image" value="xset.group.retcor.group.RData"/>
+            <param name="image" value="xset.group.retcor.group.RData" ftype="rdata"/>
             <param name="method" value="chrom"/>
             <param name="zip_file" value="sacuri_dir_root.zip"  ftype="zip" />
-            <output name="log">
-                <assert_contents>
-                    <has_text text="object with 4 samples" />
-                    <has_text text="Time range: 0.2-1140.1 seconds (0-19 minutes)" />
-                    <has_text text="Mass range: 50.0021-999.9863 m/z" />
-                    <has_text text="Peaks: 199718 (about 49930 per sample)" />
-                    <has_text text="Peak Groups: 48958" />
-                    <has_text text="Sample classes: bio, blank" />
-                </assert_contents>
-            </output>
+            <assert_stdout>
+                <has_text text="object with 4 samples" />
+                <has_text text="Time range: 0.2-1140.1 seconds (0-19 minutes)" />
+                <has_text text="Mass range: 50.0021-999.9863 m/z" />
+                <has_text text="Peaks: 199718 (about 49930 per sample)" />
+                <has_text text="Peak Groups: 48958" />
+                <has_text text="Sample classes: bio, blank" />
+            </assert_stdout>
         </test>-->
+        <!-- Issue with fillpeaks because it seems that there are too many NA
         <test>
-            <param name="image" value="faahKO.xset.group.retcor.group.RData"/>
+            <param name="image" value="faahKO.xset.group.retcor.group.RData" ftype="rdata"/>
             <param name="method" value="chrom"/>
             <conditional name="peaklist">
+                <param name="peaklistBool" value="true" />
                 <param name="convertRTMinute" value="false" />
-                <param name="peaklistBool" value="true" />
                 <param name="numDigitsMZ" value="4" />
                 <param name="numDigitsRT" value="1" />
             </conditional>
             <expand macro="test_file_load_zip"/>
-            <output name="log">
-                <assert_contents>
-                    <has_text text="object with 4 samples" />
-                    <has_text text="Time range: 2506-4484 seconds (41.8-74.7 minutes)" />
-                    <has_text text="Mass range: 200.1-600 m/z" />
-                    <has_text text="Peaks: 32720 (about 8180 per sample)" />
-                    <has_text text="Peak Groups: 8157" />
-                    <has_text text="Sample classes: KO, WT" />
-                </assert_contents>
-            </output>
+            <assert_stdout>
+                <has_text text="object with 4 samples" />
+                <has_text text="Time range: 2509.2-4480.3 seconds (41.8-74.7 minutes)" />
+                <has_text text="Mass range: 200.1-600 m/z" />
+                <has_text text="Peaks: 32720 (about 8180 per sample)" />
+                <has_text text="Peak Groups: 8209" />
+                <has_text text="Sample classes: KO, WT" />
+            </assert_stdout>
             <output name="variableMetadata" file="faahKO.xset.group.retcor.group.fillPeaks.variableMetadata.tsv" />
             <output name="dataMatrix" file="faahKO.xset.group.retcor.group.fillPeaks.dataMatrix.tsv" />
         </test>
         <test>
-            <param name="image" value="faahKO-single.xset.merged.group.retcor.group.RData"/>
+            <param name="image" value="faahKO-single.xset.merged.group.retcor.group.RData" ftype="rdata"/>
             <param name="method" value="chrom"/>
             <conditional name="peaklist">
+                <param name="peaklistBool" value="true" />
                 <param name="convertRTMinute" value="false" />
-                <param name="peaklistBool" value="true" />
                 <param name="numDigitsMZ" value="4" />
                 <param name="numDigitsRT" value="1" />
             </conditional>
             <expand macro="test_file_load_single"/>
-            <output name="log">
-                <assert_contents>
-                    <has_text text="object with 4 samples" />
-                    <has_text text="Time range: 2506-4484 seconds (41.8-74.7 minutes)" />
-                    <has_text text="Mass range: 200.1-600 m/z" />
-                    <has_text text="Peaks: 32720 (about 8180 per sample)" />
-                    <has_text text="Peak Groups: 8157" />
-                    <has_text text="Sample classes: KO, WT" />
-                </assert_contents>
-            </output>
+            <assert_stdout>
+                <has_text text="object with 4 samples" />
+                <has_text text="Time range: 2509.2-4480.3 seconds (41.8-74.7 minutes)" />
+                <has_text text="Mass range: 200.1-600 m/z" />
+                <has_text text="Peaks: 32720 (about 8180 per sample)" />
+                <has_text text="Peak Groups: 8209" />
+                <has_text text="Sample classes: KO, WT" />
+            </assert_stdout>
             <output name="variableMetadata" file="faahKO.xset.group.retcor.group.fillPeaks.variableMetadata.tsv" />
             <output name="dataMatrix" file="faahKO.xset.group.retcor.group.fillPeaks.dataMatrix.tsv" />
+        </test>-->
+        <!--<test>
+            <param name="image" value="faahKO-single.xset.merged.group2.retcor2.group2.RData" ftype="rdata"/>
+            <conditional name="peaklist">
+                <param name="peaklistBool" value="true" />
+                <param name="convertRTMinute" value="false" />
+                <param name="numDigitsMZ" value="4" />
+                <param name="numDigitsRT" value="1" />
+                <param name="naTOzero" value="false" />
+            </conditional>
+            <expand macro="test_file_load_single"/>
+            <assert_stdout>
+                <has_text text="expandMz: 0" />
+                <has_text text="expandRt: 0" />
+                <has_text text="object with 4 samples" />
+                <has_text text="Time range: 2499.4-4473.6 seconds (41.7-74.6 minutes)" />
+                <has_text text="Mass range: 200.1-600 m/z" />
+                <has_text text="Peaks: 15230 (about 3808 per sample)" />
+                <has_text text="Peak Groups: 6332" />
+                <has_text text="Sample classes: KO, WT" />
+            </assert_stdout>
+            <output name="variableMetadata" file="faahKO.xset.group2.retcor2.group2.fillPeaks2.variableMetadata.tsv" />
+            <output name="dataMatrix" file="faahKO.xset.group2.retcor2.group2.fillPeaks2.dataMatrix.tsv" />
+        </test>-->
+        <test>
+            <param name="image" value="faahKO-single.xset.merged.group2.retcor2.group2.RData" ftype="rdata"/>
+            <conditional name="peaklist">
+                <param name="peaklistBool" value="true" />
+                <param name="convertRTMinute" value="false" />
+                <param name="numDigitsMZ" value="4" />
+                <param name="numDigitsRT" value="1" />
+            </conditional>
+            <expand macro="test_file_load_single"/>
+            <assert_stdout>
+                <has_text text="expandMz: 0" />
+                <has_text text="expandRt: 0" />
+                <has_text text="object with 4 samples" />
+                <has_text text="Time range: 2499.4-4473.6 seconds (41.7-74.6 minutes)" />
+                <has_text text="Mass range: 200.1-600 m/z" />
+                <has_text text="Peaks: 15230 (about 3808 per sample)" />
+                <has_text text="Peak Groups: 6332" />
+                <has_text text="Sample classes: KO, WT" />
+            </assert_stdout>
+            <output name="variableMetadata" file="faahKO.xset.group2.retcor2.group2.fillPeaks2.variableMetadata.tsv" />
+            <output name="dataMatrix" file="faahKO.xset.group2.retcor2.group2.fillPeaks2.dataMatrix.NAless.tsv" />
         </test>
     </tests>
 
@@ -113,9 +156,9 @@
 
 @HELP_AUTHORS@
 
-==============
-Xcms.fillPeaks
-==============
+===================
+xcms fillChromPeaks
+===================
 
 -----------
 Description
@@ -126,12 +169,6 @@
 represented. For each of those peak groups, integrate the signal
 in the region of that peak group and create a new peak.
 
-According to the type of raw-data there are 2
-different methods available. for filling gcms/lcms data the method
-"chrom" integrates raw-data in the chromatographic domain, whereas
-"MSW" is used for peaklists without retention-time information
-like those from direct-infusion spectra.
-
 
 
 -----------------
@@ -141,80 +178,41 @@
 
 **Upstream tools**
 
-========================= ================= ================== ==========
-Name                      output file       format             parameter
-========================= ================= ================== ==========
-xcms.group                xset.group.RData  rdata.xcms.group   RData file
-========================= ================= ================== ==========
+========================= ============================ ==================
+Name                      Output file                  Format
+========================= ============================ ==================
+xcms.groupChromPeaks      ``*``.groupChromPeaks.RData  rdata.xcms.group
+========================= ============================ ==================
 
 
 **Downstream tools**
 
-+---------------------------+------------------+-----------------------+
-| Name                      | Output file      | Format                |
-+===========================+==================+=======================+
-|CAMERA.annotate            | xset.retcor.RData| rdata.xcms.fillpeaks  |
-+---------------------------+------------------+-----------------------+
-|xcms.summary               | xset.retcor.RData| rdata.xcms.fillpeaks  |
-+---------------------------+------------------+-----------------------+
-
-The output file **xset.fillpeaks** is a RData file. You can continue your analysis using it in **CAMERA.annotate** or **xcms.summary** tool as a following step of your workflow.
+=========================== =========================== =======================
+Name                        Output file                 Format
+=========================== =========================== =======================
+CAMERA.annotate             ``*``.fillChromPeaks.RData  rdata.xcms.fillpeaks
+--------------------------- --------------------------- -----------------------
+xcms.process_history        ``*``.fillChromPeaks.RData  rdata.xcms.fillpeaks
+=========================== =========================== =======================
 
 
 **General schema of the metabolomic workflow**
 
 .. image:: xcms_fillpeaks_workflow.png
 
-
-
------------
-Input files
------------
-
-+---------------------------+-----------------------+
-| Parameter : num + label   |   Format              |
-+===========================+=======================+
-| 1 : RData file            |   rdata.xcms.group    |
-+---------------------------+-----------------------+
-
+---------------------------------------------------
 
 ----------
 Parameters
 ----------
 
-
-Method
-------
-
-**chrom**
+| See the fillChromPeaks_manual_
 
-    | This method produces intensity values for those missing samples by integrating raw data in peak group region. In a given group, the start and ending retention time points for integration are defined by the median start and end points of the other detected peaks. The start and end m/z values are similarly determined. Intensities can be still be zero, which is a rather unusual intensity for a peak.  This is the case if e.g. the raw data was threshholded, and the integration area contains no actual raw intensities, or if one sample is miscalibrated, such the raw data points are (just) outside the integration area.
-    | Importantly, if retention time correction data is available, the alignment information is used to more precisely integrate the propper region of the raw data. If the corrected retention time is beyond the end of the raw data, the value will be not-a-number (NaN).
-
-**MSW**
-
-    | "MSW" is used for peaklists without retention-time information like those from direct-infusion spectra.
-
-
-Get a Peak List
----------------
+.. _fillChromPeaks_manual: https://rdrr.io/bioc/xcms/man/fillChromPeaks.html
 
-If 'true', the module generates two additional files corresponding to the peak list:
-- the variable metadata file (corresponding to information about extracted ions such as mass or retention time)
-- the data matrix (corresponding to related intensities)
-
-**decimal places for [mass or retention time] values in identifiers**
+@HELP_XCMS_MANUAL@
 
-    | Ions' identifiers are constructed as MxxxTyyy where 'xxx' is the ion median mass and 'yyy' the ion median retention time.
-    | Two parameters are used to adjust the number of decimal places wanted in identifiers for mass and retention time respectively.
-    | Theses parameters do not affect decimal places in columns other than the identifier one.
-
-**Reported intensity values**
-
-    | This parameter determines which values should be reported as intensities in the dataMatrix table; it correspond to xcms 'intval' parameter:
-    | - into: integrated area of original (raw) peak
-    | - maxo: maximum intensity of original (raw) peak
-    | - intb: baseline corrected integrated peak area (only available if peak detection was done by ‘findPeaks.centWave’)
+@HELP_PEAKLIST@
 
 ------------
 Output files
@@ -222,45 +220,9 @@
 
 xset.fillPeaks.RData : rdata.xcms.fillpeaks format
 
-    | Rdata file that will be used in the **CAMERA.annotate** or **xcms.summary** step of the workflow.
-
-xset.variableMetadata.tsv : tabular format
-
-    | Table containing information about ions; can be used as one input of **Quality_Metrics** or **Generic_filter** modules.
-
-xset.dataMatrix.tsv : tabular format
-
-    | Table containing ions' intensities; can be used as one input of **Quality_Metrics** or **Generic_filter** modules.
-
-------
-
-.. class:: infomark
-
-The output file is a xset.fillPeaks.RData file. You can continue your analysis using it in **CAMERA.annotate** or **xcms.summary** tool.
-
-
----------------------------------------------------
+    | RData file that will be used in the **CAMERA.annotate** or **xcms.process_history** step of the workflow.
 
----------------
-Working example
----------------
-
-Input files
------------
-
-    | RData file -> **xset.retcor.RData**
-
-Parameters
-----------
-
-    | method -> **chrom**
-    | Get a Peak List -> **false**
-
-
-Output files
-------------
-
-    | **xset.fillPeaks.RData: RData file**
+@HELP_PEAKLIST_OUTPUT@
 
 
 ---------------------------------------------------
@@ -268,28 +230,42 @@
 Changelog/News
 --------------
 
+**Version 3.0.0.0 - 08/03/2018**
+
+- UPGRADE: upgrade the xcms version from 1.46.0 to 3.0.0. This required refactoring much of the underlying code and methods. Some parameters may have been renamed.
+
+- UPDATE: since xcms 3.0.0, the selection of a method (chrom or MSW) is no longer needed. xcms will detect from the data the peak picking method used in findChromPeaks.
+
+- UPDATE: since xcms 3.0.0, new parameters are available: expandMz, expandRt and ppm
+
+
 **Version 2.1.1 - 29/11/2017**
 
 - BUGFIX: To avoid issues with accented letter in the parentFile tag of the mzXML files, we changed a hidden mechanim to LC_ALL=C
 
+
 **Version 2.1.0 - 07/02/2017**
 
 - IMPROVEMENT: change the management of the peaklist ids. The main ids remain the same as xcms generated. The export setiings now only add custom names in the variableMetadata tab (namecustom)
 
 - IMPROVEMENT: xcms.fillpeaks can deal with merged individual data
 
+
 **Version 2.0.8 - 22/12/2016**
 
 - IMPROVEMENT: Add an option to export the peak list at this step without having to wait for CAMERA.annotate
 
+
 **Version 2.0.7 - 06/07/2016**
 
 - UPGRADE: upgrate the xcms version from 1.44.0 to 1.46.0
 
+
 **Version 2.0.6 - 04/04/2016**
 
 - TEST: refactoring to pass planemo test using conda dependencies
 
+
 **Version 2.0.5 - 10/02/2016**
 
 - BUGFIX: better management of errors. Datasets remained green although the process failed