diff thermo_converter.xml @ 9:92ac8e086317 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/ThermoRawFileParser commit 9bb883e027809047684fd6c45e9a6f1f3bf69351"
author galaxyp
date Wed, 17 Feb 2021 09:50:26 +0000
parents 26c6706bfb07
children b9b385097f13
line wrap: on
line diff
--- a/thermo_converter.xml	Sat Jul 25 16:26:42 2020 -0400
+++ b/thermo_converter.xml	Wed Feb 17 09:50:26 2021 +0000
@@ -1,145 +1,202 @@
-<tool id="thermo_raw_file_converter" name="Thermo" version="1.2.3+galaxy0">
+<tool id="thermo_raw_file_converter" name="Thermo" version="@TOOL_VERSION@+galaxy0" profile="20.05">
     <description>RAW file converter</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.3.2</token>
+    </macros>
     <requirements>
-        <requirement type="package" version="1.2.3">thermorawfileparser</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">thermorawfileparser</requirement>
     </requirements>
+    <stdio>
+        <regex match="ERROR" source="both" level="fatal" description="Fatal error"/>
+    </stdio>
+    <version_command><![CDATA[ThermoRawFileParser.sh --version]]></version_command>
     <command>
 <![CDATA[
 #import re
 
-mkdir ./raws_folder &&
-mkdir ./output_folder &&
-#for $input_raw in $input_files:
-    #set $input_name = re.sub('[^\w\-\.]', '_',$input_raw.element_identifier.split('/')[-1].replace(".raw", "") + ".raw")
-    ln -s -f '${input_raw}' './raws_folder/${input_name}' &&
-#end for
+#set $input_name = re.sub('[^\w\-\.]', '_', $input_file.element_identifier.replace(".raw", "") + ".raw")
+ln -s -f '$input_file' '$input_name' &&
 
 ThermoRawFileParser.sh
-    -d=./raws_folder
-    -o=./output_folder
-    -f=$output_format
+    --input='$input_name'
+    --output_file='$output'
+    -f=$format_cond.output_format
     #if $output_metadata_selector != "off":
-        --metadata="${output_metadata_selector}"
+        --metadata="$output_metadata_selector"
+        --metadata_output_file='$output_metadata'
+    #end if
+    #if $format_cond.output_format == "0"
+        $format_cond.mgfPrecursor
     #end if
-    $zlib_boolean
-    $peakpicking_boolean
+    #if $format_cond.output_format in ['1', '2']
+        $format_cond.zlib_boolean
+        $format_cond.peakpicking_boolean
+    #end if
     $ignore_instrument_errors_boolean
-
+    $allDetectors
+    $includeExceptionData
+    #if $msLevel != ''
+        --msLevel="$msLevel"
+    #end if
 ]]>
     </command>
     <inputs>
-        <param name="input_files" type="data" format="thermo.raw" label="Thermo RAW file" help="" multiple="true"
-          optional="False" />
-
-        <param name="output_format" type="select" label="Output format">
-            <option value="0">mgf</option>
-            <option value="1" selected="True">mzml</option>
-            <option value="2">Indexed mzml</option>
-        </param>
-
-        <param name="zlib_boolean" type="boolean" truevalue="" falsevalue="-z" checked="true"
-            label="Use zlib compression for the m/z ratios and intensities" help="" />
-
-        <param name="peakpicking_boolean" type="boolean" truevalue="" falsevalue="-p" checked="true"
-            label="Use the peak picking provided by the native thermo library" help="" />
-
-        <param name="ignore_instrument_errors_boolean" type="boolean" truevalue="-e" falsevalue="" checked="true"
-            label="Ignore missing instrument properties" help="If false, it stops the conversion if instrument properties are missing" />
-
-        <param name="output_metadata_selector" type="select" label="Output metadata" >
+        <param argument="--input" name="input_file" type="data" format="thermo.raw" label="Thermo RAW file" help="" optional="False"/>
+        <conditional name="format_cond">
+            <param argument="--format" name="output_format" type="select" label="Output format">
+                <option value="0">mgf</option>
+                <option value="1" selected="True">mzml</option>
+                <option value="2">Indexed mzml</option>
+                <!-- <option value="3">Parquet</option> -->
+            </param>
+            <when value="0">
+                <param argument="--mgfPrecursor" type="boolean" truevalue="--mgfPrecursor" falsevalue="" label="Include precursor scan number in MGF file" help=""/>
+            </when>
+            <when value="1">
+                <param argument="--noZlibCompression" name="zlib_boolean" type="boolean" truevalue="" falsevalue="-z" checked="true"
+                    label="Use zlib compression for the m/z ratios and intensities" help=""/>
+                <param argument="--noPeakPicking" name="peakpicking_boolean" type="boolean" truevalue="" falsevalue="-p" checked="true"
+                    label="Use the peak picking provided by the native thermo library" help=""/>
+            </when>
+            <when value="2">
+                <param argument="--noZlibCompression" name="zlib_boolean" type="boolean" truevalue="" falsevalue="-z" checked="true"
+                    label="Use zlib compression for the m/z ratios and intensities" help=""/>
+                <param argument="--noPeakPicking" name="peakpicking_boolean" type="boolean" truevalue="" falsevalue="-p" checked="true"
+                    label="Use the peak picking provided by the native thermo library" help=""/>
+            </when>
+            <when value="3"/>
+        </conditional>
+        <param argument="--metadata" name="output_metadata_selector" type="select" label="Output metadata">
             <option value="off" selected="True">No</option>
             <option value="0">json</option>
             <option value="1">txt</option>
         </param>
-
+        <param argument="--ignoreInstrumentErrors" name="ignore_instrument_errors_boolean" type="boolean" truevalue="-e" falsevalue="" checked="true"
+            label="Ignore missing instrument properties" help="If false, it stops the conversion if instrument properties are missing"/>
+        <param argument="--allDetectors" type="boolean" truevalue="--allDetectors" falsevalue="" checked="false" label="Extract additional detector data" help="UV/PDA etc"/>
+        <param argument="--includeExceptionData" type="boolean" truevalue="--includeExceptionData" falsevalue="" label="Include reference and exception data"/>
+        <param argument="--msLevel" type="text" value="" label="Select MS levels " help="(MS1, MS2, etc) included in the output, should be a comma-separated list of integers ( 1,2,3 ) and/or intervals ( 1-3 ), open-end intervals ( 1- ) are allowed">
+            <sanitizer invalid_char="">
+                <valid initial="string.digits">
+                    <add value="-"/>
+                    <add value=","/>
+                </valid>
+            </sanitizer>
+        </param>
     </inputs>
     <outputs>
-
-        <collection name="output_mgf_collection" type="list" label="${tool.name} on ${on_string}: MGF collection">
-            <filter>output_format == "0"</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.mgf" directory="output_folder" ext="mgf"/>
-        </collection>
-
-        <collection name="output_mzml_collection" type="list" label="${tool.name} on ${on_string}: mzML collection">
-            <filter>output_format == "1"</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.mzML" ext="mzml" directory="output_folder"/>
-        </collection>
-
-        <collection name="output_indexedmzml_collection" type="list" label="${tool.name} on ${on_string}: Indexed mzML collection">
-            <filter>output_format == "2"</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.mzML" ext="mzml" directory="output_folder"/>
-        </collection>
-
-        <collection name="output_metadata_collection" type="list" label="${tool.name} on ${on_string}: metadata collection">
+        <data name="output" format="mzml" label="${tool.name} on ${on_string}">
+            <change_format>
+                <when input="format_cond.output_format" value="0" format="mgf"/>
+                <!-- <when input="format_cond.output_format" value="3" format="parquet"/> -->
+            </change_format>
+        </data>
+        <data name="output_metadata" format="txt" label="${tool.name} on ${on_string}: metadata">
             <filter>output_metadata_selector != "off"</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)-metadata.txt" ext="txt" directory="output_folder"/>
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)-metadata.json" ext="json" directory="output_folder"/>
-        </collection>
-
+            <change_format>
+                <when input="output_metadata_selector" value="0" format="json"/>
+            </change_format>
+        </data>
     </outputs>
 
     <tests>
-        <!-- Basic mgf test - 1 file -->
+        <!-- mgf test, no metadata -->
         <test expect_num_outputs="1">
-            <param name="input_files" value="really_small.raw" ftype="thermo.raw"/>
-            <param name="output_format" value="0"/>
-            <output_collection name="output_mgf_collection" type="list" count="1"/>
-
-            <!--<output name="output" file="really_small.mzml" ftype="mzml" compare="sim_size" delta="3000" />-->
+            <param name="input_file" value="really_small.raw" ftype="thermo.raw"/>
+            <conditional name="format_cond">
+                <param name="output_format" value="0"/>
+                <param name="mgfPrecursor" value="true"/>
+            </conditional>
+            <output name="output" ftype="mgf" value="really_small.mgf">
+                <assert_contents>
+                    <has_text text="SCANS=36"/>
+                    <has_text text="RTINSECONDS=73.863181104"/>
+                    <has_text text="PEPMASS=675.248779296875"/>
+                    <has_text text="CHARGE=2+"/>
+                    <has_text text="121.3116455 920.2367553711"/>
+                    <has_text text="229.2241211 1137.6958007813"/>
+                    <has_text text="1577.8967285 1487.9519042969"/>
+                </assert_contents>
+            </output>
+            <assert_command>
+                <has_text text="--mgfPrecursor"></has_text>
+            </assert_command>
         </test>
 
-
-        <!-- Basic mzml test - 2 files -->
-        <test expect_num_outputs="1">
-            <param name="input_files" value="really_small.raw,really_small_2.raw" ftype="thermo.raw"/>
-            <param name="output_format" value="1"/>
-            <output_collection name="output_mzml_collection" type="list" count="2"/>
+        <!-- mzml test and json metadata  -->
+        <test expect_num_outputs="2">
+            <param name="input_file" value="really_small.raw" ftype="thermo.raw"/>
+            <conditional name="format_cond">
+                <param name="output_format" value="1"/>
+            </conditional>
+            <param name="output_metadata_selector" value="0"/>
+            <output name="output" file="really_small.mzml" ftype="mzml"/>
+            <output name="output_metadata" value="really_small.json" ftype="json"/>
+            <assert_command>
+                <not_has_text text="--mgfPrecursor"></not_has_text>
+            </assert_command>
         </test>
 
-        <!-- Testing contents of converted mgf file with txt metadata -->
+        <!-- mzml test (wo zlib compression and peak picking) and json metadata  -->
         <test expect_num_outputs="2">
-            <param name="input_files" value="really_small.raw" ftype="thermo.raw"/>
-            <param name="output_format" value="0"/>
-            <param name="output_metadata_selector" value="1"/>
-            <output_collection name="output_mgf_collection" type="list" count="1">
-                <element name="really_small" ftype="mgf">
-                    <assert_contents>
-                        <has_text text="SCANS=36"/>
-                        <has_text text="RTINSECONDS=73.863181104"/>
-                        <has_text text="PEPMASS=675.248779296875"/>
-                        <has_text text="CHARGE=2+"/>
-                        <has_text text="121.3116455 920.2367553711"/>
-                        <has_text text="229.2241211 1137.6958007813"/>
-                        <has_text text="1577.8967285 1487.9519042969"/>
-                    </assert_contents>
-                </element>
-            </output_collection>
-            <output_collection name="output_metadata_collection" type="list">
-                <element name="really_small" ftype="txt">
-                    <assert_contents>
-                        <has_text text="Instrument model=[MS, MS:1000494, Thermo Scientific instrument model, Orbitrap Fusion]"/>
-                        <has_text text="Instrument name=Orbitrap Fusion"/>
-                        <has_text text="Instrument serial number=[MS, MS:1000529, instrument serial number, FSN10188]"/>
-                        <has_text text="Software version=[NCIT, NCIT:C111093, Software Version, 3.1.2412.17]"/>
-                        <has_text text="Mass resolution=[MS, MS:1000011, mass resolution, 0.500]"/>
-                        <has_text text="Number of scans=101"/>
-                        <has_text text="Scan range=1;101"/>
-                        <has_text text="Scan start time=[MS, MS:1000016, scan start time, 0.89]"/>
-                        <has_text text="Time range=0.89;1.59"/>
-                        <has_text text="Mass range=120.0000;2000.0000"/>
-                    </assert_contents>
-                </element>
-            </output_collection>
+            <param name="input_file" value="really_small.raw" ftype="thermo.raw"/>
+            <conditional name="format_cond">
+                <param name="output_format" value="1"/>
+                <param name="zlib_boolean" value="false"/>
+                <param name="peakpicking_boolean" value="false"/>
+            </conditional>
+            <param name="output_metadata_selector" value="0"/>
+            <output name="output" file="really_small_ext.mzml" ftype="mzml"/>
+            <output name="output_metadata" value="really_small.json" ftype="json"/>
+            <assert_command>
+                <not_has_text text="--mgfPrecursor"></not_has_text>
+                <has_text text="-z "></has_text>
+                <has_text text="-p "></has_text>
+            </assert_command>
         </test>
 
-        <!-- mgf collection test with metadata -->
+        <!-- indexed mzml, txt metadata (+ extra options) -->
         <test expect_num_outputs="2">
-            <param name="input_files" value="really_small.raw,really_small_2.raw,really_small_3.raw"/>
-            <param name="output_format" value="0"/>
-            <param name="output_metadata_selector" value="0"/>
-            <output_collection name="output_mgf_collection" type="list" count="3"/>
-            <output_collection name="output_metadata_collection" type="list" count="3"/>
+            <param name="input_file" value="really_small.raw" ftype="thermo.raw"/>
+            <conditional name="format_cond">
+                <param name="output_format" value="2"/>
+            </conditional>
+            <param name="output_metadata_selector" value="1"/>
+            <param name="ignore_instrument_errors_boolean" value="true"/>
+            <param name="allDetectors" value="true"/>
+            <param name="includeExceptionData" value="--includeExceptionData"/>
+            <param name="msLevel" value="1"/>
+            <output name="output" value="really_small.indexed_mzML" ftype="mzml"/>
+            <output name="output_metadata" ftype="txt">
+                <assert_contents>
+                    <has_text text="Instrument model=[MS, MS:1000494, Thermo Scientific instrument model, Orbitrap Fusion]"/>
+                    <has_text text="Instrument name=Orbitrap Fusion"/>
+                    <has_text text="Instrument serial number=[MS, MS:1000529, instrument serial number, FSN10188]"/>
+                    <has_text text="Software version=[NCIT, NCIT:C111093, Software Version, 3.1.2412.17]"/>
+                    <has_text text="Mass resolution=[MS, MS:1000011, mass resolution, 0.500]"/>
+                    <has_text text="Number of scans=101"/>
+                    <has_text text="Scan range=1;101"/>
+                    <has_text text="Scan start time=[MS, MS:1000016, scan start time, 0.89]"/>
+                    <has_text text="Time range=0.89;1.59"/>
+                    <has_text text="Mass range=120.0000;2000.0000"/>
+                </assert_contents>
+            </output>
+            <assert_command>
+                <not_has_text text="--mgfPrecursor"></not_has_text>
+                <has_text text="-e "/>
+                <has_text text="--allDetectors"/>
+                <has_text text="--includeExceptionData"/>
+                <has_text text="--msLevel=&quot;1&quot;"/>
+            </assert_command>
         </test>
+<!--     parquet test, no metadata 
+        <test expect_num_outputs="1">
+            <param name="input_file" value="really_small.raw" ftype="thermo.raw"/>
+            <conditional name="format_cond">
+                <param name="output_format" value="3"/>
+            </conditional>
+            <output name="output" ftype="parquet" value="really_small.parquet"/>
+        </test> -->
     </tests>
     <help>
 <![CDATA[
@@ -149,12 +206,6 @@
 ]]>
     </help>
     <citations>
-        <citation type="bibtex">@misc{Galaxy Proteomics Tools,
-        author = {Niels Hulstaert, et al.},
-        title = {Galaxy Proteomics Tools},
-        publisher = {GitHub},
-        journal = {GitHub repository},
-        year = {2017}, url = {https://github.com/compomics/ThermoRawFileParser}}
-        </citation>
+        <citation type="doi">10.1021/acs.jproteome.9b00328</citation>
     </citations>
 </tool>