Repository 'thermo_raw_file_converter'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/thermo_raw_file_converter

Changeset 5:77a18a61aeed (2019-08-30)
Previous changeset 4:344e10282449 (2019-04-26) Next changeset 6:2d80c8b2dfc2 (2020-04-06)
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/ThermoRawfileParser commit 2c29e2fadeba9be11e81420f24e7a6429ef52aa7"
modified:
thermo_converter.xml
added:
test-data/really_small.mzml
test-data/really_small.raw
test-data/really_small_2.raw
test-data/really_small_3.raw
removed:
test-data/fake_input.txt
b
diff -r 344e10282449 -r 77a18a61aeed test-data/fake_input.txt
--- a/test-data/fake_input.txt Fri Apr 26 14:33:21 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-This is a fake input, I was not able to find a very small RAW file.
b
diff -r 344e10282449 -r 77a18a61aeed test-data/really_small.mzml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/really_small.mzml Fri Aug 30 10:28:22 2019 -0400
b
b'@@ -0,0 +1,4696 @@\n+<?xml version="1.0" encoding="utf-8"?>\n+<mzML xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" version="1.1.0" id="input" xmlns="http://psi.hupo.org/ms/mzml">\n+  <cvList count="2">\n+    <cv id="MS" fullName="Mass spectrometry ontology" version="4.1.12" URI="https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo" />\n+    <cv id="UO" fullName="Unit Ontology" version="09:04:2014" URI="https://raw.githubusercontent.com/bio-ontology-research-group/unit-ontology/master/unit.obo" />\n+  </cvList>\n+  <fileDescription>\n+    <fileContent>\n+      <cvParam cvRef="MS" accession="MS:1000579" value="" name="MS1 spectrum" />\n+      <cvParam cvRef="MS" accession="MS:1000580" value="" name="MSn spectrum" />\n+    </fileContent>\n+    <sourceFileList count="1">\n+      <sourceFile id="RAW1" name="input" location="./raws_folder/input.raw">\n+        <cvParam cvRef="MS" accession="MS:1000768" value="" name="Thermo nativeID format" />\n+        <cvParam cvRef="MS" accession="MS:1000563" value="" name="Thermo RAW format" />\n+        <cvParam cvRef="MS" accession="MS:1000569" value="a6c0d318207b271d5e2f31b3302239e040c66dbe" name="SHA-1" />\n+      </sourceFile>\n+    </sourceFileList>\n+  </fileDescription>\n+  <referenceableParamGroupList count="1">\n+    <referenceableParamGroup id="commonInstrumentParams">\n+      <cvParam cvRef="MS" accession="MS:1002416" value="" name="Orbitrap Fusion" />\n+      <cvParam cvRef="MS" accession="MS:1000529" value="FSN10188" name="instrument serial number" />\n+    </referenceableParamGroup>\n+  </referenceableParamGroupList>\n+  <softwareList count="1">\n+    <software id="ThermoRawFileParser" version="1.1.9">\n+      <cvParam cvRef="MS" accession="MS:1000799" value="ThermoRawFileParser" name="custom unreleased software tool" />\n+    </software>\n+  </softwareList>\n+  <instrumentConfigurationList count="1">\n+    <instrumentConfiguration id="IC1">\n+      <referenceableParamGroupRef ref="commonInstrumentParams" />\n+      <componentList count="3">\n+        <source order="1">\n+          <cvParam cvRef="MS" accession="MS:1000398" value="" name="nanoelectrospray" />\n+        </source>\n+        <analyzer order="2">\n+          <cvParam cvRef="MS" accession="MS:1000079" value="" name="fourier transform ion cyclotron resonance mass spectrometer" />\n+        </analyzer>\n+        <detector order="3">\n+          <cvParam cvRef="MS" accession="MS:1000624" value="" name="inductive detector" />\n+        </detector>\n+      </componentList>\n+    </instrumentConfiguration>\n+  </instrumentConfigurationList>\n+  <dataProcessingList count="1">\n+    <dataProcessing id="ThermoRawFileParserProcessing">\n+      <processingMethod order="0" softwareRef="ThermoRawFileParser">\n+        <cvParam cvRef="MS" accession="MS:1000544" value="" name="Conversion to mzML" />\n+      </processingMethod>\n+    </dataProcessing>\n+  </dataProcessingList>\n+  <run id="input" defaultInstrumentConfigurationRef="IC1" startTimeStamp="2019-03-25T11:30:48.075Z" defaultSourceFileRef="RAW1">\n+    <spectrumList count="101" defaultDataProcessingRef="ThermoRawFileParserProcessing">\n+      <spectrum id="controllerType=0 controllerNumber=1 scan=1" index="0" defaultArrayLength="136">\n+        <cvParam cvRef="MS" accession="MS:1000511" value="1" name="ms level" />\n+        <cvParam cvRef="MS" accession="MS:1000579" value="" name="MS1 spectrum" />\n+        <cvParam cvRef="MS" accession="MS:1000130" value="" name="positive scan" />\n+        <cvParam cvRef="MS" accession="MS:1000285" value="464386.25" name="total ion current" />\n+        <cvParam cvRef="MS" accession="MS:1000127" value="" name="centroid spectrum" />\n+        <cvParam cvRef="MS" accession="MS:1000504" value="536.163879394531" name="base peak m/z" unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />\n+        <cvParam cvRef="MS" accession="MS:1000505" value="70424.4453125" name="base peak '..b'dh7CO5S+tcAhJH/wp+6tJV3+MRf3WGwUeNZcHcApHf+bdBjdBj/fvhI8iZRm8Mn/2Uz9fGMKzcGF6Tirk1uI339dTfCHT6e+PiI/n51/Fv6MhHr4R95BHWXVEHgXR2rQZy1mFs1T6AP0hRu8TiBuTwX169ly5JP6C24h/tp69IPZWxEvLTl6HniihsCTcNYjr7rOAX1Jv0UfhZ35MfjgvT7ER8ZW2jtpZuCXLadIF5uaqlF/Vgby8dy1hHf+HdJnrg/Na2gp+BAR68GrPNGFeZHt1Zh/aUsb2ZMboSs230nvvxFH9TakYm74zGjoTFYcwMMKDZS3Ngz9Zu2vY+5ESB36IRvN6K8S54IeNe970IGaPa0O9QYXEY7BR7QfW6y05wYt4EP7JZ3mZvEB7As15QPogI07SO/vDlN/q//fRx772uFfORv8aK1vQsfKtjHsS7E0gOZvhkJ8VU7QXPgFA4+mW0J67b2OOGV3IM48ntE89+6jPdo2j949uhv6V3ry8R/hvp6Yd6FbhnfzhiLiqdFFe823FXHqq+G0b7rt1FedDe9hT6ZiH/CdnqSPzCj0QyQN03ursnBWckqQR0ox0V7YdRh7USyag+/alBOk/x+NxI9IpDlJ70E9eXUn8bAsEFael044btlovkIi6T+gjdL8J5txX/nEg8553bCySKU+3/yTcN3dDr2qTFA/Ii5AV/znZrp3W0//CzGD3l/oSfrnNuTnCWX03/NKoz3U30I6Pvs8zcHyiWz5Px7fS6E=</binary>\n+          </binaryDataArray>\n+        </binaryDataArrayList>\n+      </spectrum>\n+    </spectrumList>\n+    <chromatogramList count="1" defaultDataProcessingRef="ThermoRawFileParserProcessing">\n+      <chromatogram id="base_peak_0" index="0" defaultArrayLength="101">\n+        <cvParam cvRef="MS" accession="MS:1000235" value="" name="total ion current chromatogram" />\n+        <binaryDataArrayList count="2">\n+          <binaryDataArray encodedLength="1092">\n+            <cvParam cvRef="MS" accession="MS:1000595" value="" name="time array" unitAccession="UO:0000031" unitName="minute" unitCvRef="UO" />\n+            <cvParam cvRef="MS" accession="MS:1000523" value="" name="64-bit float" />\n+            <cvParam cvRef="MS" accession="MS:1000574" value="" name="zlib compression" />\n+            <binary>eJwBKAPX/JDHAE/pe+w/3mWbZZ3O7D9Ls/S+PB/tPyWmRWgDcO0/Aec2NLPC7T/dYNeaWxPuPxmPLAUMZO4/oQlWUxq27j9RnYTTeQfvP7RCsY6MWe8/9oNOS9ir7z8HjLM6mf7vP6O6sYeTJ/A/XmbU0fZP8D8RyYUJ83jwPyrMHMmiofA/hXafgP/J8D9eagBGAvPwP9nLjkipG/E/89OtlKdE8T96On1btW3xP9T0cVsCl/E/u+1dZgfA8T+2IlTiX+jxP3nfny5rEfI/Ewjt7Bo68j9DCnkacWLyP1qyxVnHivI/iwg8PWGz8j8nFj54ddzyP1unu/2ABfM/3aYyY84u8z9QBAcuL1fzP19WT0SFf/M/vOpH4Jeo8z+tuFqmZLLzP8SNMlkd0fM/4bypm9na8z+ZRoJTWv7zPyf42nm+KPQ/p6f+LFhR9D9KVhUFc3r0P8Bs7Yh+o/Q/3m9DdiPM9D/1I0ywe/T0Pz2S1y+HHfU/JLORYPFF9T9gGs9xvk/1Py+YZ0ECYvU/dxRjrZ9w9T855ANdJZv1P/5yISSCw/U/IfM4Nyns9T/JiXfANhX2P1njlpU5PvY/dy86SOlm9j91Yv1GSI/2P6OfEtRIuPY/vLtKi/jg9j/iRPrDCgr3Pw4iHx8qM/c/71WpquM89z9HTd52WVz3PwNhQiHQh/c/IMV5sPGw9z+ACakSFtL3Pye7yxdQ9fc/RMIhWj759z+GAJxylwv4P05/SFPJE/g/7o1ifaQc+D9UCBpn3CT4P5KYF5URLfg/CY1JzUQ1+D9TMYD7dT34PwTJM5ypRfg/kr4PlthN+D+gjQyPC1b4P/1jtUi6ZPg/MXqo0nRu+D8ybzC12oD4P2Ocw50Zifg/waSRwPSR+D//TxgrJJr4PyOsfMJbovg/uylCP4+q+D+iAKhhwLL4PwUsGQlxwfg/BwIKkSvL+D9ErbT+ct34P/Vrexai5fg/HaQgai3u+D/N5FLUV/b4P45aGNuI/vg/RITPuc0G+T9atjjg/g75P/PZJdotF/k/3RquVUkf+T8C8kaf+S35P+Kdn8YgW/k/LoNzh0KE+T9dHplk</binary>\n+          </binaryDataArray>\n+          <binaryDataArray encodedLength="704">\n+            <cvParam cvRef="MS" accession="MS:1000515" value="" name="intensity array" unitAccession="MS:1000131" unitName="number of counts" unitCvRef="MS" />\n+            <cvParam cvRef="MS" accession="MS:1000523" value="" name="64-bit float" />\n+            <cvParam cvRef="MS" accession="MS:1000574" value="" name="zlib compression" />\n+            <binary>eJwtkl9IFGEUxT/ajE0sbNG0IBoiBAsqQZLyoVkITINFCIXooQkS8iGpdX1fCDP/VEb1WE0QCS1BrLRSTDXZln+2rC0rtIxPa1FMzdV1K4qtvt/0dDj3O+eee++MEEJrKUnq4h+GKxIKZXdoXKG55foiONBjUT/oHoW7j35VKJob4SLb80ahcXPNnEL9bAqfftwjqZf9141deqEwuLWNHL3JAEU4MAu/lvqMvqOVfnqkboLct5XPqWcN98I172t8yTvzzKO1kCdj1e/Zw9r7mPf6XPxm3b4B8HSmDwycGaJ/es+YQvt8z0fmKipOk3NryblDXn0/vCHt9L0cI9+oeODM9ezYN3ISK9lbzHZN0i/a1A2P+PHJ7IIv+HyJFHxDFntqoU3MY/sXouSXXv0OlngX0O2OO7q527/QrevEb2xb8YH+i5FlXvU+XZhB5yrnDqI16OhEaY56N6ue7FBo9FaTJzZurlRcDObXKNTXFp5UKKfWn0AXr4IHbyzfTv9VO93oD3hy6de/+id57ffI0/rKf3DPC4fZXx5aYm67uIZ7arVXPjGPGR2BF0wcob8MzVC3cqaoh31x+syMs5/hqh2m3tHG97Y7ffjFrvtPwT939zN3IJ//Sr6zXnE/q2GQ73xxCJ+cnn9JvXmSd/vU7xj+xnbqZtk5538q0h+S+yiP+5n+pIs5M10p/S+To/IQ</binary>\n+          </binaryDataArray>\n+        </binaryDataArrayList>\n+      </chromatogram>\n+    </chromatogramList>\n+  </run>\n+</mzML>\n\\ No newline at end of file\n'
b
diff -r 344e10282449 -r 77a18a61aeed test-data/really_small.raw
b
Binary file test-data/really_small.raw has changed
b
diff -r 344e10282449 -r 77a18a61aeed test-data/really_small_2.raw
b
Binary file test-data/really_small_2.raw has changed
b
diff -r 344e10282449 -r 77a18a61aeed test-data/really_small_3.raw
b
Binary file test-data/really_small_3.raw has changed
b
diff -r 344e10282449 -r 77a18a61aeed thermo_converter.xml
--- a/thermo_converter.xml Fri Apr 26 14:33:21 2019 -0400
+++ b/thermo_converter.xml Fri Aug 30 10:28:22 2019 -0400
[
b'@@ -1,60 +1,193 @@\n-<tool id="thermo_raw_file_converter" name="Thermo" version="1.1.2">\n+<tool id="thermo_raw_file_converter" name="Thermo" version="1.1.10">\n     <description>RAW file converter</description>\n     <requirements>\n-        <requirement type="package" version="1.1.2">ThermoRawFileParser</requirement>\n+        <requirement type="package" version="1.1.10">ThermoRawFileParser</requirement>\n     </requirements>\n     <command>\n <![CDATA[\n+#import re\n \n-ln -s \'$input\' ./input.raw &&\n+#set $temp_stderr = "thermo_converter_stderr"\n+\n+echo "" > $temp_stderr &&\n \n-ThermoRawFileParser.sh\n-    -i=input.raw\n-    -o=./\n+mkdir ./raws_folder &&\n+mkdir ./output_folder &&\n+#for $input_raw in $input:\n+    #if len($input) > 1\n+        #set $input_name = re.sub(\'[^\\w\\-\\.]\', \'_\',$input_raw.element_identifier.split(\'/\')[-1].replace(".raw", "") + ".raw")\n+        ln -s -f \'${input_raw}\' \'./raws_folder/${input_name}\' &&\n+    #else:\n+        ln -s -f \'${input_raw}\' \'./raws_folder/input.raw\' &&\n+    #end if\n+#end for\n+\n+(ThermoRawFileParser.sh\n+    -d=./raws_folder\n+    -o=./output_folder\n     -f=$output_format\n-    #if $metadata:\n-        --metadata=1\n+    #if $output_metadata_selector != "off":\n+        --metadata="${output_metadata_selector}"\n     #end if\n+    $zlib_boolean\n+    $peakpicking_boolean\n+    $ignore_instrument_errors_boolean\n+\n+    2>> $temp_stderr)\n+\n     &&\n-    #if $output_format == \'0\':\n-        mv ./input.mgf \'$output\'\n-    #else:\n-        mv ./input.mzML \'$output\'\n+\n+    #if len($input) == 1:\n+        #if $output_format == "0":\n+            mv ./output_folder/input.mgf ./output_file.out &&\n+        #else if $output_format == "1":\n+            mv ./output_folder/input.mzML ./output_file.out &&\n+        #else if $output_format == "2":\n+            mv ./output_folder/input.mzML ./output_file.out &&\n+        #end if\n+\n+        #if $output_metadata_selector != "off":\n+            #if $output_metadata_selector == "0":\n+                mv ./output_folder/input-metadata.json ./input-metadata.txt &&\n+            #else if $output_metadata_selector == "1":\n+                mv ./output_folder/input-metadata.txt ./input-metadata.txt &&\n+            #end if\n+        #end if\n     #end if\n+\n+    cat $temp_stderr 2>&1;\n ]]>\n     </command>\n     <inputs>\n-        <param name="input" type="data" format="thermo.raw" label="Thermo RAW file" help="" />\n-        <param name="metadata" type="boolean" truevalue="" falsevalue="" checked="False"\n-            label="Output metadata" help="" />\n-        <param name="output_format" type="select" label="Choose an output format">\n+        <param name="input" type="data" format="thermo.raw" label="Thermo RAW file" help="" multiple="true"\n+          optional="False" />\n+\n+        <param name="output_format" type="select" label="Output format">\n             <option value="0">mgf</option>\n             <option value="1" selected="True">mzml</option>\n+            <option value="2">Indexed mzml</option>\n         </param>\n+\n+        <param name="zlib_boolean" type="boolean" truevalue="" falsevalue="-z" checked="true"\n+            label="Use zlib compression for the m/z ratios and intensities" help="" />\n+\n+        <param name="peakpicking_boolean" type="boolean" truevalue="" falsevalue="-p" checked="true"\n+            label="Use the peak picking provided by the native thermo library" help="" />\n+\n+        <param name="ignore_instrument_errors_boolean" type="boolean" truevalue="-e" falsevalue="" checked="true"\n+            label="Ignore missing instrument properties" help="If false, it stops the conversion if instrument properties are missing" />\n+\n+        <param name="output_metadata_selector" type="select" label="Output metadata" >\n+            <option value="off" selected="True">No</option>\n+            <option value="0">json</option>\n+            <option value="1">txt</option>\n+        </param>\n+\n     </inputs>\n     <outputs>\n-        <data format="mzml" name="output" from_work_dir="input.mzML" label="'..b'ectory="output_folder"/>\n+        </collection>\n+\n+        <collection name="output_metadata_collection" type="list" label="${tool.name} on ${on_string}: metadata">\n+            <filter>output_metadata_selector != "off"</filter>\n+            <filter>(str(input)).count(\',\') > 0</filter>\n+            <discover_datasets pattern="(?P&lt;designation&gt;.+)-metadata.txt" ext="txt" directory="output_folder"/>\n+            <discover_datasets pattern="(?P&lt;designation&gt;.+)-metadata.json" ext="json" directory="output_folder"/>\n+        </collection>\n+\n     </outputs>\n+\n     <tests>\n-        <test>\n-            <param name="input" value="fake_input.txt"/>\n-            <assert_command>\n-                <has_text text="ThermoRawFileParser.sh"/>\n-            </assert_command>\n+        <!-- Basic test -->\n+        <test expect_num_outputs="1">\n+            <param name="input" value="really_small.raw"/>\n+            <param name="output_format" value="1"/>\n+            <output name="output" file="really_small.mzml" ftype="mzml" compare="sim_size" delta="3000" />\n         </test>\n-        <test>\n-            <param name="input" value="fake_input.txt"/>\n+\n+        <!-- Testing contents of converted mgf file with txt metadata -->\n+        <test expect_num_outputs="2">\n+            <param name="input" value="really_small.raw"/>\n             <param name="output_format" value="0"/>\n-            <assert_command>\n-                <has_text text="-f=0"/>\n-            </assert_command>\n+            <param name="output_metadata_selector" value="1"/>\n+            <output name="output">\n+                <assert_contents>\n+                    <has_text text="SCANS=36"/>\n+                    <has_text text="RTINSECONDS=73.863181104"/>\n+                    <has_text text="PEPMASS=675.248779296875"/>\n+                    <has_text text="CHARGE=2+"/>\n+                    <has_text text="121.3116455 920.2367553711"/>\n+                    <has_text text="229.2241211 1137.6958007813"/>\n+                    <has_text text="1577.8967285 1487.9519042969"/>\n+                </assert_contents>\n+            </output>\n+            <output name="output_metadata" ftype="txt">\n+                <assert_contents>\n+                    <has_text text="Instrument model=[MS, MS:1000494, Thermo Scientific instrument model, Orbitrap Fusion]"/>\n+                    <has_text text="Instrument name=Orbitrap Fusion"/>\n+                    <has_text text="Instrument serial number=[MS, MS:1000529, instrument serial number, FSN10188]"/>\n+                    <has_text text="Software version=[NCIT, NCIT:C111093, Software Version, 3.1.2412.17]"/>\n+                    <has_text text="Mass resolution=[MS, MS:1000011, mass resolution, 0.500]"/>\n+                    <has_text text="Number of scans=101"/>\n+                    <has_text text="Scan range=1;101"/>\n+                    <has_text text="Scan start time=[MS, MS:1000016, scan start time, 0.89]"/>\n+                    <has_text text="Time range=0.89;1.59"/>\n+                    <has_text text="Mass range=120.0000;2000.0000"/>\n+                </assert_contents>\n+            </output>\n+\n+        </test>\n+\n+        <!-- Basic mzml collection test -->\n+        <test expect_num_outputs="1">\n+            <param name="input" value="really_small.raw,really_small_2.raw"/>\n+            <param name="output_format" value="1"/>\n+            <output_collection name="output_mzml_collection" type="list" count="2"/>\n+        </test>\n+\n+        <!-- mgf collection test with metadata -->\n+        <test expect_num_outputs="2">\n+            <param name="input" value="really_small.raw,really_small_2.raw,really_small_3.raw"/>\n+            <param name="output_format" value="0"/>\n+            <param name="output_metadata_selector" value="0"/>\n+            <output_collection name="output_mgf_collection" type="list" count="3"/>\n+            <output_collection name="output_metadata_collection" type="list" count="3"/>\n         </test>\n     </tests>\n     <help>\n'