diff metafiles2pin.xml @ 0:3a49065a05d6 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/percolator commit b4871f9659a924a68430aed3a93f4f9bad733fd6
author galaxyp
date Wed, 07 Dec 2016 16:43:51 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metafiles2pin.xml	Wed Dec 07 16:43:51 2016 -0500
@@ -0,0 +1,116 @@
+<tool id="metafiles2pin" name="Merge search data" version="1.0">
+    <description>before creating percolator input</description>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command>python $__tool_directory__/metafiles2pin.py
+        --batchsize $percopoolsize
+        #if len($percopoolids) > 0
+        --percolator-pool-ids
+        #for $ppool in $percopoolids
+        "${ppool.ppool_identifier}"
+        #end for
+        #end if
+        --spectrafiles
+        #for $specfile in $searchresult.keys()
+        "$specfile"
+        #end for
+        --searchfiles
+        #for $specfile in $searchresult.keys()
+        "$searchresult[$specfile]"
+        #end for
+    </command>
+    <inputs>
+        <param name="percopoolsize" type="integer" value="8" label="Amount of fractions to batch per Percolator process" />
+        <repeat name="percopoolids" title="Percolator pools" help="For cases where file amounts cause search results to not automatically line up into correct percolator batches">
+            <param name="ppool_identifier" type="text" label="Filename part identifying percolator pool" 
+            help="Specify part of the input spectra filenames that are unique for each percolator pool, e.g set_A."/>
+        </repeat>
+        <param name="searchresult" type="data_collection" format="mzid" label="MSGF+, SQT or XTandem search results" />
+    </inputs>
+    <outputs>
+        <collection name="metafile_collection" type="list" label="Percolator metafiles list">
+          <discover_datasets pattern="__designation_and_ext__" directory="metafiles" />
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="percopoolsize" value="2"/>
+            <param name="searchresult">
+                <collection type="list">
+                    <element name="fraction_one_spectra" value="empty_file1.mzid"/>
+                    <element name="fraction_two_spectra" value="empty_file2.mzid"/>
+                    <element name="fraction_three_spectra" value="empty_file3.mzid"/>
+                    <element name="fraction_four_spectra" value="empty_file4.mzid"/>
+                </collection>
+            </param>
+            <output_collection name="metafile_collection" type="list">
+                <element name="percolatorpool0">
+                    <assert_contents>
+                        <has_text_matching expression="dataset_1.dat" />
+                        <has_text_matching expression="dataset_2.dat" />
+                    </assert_contents>
+                </element>
+                <element name="percolatorpool1">
+                    <assert_contents>
+                        <has_text_matching expression="dataset_3.dat" />
+                        <has_text_matching expression="dataset_4.dat" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="percopoolsize" value="2"/>
+            <repeat name="percopoolids">
+                <param name="ppool_identifier" value="set1"/>
+            </repeat>
+            <repeat name="percopoolids">
+                <param name="ppool_identifier" value="set2"/>
+            </repeat>
+            <param name="searchresult">
+                <collection type="list">
+                    <element name="fr_one_set1_spectra" value="empty_file1.mzid"/>
+                    <element name="fr_two_set1_spectra" value="empty_file2.mzid"/>
+                    <element name="fr_three_set1_spectra" value="empty_file3.mzid"/>
+                    <element name="fr_one_set2_spectra" value="empty_file4.mzid"/>
+                    <element name="fr_two_set2_spectra" value="empty_file5.mzid"/>
+                    <element name="fr_three_set2_spectra" value="empty_file6.mzid"/>
+                </collection>
+            </param>
+            <output_collection name="metafile_collection" type="list">
+                <element name="percolatorpool0">
+                    <assert_contents>
+                        <has_text_matching expression="dataset_7.dat" />
+                        <has_text_matching expression="dataset_8.dat" />
+                    </assert_contents>
+                </element>
+                <element name="percolatorpool1">
+                    <assert_contents>
+                        <has_text_matching expression="dataset_9.dat" />
+                    </assert_contents>
+                </element>
+                <element name="percolatorpool2">
+                    <assert_contents>
+                        <has_text_matching expression="dataset_10.dat" />
+                        <has_text_matching expression="dataset_11.dat" />
+                    </assert_contents>
+                </element>
+                <element name="percolatorpool3">
+                    <assert_contents>
+                        <has_text_matching expression="dataset_12.dat" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+    Creates meta files containing the filenames of the search data in batches of
+    a specific size. These are used as inputs to sqt2pin, msgfplus2pin or tandem2pin.
+    Use this feature when you want to batch a specific amount of input search results
+    to run in percolator, as opposed to one at a time. This tool generates a collection 
+    containing multiple batches of the search results. You can specify batch size as well
+    as a pattern in the spectra files which makes sure the batching isn't continued through
+    very different MS sets. Instead, for each filename-pattern it generates batches of the
+    specified size.
+    </help>
+</tool>