<!-- Mercurial > repos > galaxyp > percolator -->

<tool id="metafiles2pin" name="Merge search data" version="1.0">
    <description>before creating percolator input</description>
    <!-- Any exit status of 1 or higher from the command marks the job as failed. -->
    <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio>
    <!--
        Cheetah template that assembles the metafiles2pin.py call:
          * batchsize receives the integer from the percopoolsize parameter;
          * the pool-id option is only emitted when at least one "percopoolids"
            repeat entry exists, followed by one identifier per entry;
          * spectrafiles receives the element identifiers of the input
            collection, searchfiles the matching datasets, both emitted in the
            same keys() order so the two lists line up positionally.
        Every interpolated value is single-quoted so the shell neither
        word-splits it nor expands dollar/backtick sequences inside it; element
        identifiers are user supplied. The script path is quoted as well so an
        install directory containing spaces still works. CDATA keeps the raw
        ">" of the #if test out of the XML parser's way.
    -->
    <command><![CDATA[
        python '$__tool_directory__/metafiles2pin.py'
        --batchsize $percopoolsize
        #if len($percopoolids) > 0
        --percolator-pool-ids
        #for $ppool in $percopoolids
        '${ppool.ppool_identifier}'
        #end for
        #end if
        --spectrafiles
        #for $specfile in $searchresult.keys()
        '$specfile'
        #end for
        --searchfiles
        #for $specfile in $searchresult.keys()
        '$searchresult[$specfile]'
        #end for
    ]]></command>
    <inputs>
        <!-- Passed to the script as the batch size. A batch must hold at least
             one file, so values below 1 are rejected by the form. -->
        <param name="percopoolsize" type="integer" value="8" min="1" label="Amount of fractions to batch per Percolator process" />
        <!-- Optional list of filename fragments; when the list is empty the
             command omits the pool-id option entirely. -->
        <repeat name="percopoolids" title="Percolator pools" help="For cases where file amounts cause search results to not automatically line up into correct percolator batches">
            <param name="ppool_identifier" type="text" label="Filename part identifying percolator pool"
            help="Specify part of the input spectra filenames that are unique for each percolator pool, e.g set_A."/>
        </repeat>
        <!-- The command uses each element identifier as the spectra file name
             and the element's dataset as the search file.
             NOTE(review): the label mentions SQT and XTandem results, but
             format only accepts mzid; confirm which is intended. -->
        <param name="searchresult" type="data_collection" format="mzid" label="MSGF+, SQT or XTandem search results" />
    </inputs>
    <!-- A single list collection populated after the run from the files found
         in the job's "metafiles" directory. The tests below expect elements
         named percolatorpool0, percolatorpool1, and so on. -->
    <outputs>
        <collection name="metafile_collection" type="list" label="Percolator metafiles list">
            <discover_datasets directory="metafiles" pattern="__designation_and_ext__" />
        </collection>
    </outputs>
    <tests>
        <!-- Four inputs, batch size 2, no pool identifiers: expects two pools
             of two files each. The expressions are regular expressions, so the
             dot before "dat" is escaped to match a literal dot only.
             NOTE(review): the dataset numbers assume sequential dataset ids in
             the test history; confirm against the test framework in use. -->
        <test>
            <param name="percopoolsize" value="2"/>
            <param name="searchresult">
                <collection type="list">
                    <element name="fraction_one_spectra" value="empty_file1.mzid"/>
                    <element name="fraction_two_spectra" value="empty_file2.mzid"/>
                    <element name="fraction_three_spectra" value="empty_file3.mzid"/>
                    <element name="fraction_four_spectra" value="empty_file4.mzid"/>
                </collection>
            </param>
            <output_collection name="metafile_collection" type="list">
                <element name="percolatorpool0">
                    <assert_contents>
                        <has_text_matching expression="dataset_1\.dat" />
                        <has_text_matching expression="dataset_2\.dat" />
                    </assert_contents>
                </element>
                <element name="percolatorpool1">
                    <assert_contents>
                        <has_text_matching expression="dataset_3\.dat" />
                        <has_text_matching expression="dataset_4\.dat" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Six inputs in two sets (set1, set2), batch size 2: batching is
             expected to restart at the set boundary, giving pools of 2 + 1
             files for set1 and 2 + 1 files for set2.
             NOTE(review): dataset ids 7 to 12 presumably follow on from the
             datasets created by the first test; confirm. -->
        <test>
            <param name="percopoolsize" value="2"/>
            <repeat name="percopoolids">
                <param name="ppool_identifier" value="set1"/>
            </repeat>
            <repeat name="percopoolids">
                <param name="ppool_identifier" value="set2"/>
            </repeat>
            <param name="searchresult">
                <collection type="list">
                    <element name="fr_one_set1_spectra" value="empty_file1.mzid"/>
                    <element name="fr_two_set1_spectra" value="empty_file2.mzid"/>
                    <element name="fr_three_set1_spectra" value="empty_file3.mzid"/>
                    <element name="fr_one_set2_spectra" value="empty_file4.mzid"/>
                    <element name="fr_two_set2_spectra" value="empty_file5.mzid"/>
                    <element name="fr_three_set2_spectra" value="empty_file6.mzid"/>
                </collection>
            </param>
            <output_collection name="metafile_collection" type="list">
                <element name="percolatorpool0">
                    <assert_contents>
                        <has_text_matching expression="dataset_7\.dat" />
                        <has_text_matching expression="dataset_8\.dat" />
                    </assert_contents>
                </element>
                <element name="percolatorpool1">
                    <assert_contents>
                        <has_text_matching expression="dataset_9\.dat" />
                    </assert_contents>
                </element>
                <element name="percolatorpool2">
                    <assert_contents>
                        <has_text_matching expression="dataset_10\.dat" />
                        <has_text_matching expression="dataset_11\.dat" />
                    </assert_contents>
                </element>
                <element name="percolatorpool3">
                    <assert_contents>
                        <has_text_matching expression="dataset_12\.dat" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <!-- User-facing help text. It is wrapped in CDATA so that future edits can
         use markup characters without escaping; the text itself is unchanged. -->
    <help><![CDATA[
    Creates meta files containing the filenames of the search data in batches of
    a specific size. These are used as inputs to sqt2pin, msgfplus2pin or tandem2pin.
    Use this feature when you want to batch a specific amount of input search results
    to run in percolator, as opposed to one at a time. This tool generates a collection
    containing multiple batches of the search results. You can specify batch size as well
    as a pattern in the spectra files which makes sure the batching isn't continued through
    very different MS sets. Instead, for each filename-pattern it generates batches of the
    specified size.
    ]]></help>
</tool>
<!--
author	galaxyp
date	Wed, 07 Dec 2016 16:43:51 -0500
parents
children
-->