view stacks_cstacks.xml @ 5:209d84ea21f9 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit 9401451df4a985ef5686864eaadafa077ffc0877
author iuc
date Mon, 27 Feb 2017 05:43:28 -0500
parents a62f7e799494
children 759ba5c7faca
line wrap: on
line source

<tool id="stacks_cstacks" name="Stacks: cstacks" version="@WRAPPER_VERSION@.0">
    <description>build a catalogue of loci</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <command><![CDATA[
        #import re

        mkdir stacks_inputs stacks_outputs

        &&

        #set $samples = ""
        #for $input_file in $input_col:
            #set $filename = str($input_file.element_identifier)
            #if not filename.endswith('.tsv'):
                #set $filename = $filename + ".tsv"
            #end if
            #if not re.search('catalog\.[a-z]+(\.tsv)?$', $filename):
                ln -s "${input_file}" "stacks_inputs/$filename" &&

                #if $filename.endswith('.tags.tsv'):
                    #set samples += " -s \"stacks_inputs/" + $filename[:-9] + "\""
                #end if
            #end if
        #end for

        cstacks

            -p \${GALAXY_SLOTS:-1}

            $samples

            ## Batch description
            -b 1

            $g

            -n $n

            $include_multiple

            #if $gapped.use_gapped:
                --gapped
                --max_gaps $gapped.max_gaps
                --min_aln_len $gapped.min_aln_len
            #end if

            -o stacks_outputs

             > cstacks.log 2>&1
    ]]></command>

    <inputs>
        <param name="input_col" format="tabular,txt" type="data_collection" collection_type="list" label="Output from previous Stacks pipeline steps (e.g. denovo_map, refmap, ustacks or pstacks)" />

        <param name="g" argument="-g" type="boolean" checked="false" truevalue="-g" falsevalue="" label="Base catalog matching on genomic location, not sequence identity" />

        <param name="n" argument="-n" type="integer" value="1" label="Number of mismatches allowed between sample tags when building the catalog"/>

        <param name="include_multiple" argument="-m" type="boolean" checked="false" truevalue="-m" falsevalue="" label="Include tags in the catalog that match to more than one entry" />

        <conditional name="gapped">
            <param name="use_gapped" argument="--gapped" type="boolean" checked="false" label="Perform gapped alignments between stacks" />
            <when value="false"></when>
            <when value="true">
                <param name="max_gaps" argument="--max_gaps" type="integer" value="2" label="Number of gaps allowed between stacks before merging"/>
                <param name="min_aln_len" argument="--min_aln_len" type="float" value="0.8" min="0.0" max="1.0" label="Minimum length of aligned sequence in a gapped alignment"/>
            </when>
        </conditional>
    </inputs>

    <outputs>
        <data format="txt" name="output_log" label="cstacks.log with ${tool.name} on ${on_string}" from_work_dir="cstacks.log" />

        <data format="tabular" name="catalogtags" label="Catalog assembled loci (tags) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.tags.tsv" />
        <data format="tabular" name="catalogsnps" label="Catalog model calls (snps) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.snps.tsv" />
        <data format="tabular" name="catalogalleles" label="Catalog haplotypes (alleles) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.alleles.tsv" />
    </outputs>

    <tests>
        <test>
            <param name="input_col">
                <collection type="list">
                    <element name="batch_1.catalog.alleles.tsv" ftype="tabular" value="genotypes/batch_1.catalog.alleles.tsv" />
                    <element name="batch_1.catalog.snps.tsv" ftype="tabular" value="genotypes/batch_1.catalog.snps.tsv" />
                    <element name="batch_1.catalog.tags.tsv" ftype="tabular" value="genotypes/batch_1.catalog.tags.tsv" />
                    <element name="PopA_01.alleles.tsv" ftype="tabular" value="genotypes/PopA_01.alleles.tsv" />
                    <element name="PopA_01.matches.tsv" ftype="tabular" value="genotypes/PopA_01.matches.tsv" />
                    <element name="PopA_01.snps.tsv" ftype="tabular" value="genotypes/PopA_01.snps.tsv" />
                    <element name="PopA_01.tags.tsv" ftype="tabular" value="genotypes/PopA_01.tags.tsv" />
                    <element name="PopA_02.alleles.tsv" ftype="tabular" value="genotypes/PopA_02.alleles.tsv" />
                    <element name="PopA_02.matches.tsv" ftype="tabular" value="genotypes/PopA_02.matches.tsv" />
                    <element name="PopA_02.snps.tsv" ftype="tabular" value="genotypes/PopA_02.snps.tsv" />
                    <element name="PopA_02.tags.tsv" ftype="tabular" value="genotypes/PopA_02.tags.tsv" />
               </collection>
            </param>

            <output name="output_log">
                <assert_contents>
                    <has_text text="done." />
                </assert_contents>
            </output>

            <!-- catalog -->
            <output name="catalogtags">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogsnps">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogalleles">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
        </test>

        <test>
            <param name="input_col">
                <collection type="list">
                    <element name="batch_1.catalog.alleles.tsv" ftype="tabular" value="genotypes/batch_1.catalog.alleles.tsv" />
                    <element name="batch_1.catalog.snps.tsv" ftype="tabular" value="genotypes/batch_1.catalog.snps.tsv" />
                    <element name="batch_1.catalog.tags.tsv" ftype="tabular" value="genotypes/batch_1.catalog.tags.tsv" />
                    <element name="PopA_01.alleles.tsv" ftype="tabular" value="genotypes/PopA_01.alleles.tsv" />
                    <element name="PopA_01.matches.tsv" ftype="tabular" value="genotypes/PopA_01.matches.tsv" />
                    <element name="PopA_01.snps.tsv" ftype="tabular" value="genotypes/PopA_01.snps.tsv" />
                    <element name="PopA_01.tags.tsv" ftype="tabular" value="genotypes/PopA_01.tags.tsv" />
                    <element name="PopA_02.alleles.tsv" ftype="tabular" value="genotypes/PopA_02.alleles.tsv" />
                    <element name="PopA_02.matches.tsv" ftype="tabular" value="genotypes/PopA_02.matches.tsv" />
                    <element name="PopA_02.snps.tsv" ftype="tabular" value="genotypes/PopA_02.snps.tsv" />
                    <element name="PopA_02.tags.tsv" ftype="tabular" value="genotypes/PopA_02.tags.tsv" />
               </collection>
            </param>
            <param name="gapped|use_gapped" value="true" />

            <output name="output_log">
                <assert_contents>
                    <has_text text="done." />
                </assert_contents>
            </output>

            <!-- catalog -->
            <output name="catalogtags">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogsnps">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogalleles">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
        </test>
    </tests>

    <help>
<![CDATA[
.. class:: infomark

**What it does**

A catalog can be built from any set of samples processed by the ustacks or pstacks programs. It will create a set of consensus loci, merging alleles together. In the case of a genetic cross, a catalog would be constructed from the parents of the cross to create a set of all possible alleles expected in the progeny of the cross.

--------

**Input files**

Output from denovo_map, refmap, ustacks or pstacks

**Output files**

- XXX.tags.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.


- XXX.snps.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.


- XXX.alleles.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation" />
</tool>