Mercurial > repos > iuc > stacks_cstacks

<tool id="stacks_cstacks" name="Stacks: cstacks" version="@WRAPPER_VERSION@.0">
    <description>build a catalogue of loci</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <command><![CDATA[
        #import re

        mkdir stacks_inputs stacks_outputs

        &&

        #set $samples = ""
        #for $input_file in $input_col:
            #set $filename = str($input_file.name)
            #if not filename.endswith('.tsv'):
                #set $filename = ".tsv"
            #end if
            #if not re.search('catalog\.[a-z]+(\.tsv)?$', $filename):
                ln -s "${input_file}" "stacks_inputs/$filename" &&

                #if $filename.endswith('.tags.tsv'):
                    #set samples += " -s \"stacks_inputs/" + $filename[:-9] + "\""
                #end if
            #end if
        #end for

        cstacks

            -p \${GALAXY_SLOTS:-1}

            $samples

            ## Batch description
            -b 1

            $g

            -n $n

            $include_multiple

            #if $gapped.use_gapped:
                --gapped
                --max_gaps $gapped.max_gaps
                --min_aln_len $gapped.min_aln_len
            #end if

            -o stacks_outputs
    ]]></command>

    <inputs>
        <param name="input_col" format="tabular,txt" type="data_collection" collection_type="list" label="Output from previous Stacks pipeline steps (e.g. denovo_map, refmap, ustacks or pstacks)" />

        <param name="g" argument="-g" type="boolean" checked="false" truevalue="-g" falsevalue="" label="Base catalog matching on genomic location, not sequence identity" />

        <param name="n" argument="-n" type="integer" value="1" label="Number of mismatches allowed between sample tags when building the catalog"/>

        <param name="include_multiple" argument="-m" type="boolean" checked="false" truevalue="-m" falsevalue="" label="Include tags in the catalog that match to more than one entry" />

        <conditional name="gapped">
            <param name="use_gapped" argument="--gapped" type="boolean" checked="false" label="Perform gapped alignments between stacks" />
            <when value="false"></when>
            <when value="true">
                <param name="max_gaps" argument="--max_gaps" type="integer" value="2" label="Number of gaps allowed between stacks before merging"/>
                <param name="min_aln_len" argument="--min_aln_len" type="float" value="0.8" min="0.0" max="1.0" label="Minimum length of aligned sequence in a gapped alignment"/>
            </when>
        </conditional>
    </inputs>

    <outputs>
        <data format="tabular" name="catalogtags" label="Catalog assembled loci (tags) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.tags.tsv" />
        <data format="tabular" name="catalogsnps" label="Catalog model calls (snps) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.snps.tsv" />
        <data format="tabular" name="catalogalleles" label="Catalog haplotypes (alleles) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.alleles.tsv" />
    </outputs>

    <tests>
        <test>
            <param name="input_col">
                <collection type="list">
                    <element name="batch_1.catalog.alleles.tsv" ftype="tabular" value="genotypes/batch_1.catalog.alleles.tsv" />
                    <element name="batch_1.catalog.snps.tsv" ftype="tabular" value="genotypes/batch_1.catalog.snps.tsv" />
                    <element name="batch_1.catalog.tags.tsv" ftype="tabular" value="genotypes/batch_1.catalog.tags.tsv" />
                    <element name="PopA_01.alleles.tsv" ftype="tabular" value="genotypes/PopA_01.alleles.tsv" />
                    <element name="PopA_01.matches.tsv" ftype="tabular" value="genotypes/PopA_01.matches.tsv" />
                    <element name="PopA_01.snps.tsv" ftype="tabular" value="genotypes/PopA_01.snps.tsv" />
                    <element name="PopA_01.tags.tsv" ftype="tabular" value="genotypes/PopA_01.tags.tsv" />
                    <element name="PopA_02.alleles.tsv" ftype="tabular" value="genotypes/PopA_02.alleles.tsv" />
                    <element name="PopA_02.matches.tsv" ftype="tabular" value="genotypes/PopA_02.matches.tsv" />
                    <element name="PopA_02.snps.tsv" ftype="tabular" value="genotypes/PopA_02.snps.tsv" />
                    <element name="PopA_02.tags.tsv" ftype="tabular" value="genotypes/PopA_02.tags.tsv" />
               </collection>
            </param>

            <!-- catalog -->
            <output name="catalogtags">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogsnps">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogalleles">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
        </test>

        <test>
            <param name="input_col">
                <collection type="list">
                    <element name="batch_1.catalog.alleles.tsv" ftype="tabular" value="genotypes/batch_1.catalog.alleles.tsv" />
                    <element name="batch_1.catalog.snps.tsv" ftype="tabular" value="genotypes/batch_1.catalog.snps.tsv" />
                    <element name="batch_1.catalog.tags.tsv" ftype="tabular" value="genotypes/batch_1.catalog.tags.tsv" />
                    <element name="PopA_01.alleles.tsv" ftype="tabular" value="genotypes/PopA_01.alleles.tsv" />
                    <element name="PopA_01.matches.tsv" ftype="tabular" value="genotypes/PopA_01.matches.tsv" />
                    <element name="PopA_01.snps.tsv" ftype="tabular" value="genotypes/PopA_01.snps.tsv" />
                    <element name="PopA_01.tags.tsv" ftype="tabular" value="genotypes/PopA_01.tags.tsv" />
                    <element name="PopA_02.alleles.tsv" ftype="tabular" value="genotypes/PopA_02.alleles.tsv" />
                    <element name="PopA_02.matches.tsv" ftype="tabular" value="genotypes/PopA_02.matches.tsv" />
                    <element name="PopA_02.snps.tsv" ftype="tabular" value="genotypes/PopA_02.snps.tsv" />
                    <element name="PopA_02.tags.tsv" ftype="tabular" value="genotypes/PopA_02.tags.tsv" />
               </collection>
            </param>
            <param name="gapped|use_gapped" value="true" />

            <!-- catalog -->
            <output name="catalogtags">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogsnps">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogalleles">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
        </test>
    </tests>

    <help>
<![CDATA[
.. class:: infomark

**What it does**

A catalog can be built from any set of samples processed by the ustacks or pstacks programs. It will create a set of consensus loci, merging alleles together. In the case of a genetic cross, a catalog would be constructed from the parents of the cross to create a set of all possible alleles expected in the progeny of the cross.

--------

**Input files**

Output from denovo_map, refmap, ustacks or pstacks

**Output files**

- XXX.tags.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.


- XXX.snps.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.


- XXX.alleles.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation" />
</tool>
author	iuc
date	Tue, 14 Jun 2016 14:05:09 -0400
parents
children	6a7c5e218732