view stacks_cstacks.xml @ 11:be3df81c0353 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit 5f2ec13ecca435abaac2b99ba21f1d6497ec7139"
author iuc
date Tue, 22 Mar 2022 23:19:02 +0000
parents 759ba5c7faca
children 40cde06ae34a
line wrap: on
line source

<tool id="stacks_cstacks" name="Stacks: cstacks" version="@WRAPPER_VERSION@.0">
    <description>build a catalogue of loci</description>
    <!-- The macros expanded in this file (bio_tools, requirements, stdio, citation) are not
         defined here; presumably they are provided by the imported macros.xml (confirm there). -->
    <expand macro="bio_tools"/>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <command><![CDATA[
        ## Command template: stage the input collection into stacks_inputs, run cstacks
        ## (either on an explicit sample list or on a population map), then build the
        ## HTML summary from the log and the catalog files in stacks_outputs.
        #import re

        ## The cstacks output is piped through tee below. Without pipefail the status
        ## of that pipeline would be the status of tee, hiding a failed cstacks run.
        set -o pipefail &&

        mkdir stacks_inputs stacks_outputs

        &&

        #set $samples = ""
        #for $input_file in $input_col
            ## Element identifiers are user supplied and end up inside single-quoted
            ## shell words, so anything outside word characters, dash and dot is
            ## replaced to keep the generated command line safe.
            #set $filename = re.sub('[^\w\-\.]', '_', str($input_file.element_identifier))
            #if not $filename.endswith('.tsv')
                #set $filename = $filename + ".tsv"
            #end if
            ## Catalog files present in the collection are not linked.
            #if not re.search('catalog\.[a-z]+(\.tsv)?$', $filename)
                ln -s '${input_file}' 'stacks_inputs/$filename' &&

                #if $filename.endswith('.tags.tsv')
                    ## Drop the 9 character ".tags.tsv" suffix to obtain the sample prefix.
                    #set $samples = $samples + " -s 'stacks_inputs/" + $filename[:-9] + "'"
                #end if
            #end if
        #end for

        cstacks

            ## Batch description
            -b 1

            -p \${GALAXY_SLOTS:-1}

            #if $popmap
                -P stacks_inputs -M '$popmap'
            #else
                $samples
                -o stacks_outputs
            #end if

            $g

            -n $n

            $include_multiple

            #if $gapped.use_gapped == "yes"
                --gapped
                --max_gaps $gapped.max_gaps
                --min_aln_len $gapped.min_aln_len
            #end if

            2>&1 | tee cstacks.log

            #if $popmap
                ## When using a popmap, stacks write to the input dir
                && mv stacks_inputs/batch_1.catalog.*.tsv stacks_outputs/
            #end if

            &&

            stacks_summary.py --stacks-prog cstacks --res-dir stacks_outputs --logfile cstacks.log --summary stacks_outputs/summary.html
    ]]></command>

    <inputs>
        <!-- Collection of Stacks files; the command links its non-catalog elements into stacks_inputs -->
        <param name="input_col"
               type="data_collection"
               collection_type="list"
               format="tabular,txt"
               label="Output from previous Stacks pipeline steps (e.g. denovo_map, refmap, ustacks or pstacks)" />

        <!-- Optional: when set, the command runs cstacks with -P/-M instead of an explicit -s list -->
        <param name="popmap"
               argument="-M"
               type="data"
               format="tabular,txt"
               optional="true"
               label="Population map"
               help="If set, the catalog will be built from samples listed in this file" />

        <param name="g"
               argument="-g"
               type="boolean"
               truevalue="-g"
               falsevalue=""
               checked="false"
               label="Base catalog matching on genomic location, not sequence identity" />

        <param name="n"
               argument="-n"
               type="integer"
               value="1"
               label="Number of mismatches allowed between sample tags when building the catalog"/>

        <param name="include_multiple"
               argument="-m"
               type="boolean"
               truevalue="-m"
               falsevalue=""
               checked="false"
               label="Include tags in the catalog that match to more than one entry" />

        <!-- The gapped alignment options are only offered (and only passed on) when use_gapped is "yes" -->
        <conditional name="gapped">
            <param name="use_gapped"
                   argument="--gapped"
                   type="select"
                   label="Perform gapped alignments between stacks">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param name="max_gaps"
                       argument="--max_gaps"
                       type="integer"
                       value="2"
                       label="Number of gaps allowed between stacks before merging"/>
                <param name="min_aln_len"
                       argument="--min_aln_len"
                       type="float"
                       value="0.8"
                       min="0.0"
                       max="1.0"
                       label="Minimum length of aligned sequence in a gapped alignment"/>
            </when>
        </conditional>
    </inputs>

    <outputs>
        <!-- Log captured by tee in the command, and the HTML report written by stacks_summary.py -->
        <data name="output_log"
              format="txt"
              from_work_dir="cstacks.log"
              label="cstacks.log with ${tool.name} on ${on_string}" />
        <data name="output_summary"
              format="html"
              from_work_dir="stacks_outputs/summary.html"
              label="Summary from ${tool.name} on ${on_string}" />

        <!-- The three catalog files, collected from stacks_outputs -->
        <data name="catalogtags"
              format="tabular"
              from_work_dir="stacks_outputs/batch_1.catalog.tags.tsv"
              label="Catalog assembled loci (tags) with ${tool.name} on ${on_string}" />
        <data name="catalogsnps"
              format="tabular"
              from_work_dir="stacks_outputs/batch_1.catalog.snps.tsv"
              label="Catalog model calls (snps) with ${tool.name} on ${on_string}" />
        <data name="catalogalleles"
              format="tabular"
              from_work_dir="stacks_outputs/batch_1.catalog.alleles.tsv"
              label="Catalog haplotypes (alleles) with ${tool.name} on ${on_string}" />
    </outputs>

    <tests>
        <test>
            <!-- Default parameters, no population map: the samples are handed to cstacks via -s.
                 The batch_1.catalog.* elements are skipped by the command's catalog filter. -->
            <param name="input_col">
                <collection type="list">
                    <element name="batch_1.catalog.alleles.tsv" value="genotypes/batch_1.catalog.alleles.tsv" ftype="tabular" />
                    <element name="batch_1.catalog.snps.tsv" value="genotypes/batch_1.catalog.snps.tsv" ftype="tabular" />
                    <element name="batch_1.catalog.tags.tsv" value="genotypes/batch_1.catalog.tags.tsv" ftype="tabular" />
                    <element name="PopA_01.alleles.tsv" value="genotypes/PopA_01.alleles.tsv" ftype="tabular" />
                    <element name="PopA_01.matches.tsv" value="genotypes/PopA_01.matches.tsv" ftype="tabular" />
                    <element name="PopA_01.snps.tsv" value="genotypes/PopA_01.snps.tsv" ftype="tabular" />
                    <element name="PopA_01.tags.tsv" value="genotypes/PopA_01.tags.tsv" ftype="tabular" />
                    <element name="PopA_02.alleles.tsv" value="genotypes/PopA_02.alleles.tsv" ftype="tabular" />
                    <element name="PopA_02.matches.tsv" value="genotypes/PopA_02.matches.tsv" ftype="tabular" />
                    <element name="PopA_02.snps.tsv" value="genotypes/PopA_02.snps.tsv" ftype="tabular" />
                    <element name="PopA_02.tags.tsv" value="genotypes/PopA_02.tags.tsv" ftype="tabular" />
                </collection>
            </param>

            <!-- log and summary -->
            <output name="output_log">
                <assert_contents>
                    <has_text text="done." />
                </assert_contents>
            </output>
            <output name="output_summary">
                <assert_contents>
                    <has_text text="Stacks Statistics" />
                </assert_contents>
            </output>

            <!-- catalog -->
            <output name="catalogtags">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogsnps">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogalleles">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
        </test>

        <test>
            <!-- Same inputs as the default test, but with gapped alignment switched on
                 (max_gaps and min_aln_len keep their default values). -->
            <param name="input_col">
                <collection type="list">
                    <element name="batch_1.catalog.alleles.tsv" value="genotypes/batch_1.catalog.alleles.tsv" ftype="tabular" />
                    <element name="batch_1.catalog.snps.tsv" value="genotypes/batch_1.catalog.snps.tsv" ftype="tabular" />
                    <element name="batch_1.catalog.tags.tsv" value="genotypes/batch_1.catalog.tags.tsv" ftype="tabular" />
                    <element name="PopA_01.alleles.tsv" value="genotypes/PopA_01.alleles.tsv" ftype="tabular" />
                    <element name="PopA_01.matches.tsv" value="genotypes/PopA_01.matches.tsv" ftype="tabular" />
                    <element name="PopA_01.snps.tsv" value="genotypes/PopA_01.snps.tsv" ftype="tabular" />
                    <element name="PopA_01.tags.tsv" value="genotypes/PopA_01.tags.tsv" ftype="tabular" />
                    <element name="PopA_02.alleles.tsv" value="genotypes/PopA_02.alleles.tsv" ftype="tabular" />
                    <element name="PopA_02.matches.tsv" value="genotypes/PopA_02.matches.tsv" ftype="tabular" />
                    <element name="PopA_02.snps.tsv" value="genotypes/PopA_02.snps.tsv" ftype="tabular" />
                    <element name="PopA_02.tags.tsv" value="genotypes/PopA_02.tags.tsv" ftype="tabular" />
                </collection>
            </param>
            <param name="gapped|use_gapped" value="yes" />

            <!-- log and summary -->
            <output name="output_log">
                <assert_contents>
                    <has_text text="done." />
                </assert_contents>
            </output>
            <output name="output_summary">
                <assert_contents>
                    <has_text text="Stacks Statistics" />
                </assert_contents>
            </output>

            <!-- catalog -->
            <output name="catalogtags">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogsnps">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogalleles">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
        </test>

        <test>
            <!-- Population map supplied: the command runs cstacks with -P/-M and afterwards
                 moves the catalog files from stacks_inputs to stacks_outputs. -->
            <param name="input_col">
                <collection type="list">
                    <element name="batch_1.catalog.alleles.tsv" value="genotypes/batch_1.catalog.alleles.tsv" ftype="tabular" />
                    <element name="batch_1.catalog.snps.tsv" value="genotypes/batch_1.catalog.snps.tsv" ftype="tabular" />
                    <element name="batch_1.catalog.tags.tsv" value="genotypes/batch_1.catalog.tags.tsv" ftype="tabular" />
                    <element name="PopA_01.alleles.tsv" value="genotypes/PopA_01.alleles.tsv" ftype="tabular" />
                    <element name="PopA_01.matches.tsv" value="genotypes/PopA_01.matches.tsv" ftype="tabular" />
                    <element name="PopA_01.snps.tsv" value="genotypes/PopA_01.snps.tsv" ftype="tabular" />
                    <element name="PopA_01.tags.tsv" value="genotypes/PopA_01.tags.tsv" ftype="tabular" />
                    <element name="PopA_02.alleles.tsv" value="genotypes/PopA_02.alleles.tsv" ftype="tabular" />
                    <element name="PopA_02.matches.tsv" value="genotypes/PopA_02.matches.tsv" ftype="tabular" />
                    <element name="PopA_02.snps.tsv" value="genotypes/PopA_02.snps.tsv" ftype="tabular" />
                    <element name="PopA_02.tags.tsv" value="genotypes/PopA_02.tags.tsv" ftype="tabular" />
                </collection>
            </param>
            <param name="popmap" value="denovo_map/popmap_cstacks.tsv" ftype="tabular" />

            <!-- log and summary -->
            <output name="output_log">
                <assert_contents>
                    <has_text text="done." />
                </assert_contents>
            </output>
            <output name="output_summary">
                <assert_contents>
                    <has_text text="Stacks Statistics" />
                </assert_contents>
            </output>

            <!-- catalog -->
            <output name="catalogtags">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogsnps">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
            <output name="catalogalleles">
                <assert_contents>
                    <has_text text="catalog generated on" />
                </assert_contents>
            </output>
        </test>
    </tests>

    <help>
<![CDATA[
.. class:: infomark

**What it does**

A catalog can be built from any set of samples processed by the ustacks or pstacks programs. It will create a set of consensus loci, merging alleles together. In the case of a genetic cross, a catalog would be constructed from the parents of the cross to create a set of all possible alleles expected in the progeny of the cross.

--------

**Input files**

Output from denovo_map, refmap, ustacks or pstacks

**Output files**

- XXX.tags.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: In the tags file, each stack begins with a consensus sequence for the entire stack, followed by the flags for that stack. Each individual read that was merged into that stack follows. The next stack then begins with its own consensus sequence.


- XXX.snps.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: If a stack has two SNPs called within it, this file contains two lines for that stack, one for each SNP.


- XXX.alleles.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation" />
</tool>