view stacks_rxstacks.xml @ 0:a1f5a4be394a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit f3a59c91c231cc1582479109e776d05602b7f24d-dirty
author iuc
date Tue, 14 Jun 2016 14:03:38 -0400
parents
children 55814b6afe69
line wrap: on
line source

<tool id="stacks_rxstacks" name="Stacks: rxstacks" version="@WRAPPER_VERSION@.0">
    <description>make corrections to genotype and haplotype calls</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <command><![CDATA[

        mkdir stacks_inputs stacks_outputs

        &&

        #for $input_file in $input_col:
            #set $ext = ""
            #if not str($input_file.name).endswith('.tsv'):
                #set $ext = ".tsv"
            #end if
            ln -s "${input_file}" "stacks_inputs/${input_file.name}${ext}" &&
        #end for

        rxstacks

            -t \${GALAXY_SLOTS:-1}

            -P stacks_inputs

            ## Batch description
            -b 1

            #if str($options_filtering.lnl):
                --lnl_filter
                --lnl_lim $options_filtering.lnl
            #end if

            $options_filtering.lnl_dist

            #if str($options_filtering.conf):
                --conf_filter
                --conf_lim $options_filtering.conf
            #end if

            #if $options_filtering.prune.prune_haplo:
                --prune_haplo
                #if str($options_filtering.prune.max_haplo):
                    --max_haplo $options_filtering.prune.max_haplo
                #end if
            #end if

            ## snp_model
            #if str( $snp_options.select_model.model_type) == "bounded":
                --model_type bounded
                --bound_low $snp_options.select_model.bound_low
                --bound_high $snp_options.select_model.bound_high
                --alpha $snp_options.select_model.alpha
            #else if str( $snp_options.select_model.model_type) == "snp":
                --model_type snp
                --alpha $snp_options.select_model.alpha
            #else
                --model_type fixed
            #end if

            -o stacks_outputs
    ]]></command>

    <inputs>
        <param name="input_col" format="tabular,txt" type="data_collection" collection_type="list" label="Output from previous Stacks pipeline steps (e.g. denovo_map or refmap)" />

        <section name="options_filtering" title="Data filtering options" expanded="true">
            <param name="lnl" type="float" value="" optional="true" argument="--lnl_lim" label="Filter loci with log likelihood values below this threshold" />
            <param name="lnl_dist" argument="--lnl_dist" truevalue="--lnl_dist" falsevalue="" type="boolean" checked="false" label="Print distribution of mean log likelihoods for catalog loci." />

            <param name="conf" type="float" value="0.75" optional="true" argument="--conf_lim" min="0.0" max="1.0" label="Proportion of loci in population that must be confounded relative to the catalog locus (between 0.0 and 1.0)."/>

            <conditional name="prune">
                <param name="prune_haplo" argument="--prune_haplo" type="boolean" checked="false" label="Prune out non-biological haplotypes unlikely to occur in the population." />
                <when value="false"></when>
                <when value="true">
                    <param name="max_haplo" argument="--max_haplo" type="integer" value="" optional="true" label="Only consider haplotypes for pruning if they occur in fewer than this value."/>
                </when>
            </conditional>
        </section>

        <!-- SNP Model options -->
        <section name="snp_options" title="SNP Model Options (ustacks options)" expanded="False">
            <expand macro="snp_options"/>
        </section>
    </inputs>

    <outputs>
        <data format="txt" name="output_log" label="rxstacks.log with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.rxstacks.log" />

        <data format="txt" name="output_lnl_dist" label="Distribution of mean log likelihoods with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.rxstacks_lnls.tsv">
            <filter>options_filtering['lnl_dist']</filter>
        </data>

        <collection name="tags" type="list" label="Assembled loci filtered from ${on_string}">
            <discover_datasets pattern="(?P&lt;name&gt;.+\.tags)\.tsv" ext="tabular" directory="stacks_outputs" />
        </collection>

        <collection name="snps" type="list" label="Model calls from each locus filtered from ${on_string}">
            <discover_datasets pattern="(?P&lt;name&gt;.+\.snps)\.tsv" ext="tabular" directory="stacks_outputs" />
        </collection>

        <collection name="alleles" type="list" label="Haplotypes/alleles recorded from each locus filtered from ${on_string}">
            <discover_datasets pattern="(?P&lt;name&gt;.+\.alleles)\.tsv" ext="tabular" directory="stacks_outputs" />
        </collection>

        <collection name="all_output" type="list" label="Full output from rxstacks on ${on_string}">
            <discover_datasets pattern="(?P&lt;name&gt;.+\.(tags|snps|alleles))\.tsv" ext="tabular" directory="stacks_outputs" />
        </collection>
    </outputs>

    <tests>
        <test>
            <param name="input_col">
                <collection type="list">
                    <element name="batch_1.catalog.alleles.tsv" ftype="tabular" value="genotypes/batch_1.catalog.alleles.tsv" />
                    <element name="batch_1.catalog.snps.tsv" ftype="tabular" value="genotypes/batch_1.catalog.snps.tsv" />
                    <element name="batch_1.catalog.tags.tsv" ftype="tabular" value="genotypes/batch_1.catalog.tags.tsv" />
                    <element name="PopA_01.alleles.tsv" ftype="tabular" value="genotypes/PopA_01.alleles.tsv" />
                    <element name="PopA_01.matches.tsv" ftype="tabular" value="genotypes/PopA_01.matches.tsv" />
                    <element name="PopA_01.snps.tsv" ftype="tabular" value="genotypes/PopA_01.snps.tsv" />
                    <element name="PopA_01.tags.tsv" ftype="tabular" value="genotypes/PopA_01.tags.tsv" />
                    <element name="PopA_02.alleles.tsv" ftype="tabular" value="genotypes/PopA_02.alleles.tsv" />
                    <element name="PopA_02.matches.tsv" ftype="tabular" value="genotypes/PopA_02.matches.tsv" />
                    <element name="PopA_02.snps.tsv" ftype="tabular" value="genotypes/PopA_02.snps.tsv" />
                    <element name="PopA_02.tags.tsv" ftype="tabular" value="genotypes/PopA_02.tags.tsv" />
               </collection>
            </param>
            <param name="options_filtering|lnl" value="-10.0" />
            <param name="options_filtering|lnl_dist" value="true" />
            <param name="options_filtering|prune_haplo" value="true" />
            <param name="options_filtering|max_haplo" value="1" />

            <output name="output_log">
                <assert_contents>
                    <has_text text="rxstacks executed" />
                </assert_contents>
            </output>

            <output name="output_lnl_dist">
                <assert_contents>
                    <has_text text="Median" />
                </assert_contents>
            </output>

            <!-- samples -->
            <output_collection name="tags">
                <element name="PopA_01.tags">
                    <assert_contents>
                        <has_text text="lane1_fakedata7_0" />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="snps">
                <element name="PopA_01.snps">
                    <assert_contents>
                        <has_text text="24.950" />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="alleles">
                <element name="PopA_01.alleles">
                    <assert_contents>
                        <has_text text="AC" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>

    <help>
<![CDATA[
.. class:: infomark

**What it does**

This program will run each of the Stacks components: first, running ustacks on each of the samples specified, building loci and calling SNPs in each. Second, cstacks will be run to create a catalog of all loci that were marked as 'parents' or 'samples' on the command line, and finally, sstacks will be executed to match each sample against the catalog. A bit more detail on this process can be found in the FAQ. The denovo_map.pl program will also load the results of each stage of the analysis: individual loci, the catalog, and matches against the catalog into the database (although this can be disabled). After matching, the program will build a database index to speed up access (index_radtags.pl) and enable web-based filtering.

--------

**Input files**

Output from denovo_map or ref_map

**Output files**

- XXX.tags.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.


- XXX.snps.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.


- XXX.alleles.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

- XXX.matches.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation" />
</tool>