Mercurial > repos > iuc > stacks_pstacks

<tool id="stacks_pstacks" name="Stacks: pstacks" version="@WRAPPER_VERSION@.0">
    <description>find stacks from short reads mapped to a reference genome</description>
    <expand macro="bio_tools"/>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <command><![CDATA[

        @CLEAN_EXT@

        mkdir stacks_inputs stacks_outputs

        &&

        #if $sample.is_of_type('sam')
            #set $data_path = "stacks_inputs/" + $clean_ext($sample.element_identifier) + ".sam"
            #set inputype = "sam"
        #else
            #set $data_path = "stacks_inputs/" + $clean_ext($sample.element_identifier) + ".bam"
            #set inputype = "bam"
        #end if

        ln -s '${sample}' '${data_path}'

        &&

        pstacks

            -p \${GALAXY_SLOTS:-1}

            -f $data_path

            -t $inputype

            ## Batch description
            -i $sample_id

            -m $m

            --max_clipped $max_clipped
            --min_mapq $min_mapq
            $keep_sec_alns

            ## snp_model
            #if str( $snp_options.select_model.model_type) == "bounded"
                --model_type bounded
                --bound_low $snp_options.select_model.bound_low
                --bound_high $snp_options.select_model.bound_high
                --alpha $snp_options.select_model.alpha
            #else if str( $snp_options.select_model.model_type) == "snp"
                --model_type snp
                --alpha $snp_options.select_model.alpha
            #else
                --model_type fixed
                --bc_err_freq $bc_err_freq
            #end if

            -o stacks_outputs

             2>&1 | tee pstacks.log

            ## If input is in bam format, stacks will output gzipped files (no option to control this)
            && if ls stacks_outputs/*.gz > /dev/null 2>&1; then gunzip stacks_outputs/*.gz; fi

            &&

            stacks_summary.py --stacks-prog pstacks --res-dir stacks_outputs --logfile pstacks.log --summary stacks_outputs/summary.html
    ]]></command>

    <inputs>
        <param name="sample" argument="-f" format="sam,bam" type="data" label="Input short reads from an individual" />

        <param name="sample_id" argument="-i" type="integer" value="" label="Give a unique numeric ID to this sample"/>

        <param name="m" argument="-m" type="integer" value="1" label="Minimum depth of coverage required to create a stack"/>

        <param name="max_clipped" argument="--max_clipped" type="float" value="0.15" min="0.0" max="1.0" label="Alignments with more than this fraction of soft-clipped bases are discarded"/>

        <param name="min_mapq" argument="--min_mapq" type="integer" value="10" label="Minimum required mapping quality"/>

        <param name="keep_sec_alns" argument="--keep_sec_alns" type="boolean" checked="false" truevalue="--keep_sec_alns" falsevalue="" label="Keep secondary alignments" />

        <!-- SNP Model options -->
        <section name="snp_options" title="SNP Model Options (pstacks options)" expanded="False">
            <expand macro="snp_options_full"/>
        </section>
    </inputs>

    <outputs>
        <data format="txt" name="output_log" label="pstacks.log with ${tool.name} on ${on_string}" from_work_dir="pstacks.log" />

        <data format="html" name="output_summary" label="Summary from ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/summary.html" />

        <collection name="tabs" type="list" label="Stacks from ${on_string}">
            <discover_datasets pattern="(?P&lt;name&gt;.+\.tags)\.tsv$" ext="tabular" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+\.snps)\.tsv$" ext="tabular" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+\.alleles)\.tsv$" ext="tabular" directory="stacks_outputs" />
            <discover_datasets pattern="(?P&lt;name&gt;.+\.models)\.tsv$" ext="tabular" directory="stacks_outputs" />
        </collection>
    </outputs>

    <tests>
        <test>
            <param name="sample" value="refmap/PopA_01.bam" ftype="bam" />
            <param name="sample_id" value="4" />

            <output name="output_log">
                <assert_contents>
                    <has_text text="done." />
                </assert_contents>
            </output>
            <output name="output_summary">
                <assert_contents>
                    <has_text text="Stacks Statistics" />
                </assert_contents>
            </output>

            <output_collection name="tabs">
                <element name="PopA_01.tags">
                    <assert_contents>
                        <has_text text="generated on " />
                    </assert_contents>
                </element>
                <element name="PopA_01.snps">
                    <assert_contents>
                        <has_text text="generated on " />
                    </assert_contents>
                </element>
                <element name="PopA_01.alleles">
                    <assert_contents>
                        <has_text text="generated on " />
                    </assert_contents>
                </element>
                <element name="PopA_01.models">
                    <assert_contents>
                        <has_text text="generated on " />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>

    <help>
<![CDATA[
.. class:: infomark

**What it does**

Similar to ustacks, except this program will extract stacks that have been aligned to a reference genome by a program such as Bowtie and identify SNPs. These stacks can then be processed with cstacks or sstacks.

--------

**Input files**

- SAM, BAM (e.g. from bowtie or bwa)

**Output files**

- XXX.tags.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.


- XXX.snps.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.


- XXX.alleles.tsv file:

See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation" />
</tool>
author	iuc
date	Tue, 22 Mar 2022 23:21:58 +0000
parents	f42f9ae6d109
children	a9ff3d0184b8