Mercurial > repos > iuc > stacks2_populations

<?xml version="1.0"?>
<macros>
    <!-- package requirements shared by the tools expanding this macro:
         stacks (version taken from @TOOL_VERSION@), python 3.7 and findutils 4.6.0;
         the expanding tool can append further requirements through the yield -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">stacks</requirement>
            <requirement type="package" version="3.7">python</requirement>
            <requirement type="package" version="4.6.0">findutils</requirement>
            <yield/>
        </requirements>
    </xml>

    <!-- version tokens: @TOOL_VERSION@ is used in this file as the version of the stacks
         requirement; @VERSION_SUFFIX@ and @PROFILE@ are not referenced in this file
         (presumably the wrapper revision and the Galaxy profile of the tools, verify in the tool files) -->
    <token name="@TOOL_VERSION@">2.55</token>
    <token name="@VERSION_SUFFIX@">4</token>
    <token name="@PROFILE@">20.05</token>

    <!-- citations block (six DOI references) shared by the tools expanding this macro -->
    <xml name="citation">
        <citations>
            <citation type="doi">10.1111/mec.12354</citation>
            <citation type="doi">10.1111/mec.12330</citation>
            <citation type="doi">10.1534/g3.111.000240</citation>
            <citation type="doi">10.1534/genetics.111.127324</citation>
            <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>
            <citation type="doi">10.1073/pnas.1006538107</citation>
        </citations>
    </xml>

    <!-- version command: takes the first line of the process_radtags help output that mentions
         process_radtags and prints its second space separated field.
         stderr is merged into the pipe with the portable "2>&1 |" form because the
         "|&" shorthand is only understood by bash 4 and later -->
    <xml name="version_cmd">
        <version_command><![CDATA[
        process_radtags -h 2>&1 | grep process_radtags  | head -n 1 | cut -d" " -f 2
        ]]>
        </version_command>
    </xml>

    <!-- text snippet with the Stacks credits and project links; the link syntax is
         reStructuredText, the text starts in column 0 on purpose (it is inserted verbatim) -->
    <token name="@STACKS_INFOS@">
<![CDATA[
--------

**Created by:**

Stacks was developed by Julian Catchen with contributions from Angel Amores, Paul Hohenlohe, and Bill Cresko

**Project links:**

`Stacks website <http://catchenlab.life.illinois.edu/stacks/>`_

`Stacks manual <http://catchenlab.life.illinois.edu/stacks/manual/>`_

`Stacks google group <http://groups.google.com/group/stacks-users>`_
]]></token>

    <!-- enzyme list for procrad and denovo
         options for a select parameter: the first entry has an empty value (Unspecified),
         every other entry uses the same string as value and as displayed text.
         NOTE(review): the spellings (e.g. bgIII) presumably have to match the names
         Stacks accepts, confirm against the Stacks help before "correcting" any of them -->
    <xml name="enzymes">
        <option value="">Unspecified</option>
        <option value="aciI">aciI</option>
        <option value="ageI">ageI</option>
        <option value="aluI">aluI</option>
        <option value="apaLI">apaLI</option>
        <option value="apeKI">apeKI</option>
        <option value="apoI">apoI</option>
        <option value="aseI">aseI</option>
        <option value="bamHI">bamHI</option>
        <option value="bbvCI">bbvCI</option>
        <option value="bfaI">bfaI</option>
        <option value="bfuCI">bfuCI</option>
        <option value="bgIII">bgIII</option>
        <option value="bsaHI">bsaHI</option>
        <option value="bspDI">bspDI</option>
        <option value="bstYI">bstYI</option>
        <option value="btgI">btgI</option>
        <option value="cac8I">cac8I</option>
        <option value="claI">claI</option>
        <option value="csp6I">csp6I</option>
        <option value="ddeI">ddeI</option>
        <option value="dpnII">dpnII</option>
        <option value="eaeI">eaeI</option>
        <option value="ecoRI">ecoRI</option>
        <option value="ecoRV">ecoRV</option>
        <option value="ecoT22I">ecoT22I</option>
        <option value="haeIII">haeIII</option>
        <option value="hindIII">hindIII</option>
        <option value="hinP1I">hinP1I</option>
        <option value="hpaII">hpaII</option>
        <option value="hpyCH4IV">hpyCH4IV</option>
        <option value="kpnI">kpnI</option>
        <option value="mluCI">mluCI</option>
        <option value="mseI">mseI</option>
        <option value="mslI">mslI</option>
        <option value="mspI">mspI</option>
        <option value="ncoI">ncoI</option>
        <option value="ndeI">ndeI</option>
        <option value="nheI">nheI</option>
        <option value="ngoMIV">ngoMIV</option>
        <option value="nlaIII">nlaIII</option>
        <option value="notI">notI</option>
        <option value="nsiI">nsiI</option>
        <option value="nspI">nspI</option>
        <option value="pacI">pacI</option>
        <option value="pspXI">pspXI</option>
        <option value="pstI">pstI</option>
        <option value="rsaI">rsaI</option>
        <option value="sacI">sacI</option>
        <option value="sau3AI">sau3AI</option>
        <option value="sbfI">sbfI</option>
        <option value="sexAI">sexAI</option>
        <option value="sgrAI">sgrAI</option>
        <option value="speI">speI</option>
        <option value="sphI">sphI</option>
        <option value="taqI">taqI</option>
        <option value="xbaI">xbaI</option>
        <option value="xhoI">xhoI</option>
    </xml>

    <!-- log file handling
         @TEE_APPEND_LOG@ emits a redirection of stderr (2>) into the output_log data set
         when $output_log evaluates to true and nothing otherwise; despite the token name
         the snippet neither calls tee nor appends -->
    <token name="@TEE_APPEND_LOG@"><![CDATA[
        #if $output_log
            2> '$output_log'
        #end if
    ]]></token>
    <!-- input side of the log handling: boolean add_log (checked by default, values yes/no) -->
    <xml name="in_log">
        <param name="add_log"
               type="boolean"
               checked="true"
               truevalue="yes"
               falsevalue="no"
               label="Add log output as dataset"/>
    </xml>
    <!-- output side of the log handling: txt data set output_log, filtered on the add_log parameter -->
    <xml name="out_log">
        <data format="txt" name="output_log" label="${tool.name} on ${on_string} log file">
            <filter>add_log</filter>
        </data>
    </xml>

    <!-- inputs from previous pipeline steps -->
    <!-- list collection (tabular/txt) holding the per sample loci and polymorphism files -->
    <xml name="input_stacks_macro">
        <param name="input_stacks"
               type="data_collection"
               collection_type="list"
               format="tabular,txt"
               label="Loci and polymorphism"
               help="output from previous Stacks pipeline steps e.g. denovo_map, refmap or ustacks"/>
    </xml>
    <!-- list collection (tabular/txt) holding the catalog files;
         help text worded like the sibling input macros ("output from previous ... steps") -->
    <xml name="input_cat_macro">
        <param name="input_cat" format="tabular,txt" type="data_collection" collection_type="list" label="Catalog of loci" help="output from previous Stacks pipeline steps e.g. denovo_map, refmap or cstacks"/>
    </xml>
    <!-- list collection (tabular/txt) holding the matches of the samples to the catalog -->
    <xml name="input_matches_macro">
        <param name="input_matches"
               type="data_collection"
               collection_type="list"
               format="tabular,txt"
               label="Matches to the catalog"
               help="output from previous Stacks pipeline steps e.g. denovo_map, refmap or sstacks"/>
    </xml>
    <!-- mandatory multi-select of bam data sets (parameter input_bam) -->
    <xml name="bam_input_macro">
        <param name="input_bam"
               type="data"
               format="bam"
               multiple="true"
               optional="false"
               label="Aligned data"
               help="either the matches to the catalog (bam), i.e. tsv2bam, or reads aligned to a reference"/>
    </xml>
    <!-- list collection (vcf / fasta.gz) with the assembled contigs and variant sites, documented as argument -P -->
    <xml name="input_aln_macro">
        <param name="input_aln"
               argument="-P"
               type="data_collection"
               collection_type="list"
               format="vcf,fasta.gz"
               label="Assembled contigs and variant sites"
               help="output from previous Stacks pipeline steps (e.g. gstacks, denovo_map, or refmap)"/>
    </xml>

    <!-- Cheetah snippets that symlink the data sets of previous pipeline steps into stacks_inputs/
         - stacks: ustacks style outputs (POP.tags.tsv, POP.alleles.tsv, POP.snps.tsv);
             additionally collects the sample names in the list $samples
         - cat: cstacks catalog (catalog.tags.tsv, catalog.alleles.tsv, catalog.snps.tsv)
         - matches: sstacks outputs (POP.matches.tsv)
         each snippet ends its commands with "&&" so that it can be prepended to a command line;
         "re" has to be imported by the surrounding template -->
    <token name="@LINK_STACKS_INPUT@"><![CDATA[
    ## collect the sample names while linking the per-sample files
    #set $samples = []
    #for $input_file in $input_stacks
        ## the link name is the element identifier, extended by .tsv if it lacks this suffix
        #set $filename = str($input_file.element_identifier)
        #if not $filename.endswith('.tsv')
            #set $filename += ".tsv"
        #end if
        ## only non-catalog tags/alleles/snps files are linked
        #if re.search('^(?!catalog).+\.(tags|alleles|snps)\.tsv$', $filename)
            ln -s '${input_file}' 'stacks_inputs/$filename' &&
            ## the sample name is the file name without the 9 characters of ".tags.tsv"
            #if $filename.endswith('.tags.tsv')
                $samples.append($filename[:-9])
            #end if
        #end if
    #end for
    ]]>
    </token>
    <!-- symlinks catalog.(tags|alleles|snps).tsv from the collection $input_cat into stacks_inputs/ -->
    <token name="@LINK_CAT_INPUT@"><![CDATA[
    #for $input_file in $input_cat
        ## the link name is the element identifier, extended by .tsv if it lacks this suffix
        #set $filename = str($input_file.element_identifier)
        #if not $filename.endswith('.tsv')
            #set $filename += ".tsv"
        #end if
        ## everything that is not a catalog file is ignored
        #if re.search('^catalog\.(tags|alleles|snps)\.tsv$', $filename)
            ln -s '${input_file}' 'stacks_inputs/$filename' &&
        #end if
    #end for
    ]]></token>
    <!-- symlinks the files ending in matches.tsv from the collection $input_matches into stacks_inputs/ -->
    <token name="@LINK_MATCHES_INPUT@"><![CDATA[
    #for $input_file in $input_matches
        ## the link name is the element identifier, extended by .tsv if it lacks this suffix
        #set $filename = str($input_file.element_identifier)
        #if not $filename.endswith('.tsv')
            #set $filename = $filename + ".tsv"
        #end if
        ## the dot is escaped so that only a literal "matches.tsv" suffix qualifies
        #if re.search('matches\.tsv$', $filename)
            ln -s '${input_file}' 'stacks_inputs/$filename' &&
        #end if
    #end for
    ]]></token>

    <!-- fastq input for process_radtags/shortreads (non-batch) and clone/kmerfilter (batch)
         for batch processing choose multiple=false and listtype=paired
         otherwise multiple=true and listtype=list:paired
         the conditional offers a barcode location select per read type; content passed by the
         expanding tool is inserted after the conditional -->
    <xml name="fastq_input_bc" token_multiple="false" token_listtype="paired">
        <conditional name="input_type">
            <param name="input_type_select" type="select" label="Single-end or paired-end reads">
                <option value="single" selected="True">Single-end files</option>
                <option value="paired">Paired-end files</option>
            </param>
            <when value="single">
                <param name="fqinputs" argument="-f" type="data" format="fastqsanger,fastqsanger.gz" multiple="@MULTIPLE@" label="Single-end reads"/>
                <param name="barcode_encoding" type="select" label="Barcode location">
                    <expand macro="barcode_encoding_single" type="Barcode"/>
                </param>
            </when>
            <when value="paired">
                <param name="fqinputs" type="data_collection" collection_type="@LISTTYPE@" label="Paired-end reads" format="fastqsanger,fastqsanger.gz"/>
                <param name="barcode_encoding" type="select" label="Barcode location">
                    <expand macro="barcode_encoding_pair" type="Barcode"/>
                </param>
            </when>
        </conditional>
        <yield/>
    </xml>

    <xml name="fastq_input_bc_file" token_multiple="false" token_listtype="paired">
        <expand macro="fastq_input_bc" multiple="@MULTIPLE@" listtype="@LISTTYPE@">
            <param name="barcode" argument="-b" type="data" format="tabular,txt" label="Barcode file"/>
        </expand>
    </xml>


    <!-- optional barcode handling: a barcode location select whose first option (empty value)
         disables it, the remaining options are supplied by the expanding tool, plus an
         optional barcode file (-b) -->
    <xml name="barcode_selector">
        <param name="barcode_encoding" type="select" label="Barcode location">
            <option value="">Disabled</option>
            <yield/>
        </param>
        <param name="barcode" argument="-b" type="data" format="tabular,txt" optional="true" label="Barcode file" help="The barcode file is only used if a barcode location is selected"/>
    </xml>


    <!-- fastq input (used in denovomap, tsv2bam, ustacks)
         tokens:
         - fastq_optional: whether the read input is optional (true/false)
         - se_option: wording of the "single end" option (for tsv2bam these are the
              reverse reads, for the other tools the forward reads)
         - help: help text of the select
         - multiple: multiple attribute of the data parameter of the single branch
         - listtype: collection type of the paired branch -->
    <xml name="fastq_input"
         token_fastq_optional="false"
         token_se_option="single end or forward reads"
         token_help=""
         token_multiple="false"
         token_listtype="paired">
        <conditional name="input_type">
            <param name="input_type_select"
                   type="select"
                   label="Short read data from individuals"
                   help="@HELP@">
                <option value="single" selected="true">@SE_OPTION@</option>
                <option value="paired">(paired) dataset list</option>
            </param>
            <when value="single">
                <param name="fqinputs"
                       argument="-f"
                       type="data"
                       format="fastqsanger,fastqsanger.gz,fasta,fasta.gz"
                       multiple="@MULTIPLE@"
                       optional="@FASTQ_OPTIONAL@"
                       label="Reads"/>
            </when>
            <when value="paired">
                <param name="fqinputs"
                       argument="-f"
                       type="data_collection"
                       collection_type="@LISTTYPE@"
                       format="fastqsanger,fastqsanger.gz,fasta,fasta.gz"
                       optional="@FASTQ_OPTIONAL@"
                       label="List for forward reads or read pairs"/>
            </when>
        </conditional>
    </xml>

    <!-- helper functions (Cheetah) for linking fastq/fasta data sets into stacks_inputs/
         defines clean_ext, fastq_input_foo, fastq_input_batch and fastq_input_nonbatch -->
    <token name="@FASTQ_INPUT_FUNCTIONS@"><![CDATA[
    #from os.path import splitext
    #import re

    ## clean_ext strips trailing read number / sequence / compression / alignment
    ## extensions from an identifier, one extension per iteration, until none of
    ## the listed suffixes is left; the cleaned identifier is the output of the function
    #def clean_ext($identifier)
        #while $identifier.endswith(('.1', '.2', '.fa', '.fq', '.fasta', '.fastq', '.gz', '.gzip', '.sam', '.bam'))
            #set $identifier = splitext($identifier)[0]
        #end while
$identifier#slurp
    #end def

    ## fastq_input_foo determine link command, link path, name and input type
    ## for a single data set
    ##
    ## inputs
    ## - sample data set or collection element
    ## - read_direction key used to pick the data set if sample is a collection
    ## - infix string inserted between the cleaned name and the extension
    ## return (link_command, data_path, name, inputype)
    ## - link_command bash command (ending in &&) to link the data set
    ## - data_path file name of the link (stacks_inputs/NAME+INFIX.EXT)
    ## - name element identifier cleaned with clean_ext
    ## - inputype input type as used in stacks ([gz]fast(a|q)) or UNKNOWN
    #def fastq_input_foo( $sample, $read_direction="", $infix="" )
        #set $name = $clean_ext($sample.element_identifier)
        #if $sample.is_collection:
            #set $cur_sample=$sample[$read_direction]
        #else:
            #set $cur_sample=$sample
        #end if

        #if $cur_sample.is_of_type('fastqsanger')
            #set $ext =  "fastq"
            #set $inputype = "fastq"
        #else if $cur_sample.is_of_type('fastqsanger.gz')
            #set $ext = "fastq.gz"
            #set $inputype = "gzfastq"
        #else if $cur_sample.is_of_type('fasta')
            #set $ext = "fasta"
            #set $inputype = "fasta"
        #else if $cur_sample.is_of_type('fasta.gz')
            #set $ext = "fasta.gz"
            #set $inputype = "gzfasta"
        #else
            ## $ext has to be defined here as well: it is used for $data_path below and
            ## would otherwise abort the template with an undefined name instead of
            ## reporting the UNKNOWN input type
            #set $ext = "unknown"
            #set $inputype = "UNKNOWN"
        #end if
        #set $data_path = "stacks_inputs/"+$name+$infix+"."+$ext
        #set $link_cmd = "ln -s '%s' '%s' &&" % ($cur_sample, $data_path)
        #return ($link_cmd, $data_path, $name, $inputype)
    #end def

    ## fastq_input_batch determine link command, access path(s), and input type
    ## for batch tools
    ##
    ## inputs
    ## - sample data set / pair
    ## - type "single" / "paired"
    ## return (link_command, fwd_path, rev_path, inputype)
    ## - link_command bash command(s) to link the data sets
    ## - fwd_path file name of the link to the forward data set
    ## - rev_path file name of the link to the reverse data set (if type=paired,
    ##   empty string otherwise)
    ## - inputype input type as used in stacks ([gz]fast(a|q))
    #def fastq_input_batch($sample, $type)
        #if $type == "single"
            #set ($link_cmd, $path, $name, $inputype) = $fastq_input_foo($sample, "", "")
            #return ($link_cmd, $path, "", $inputype)
        #else:
            #set ($fwd_link_cmd, $fwd_path, $name, $inputype) = $fastq_input_foo($sample, "forward", ".1")
            #set ($rev_link_cmd, $rev_path, $name, $inputype) = $fastq_input_foo($sample, "reverse", ".2")
            #return ( $fwd_link_cmd+$rev_link_cmd, $fwd_path, $rev_path, $inputype)
        #end if
    #end def

    ## fastq_input_nonbatch determine link command, access path(s), and input type
    ## for non-batch tools (procrad, shortreads, denovomap the former need R[12]_
    ## and the latter needs .[12])
    ##
    ## inputs
    ## - samples list of data set / pair
    ## - type "single" / "paired"
    ## - infix_pattern pattern for the infix of the files (needs to contain %d which is replaced by 1/2)
    ## return (link_command, inputype)
    ## - link_command bash command(s) to link the data sets
    ## - inputype input type as used in stacks ([gz]fast(a|q)); this is the type of the
    ##   last processed data set, and it is undefined if samples is empty
    #def fastq_input_nonbatch( $samples, $type, $infix_pattern )
        #set $link_command = ""
        #for $sample in $samples
            #if $type == "single"
                #set ($lc, $path, $name, $inputype) = $fastq_input_foo($sample, "", "")
                #set link_command += lc
            #else:
                #set ($lc, $path, $name, $inputype) = $fastq_input_foo($sample, "forward", $infix_pattern % (1))
                #set link_command += lc
                #set ($lc, $path, $name, $inputype) = $fastq_input_foo($sample, "reverse", $infix_pattern % (2))
                #set link_command += lc
            #end if
        #end for
        #return ($link_command, $inputype)
    #end def
    ]]></token>

    <!-- token for BAM input, for each data set of $input_bam that is of type bam
         (NAME is its element identifier cleaned with clean_ext):
       - creates the symlink bam_inputs/NAME.bam pointing to the data set
       - appends -B 'bam_inputs/NAME.bam' to the variable bamlist

         needs the clean_ext helper defined in the FASTQ_INPUT_FUNCTIONS token.
         NOTE(review): the former description mentioned an optional $infix (NAME.INFIX.bam,
         needed for gstacks in denovo mode which needs .matches.bam); the token as written
         never reads $infix, confirm whether that support is still expected -->
    <token name="@BAM_INPUT@"><![CDATA[
    #set $bamlist = ""
    #for $bam in $input_bam:
        #if $bam.is_of_type('bam')
            #set $filename = $clean_ext($bam.element_identifier)+".bam"
            ## quote the link name like in bamlist below: identifiers may contain blanks
            ln -s '$bam' 'bam_inputs/$filename' &&
            #set bamlist += " -B 'bam_inputs/"+$filename+"'"
        #end if
    #end for
    ]]></token>

    <!-- command suffix (starts with "&&") that decompresses stacks_outputs/catalog.calls
         into stacks_outputs/catalog.calls.vcf; gunzip -c writes to stdout, the
         compressed file is kept -->
    <token name="@EXTRACT_VCF@"><![CDATA[
    ## the catalog.calls output is a gzip-ed vcf extract it
    ## to make it usable in Galaxy (with the downside that we
    ## need to gzip it again for downstream calls like populations)
    && gunzip -c stacks_outputs/catalog.calls > stacks_outputs/catalog.calls.vcf
    ]]></token>


    <!-- tokens and macros for gapped alignment options
         the _onoff macro gives an empty conditional (which is not so nice
         but allows to be used also in the full macro)

         @GAP_OPTIONS@ is the command line part for the gap_options macro: the max_gaps and
         min_aln_len options if gapped alignment is switched on, the disable-gapped flag otherwise -->
    <token name="@GAP_OPTIONS@"><![CDATA[
    #if $gapped.use_gapped == "yes"
        --max_gaps $gapped.max_gaps
        --min_aln_len $gapped.min_aln_len
    #else
        --disable-gapped
    #end if
    ]]></token>
    <!-- command line part for the gap_options_onoff macro: emits the disable-gapped flag
         unless use_gapped is "yes", and nothing otherwise -->
    <token name="@GAP_OPTIONS_ONOFF@"><![CDATA[
    #if $gapped.use_gapped != "yes"
        --disable-gapped
    #end if
    ]]></token>
    <!-- full gapped alignment options: the on/off conditional of gap_options_onoff with the
         two numeric parameters placed in its "yes" branch -->
    <xml name="gap_options">
        <expand macro="gap_options_onoff">
            <param argument="--max_gaps"
                   type="float"
                   value="2.0"
                   label="Number of gaps allowed between stacks before merging"/>
            <param argument="--min_aln_len"
                   type="float"
                   value="0.8"
                   min="0.0"
                   max="1.0"
                   label="Minimum length of aligned sequence in a gapped alignment"/>
        </expand>
    </xml>
    <!-- on/off switch for gapped alignments (default: yes); parameters passed by the
         expanding macro or tool end up in the "yes" branch, the "no" branch is empty -->
    <xml name="gap_options_onoff">
        <conditional name="gapped">
            <param name="use_gapped"
                   argument="--disable-gapped"
                   type="select"
                   label="Perform gapped alignments between stacks">
                <option value="no">No</option>
                <option value="yes" selected="true">Yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <yield/>
            </when>
        </conditional>
    </xml>


    <!-- ustacks outputs collection containing SAMPLE.tags.tsv, SAMPLE.snps.tsv, SAMPLE.alleles.tsv (SAMPLE!=catalog)
         discovered in stacks_outputs/; the element name is the file name without the .tsv suffix,
         the tooladd token is inserted into the label -->
    <!-- TODO tags, snps, and alleles could go to sub collections; same for other tools -->
    <xml name="ustacks_outputs_macro" token_tooladd="">
        <collection name="tabs" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Loci and polymorphism">
            <discover_datasets pattern="(?P&lt;name&gt;(?!catalog).+\.tags)\.tsv$" ext="tabular" directory="stacks_outputs"/>
            <discover_datasets pattern="(?P&lt;name&gt;(?!catalog).+\.snps)\.tsv$" ext="tabular" directory="stacks_outputs"/>
            <discover_datasets pattern="(?P&lt;name&gt;(?!catalog).+\.alleles)\.tsv$" ext="tabular" directory="stacks_outputs"/>
        </collection>
    </xml>
    <!-- cstacks outputs collection containing catalog.tags.tsv, catalog.snps.tsv, catalog.alleles.tsv
         discovered in stacks_outputs/; the element name is the file name without the .tsv suffix -->
    <xml name="cstacks_outputs_macro" token_tooladd="">
        <collection name="catalog" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Catalog of loci">
            <discover_datasets pattern="(?P&lt;name&gt;catalog\.(tags|snps|alleles))\.tsv$" ext="tabular" directory="stacks_outputs"/>
        </collection>
    </xml>
    <!-- sstacks outputs collection containing SAMPLE.matches.tsv
         discovered in stacks_outputs/; the element name is the file name without the .tsv suffix -->
    <xml name="sstacks_outputs_macro" token_tooladd="">
        <collection name="matches" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Matches to the catalog">
            <discover_datasets pattern="(?P&lt;name&gt;.+\.matches)\.tsv$" ext="tabular" directory="stacks_outputs"/>
        </collection>
    </xml>
    <!-- tsv2bam outputs collection containing SAMPLE.matches.bam
         discovered in stacks_outputs/; the element name is the file name without the .bam suffix -->
    <xml name="tsv2bam_outputs_macro" token_tooladd="">
        <collection name="bams" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Matches to the catalog (bam)">
            <discover_datasets pattern="(?P&lt;name&gt;.+\.matches)\.bam$" ext="bam" directory="stacks_outputs"/>
        </collection>
    </xml>
    <!-- gstacks outputs: the collection of gstacks_outputs_macro (catalog.calls.vcf and catalog.fa.gz)
         + the gstacks.log.distribs file (if add_log_distribs is set)
         + optional read alignments which are only created in denovo mode with the advanced option
           write_alignments: alignments.bam if no popmap is given and a collection of
           POP.alns.bam files otherwise
         the dots of the .alns.bam suffix are escaped in the discovery pattern so that only
         this literal suffix matches -->
    <xml name="gstacks_outputs_full_macro" token_tooladd="">
        <expand macro="gstacks_outputs_macro"/>
        <data format="txt" name="distribs" label="${tool.name} on ${on_string} log.distribs" from_work_dir="stacks_outputs/gstacks.log.distribs">
            <filter>add_log_distribs</filter>
        </data>
        <collection name="gstacks_alns_out" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Read alignments">
            <discover_datasets pattern="(?P&lt;name&gt;.*)\.alns\.bam$" format="bam" directory="stacks_outputs"/>
            <filter>mode_cond['mode_select'] == 'denovo' and mode_cond['advanced_cond']['advanced_select'] == "yes" and mode_cond['advanced_cond']['write_alignments'] and popmap!=None</filter>
        </collection>
        <data name="gstacks_aln_out" format="bam" label="${tool.name} @TOOLADD@ on ${on_string} Read alignment" from_work_dir="stacks_outputs/alignments.bam">
            <filter>mode_cond['mode_select'] == 'denovo' and mode_cond['advanced_cond']['advanced_select'] == "yes" and mode_cond['advanced_cond']['write_alignments'] and popmap==None</filter>
        </data>
    </xml>
    <!-- list collection with catalog.calls.vcf (vcf) and catalog.fa.gz (fasta.gz) discovered in
         stacks_outputs/; the element names are the complete file names -->
    <xml name="gstacks_outputs_macro" token_tooladd="">
        <collection name="gstacks_out" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Assembled contigs and variant sites">
            <discover_datasets pattern="(?P&lt;name&gt;catalog\.calls\.vcf)$" ext="vcf" directory="stacks_outputs"/>
            <discover_datasets pattern="(?P&lt;name&gt;catalog\.fa\.gz)$" ext="fasta.gz" directory="stacks_outputs"/>
        </collection>
    </xml>

    <!-- default output of populations
         five unfiltered data sets taken from stacks_outputs/ (haplotypes, hapstats, log.distribs,
         sumstats_summary, sumstats); the tooladd token is inserted into every label -->
    <xml name="populations_output_light" token_tooladd="">
        <data format="tabular" name="out_haplotypes" label="${tool.name} @TOOLADD@ on ${on_string} Raw Genotypes/Haplotypes" from_work_dir="stacks_outputs/populations.haplotypes.tsv"/>
        <data format="tabular" name="out_hapstats" label="${tool.name} @TOOLADD@ on ${on_string} Population-level haplotype summary statistics" from_work_dir="stacks_outputs/populations.hapstats.tsv"/>
        <data format="txt" name="out_populations_log_distribs" label="${tool.name} @TOOLADD@ on ${on_string} Populations log distributions" from_work_dir="stacks_outputs/populations.log.distribs"/>
        <data format="tabular" name="out_sumstats_sum" label="${tool.name} @TOOLADD@ on ${on_string} Summary of Population-level summary statistics" from_work_dir="stacks_outputs/populations.sumstats_summary.tsv"/>
        <data format="tabular" name="out_sumstats" label="${tool.name} @TOOLADD@ on ${on_string} Population-level summary statistics" from_work_dir="stacks_outputs/populations.sumstats.tsv"/>
    </xml>

    <!-- complete output of populations: the default outputs of populations_output_light plus
         optional outputs, each guarded by a filter on the parameter that requests it.
         NOTE(review): several non tabular files (fasta, phylip, genepop, plink, ...) are declared
         with format="tabular"; left unchanged because the datatype is visible to downstream
         tools and workflows, confirm whether this is intended -->
    <xml name="populations_output_full">
        <expand macro="populations_output_light"/>

        <data format="txt" name="out_sql" label="${tool.name} on ${on_string} Genotyping markers" from_work_dir="stacks_outputs/populations.sql.tsv">
            <filter>genetic_map_options['map_type'] and genetic_map_options['map_format']</filter>
        </data>

        <!-- log_fst_comp populations.fst_summary.tsv populations.phistats_summary.tsv populations.phistats.tsv-->
        <data format="tabular" name="out_phistats" label="${tool.name} on ${on_string} Phi_st statistics" from_work_dir="stacks_outputs/populations.phistats.tsv">
            <filter>fstats_conditional['fstats']=='yes' or kernel_smoothing['options_kernel']['kernel'] in ['--k', '--smooth-fstats']</filter>
        </data>
        <data format="tabular" name="out_phistats_sum" label="${tool.name} on ${on_string} Summary of Phi_st statistics" from_work_dir="stacks_outputs/populations.phistats_summary.tsv">
            <filter>fstats_conditional['fstats']=='yes' or kernel_smoothing['options_kernel']['kernel'] in ['--k', '--smooth-fstats']</filter>
        </data>
        <data format="tabular" name="out_fststats_sum" label="${tool.name} on ${on_string} Summary of Fst statistics" from_work_dir="stacks_outputs/populations.fst_summary.tsv">
            <filter>fstats_conditional['fstats']=='yes' or kernel_smoothing['options_kernel']['kernel'] in ['--k', '--smooth-fstats']</filter>
        </data>
        <collection type="list" name="out_fstats_popcompare" label="${tool.name} on ${on_string} Fst statistics between populations">
            <discover_datasets pattern="populations\.fst_(?P&lt;designation&gt;[\w]+-[\w]+)\.tsv" format="tabular" directory="stacks_outputs"/>
            <filter>fstats_conditional['fstats']=='yes' or kernel_smoothing['options_kernel']['kernel'] in ['--k', '--smooth-fstats']</filter>
        </collection>

        <!-- fasta_loci populations.loci.fa
             fasta_samples populations.samples.fa
             fasta_samples_raw populations.samples-raw.fa-->
        <data format="tabular" name="out_fasta_strict" label="${tool.name} on ${on_string} per-locus consensus sequences" from_work_dir="stacks_outputs/populations.loci.fa">
            <filter>populations_output['fasta_loci']</filter>
        </data>
        <data format="tabular" name="out_fasta" label="${tool.name} on ${on_string} per-locus, per-haplotype sequences" from_work_dir="stacks_outputs/populations.samples.fa">
            <filter>populations_output['fasta_samples']</filter>
        </data>
        <data format="tabular" name="out_fasta_raw" label="${tool.name} on ${on_string} per-locus, per-haplotype sequences (regardless of biological plausibility)" from_work_dir="stacks_outputs/populations.samples-raw.fa">
            <filter>populations_output['fasta_samples_raw']</filter>
        </data>

        <!-- phylip populations.fixed.phylip populations.fixed.phylip.log
             phylip_var populations.var.phylip populations.var.phylip.log-->
        <data format="tabular" name="out_phylip_all_pop_fix" label="${tool.name} on ${on_string} Phylip nucleotides that are fixed-within, and variant among populations" from_work_dir="stacks_outputs/populations.fixed.phylip">
            <filter>populations_output['phylip']</filter>
        </data>
        <data format="tabular" name="out_phylip_all_loci_fix" label="${tool.name} on ${on_string} Phylip (loci) nucleotides that are fixed-within, and variant among populations" from_work_dir="stacks_outputs/populations.fixed.phylip.log">
            <filter>populations_output['phylip']</filter>
        </data>
        <data format="tabular" name="out_phylip_all_pop_var" label="${tool.name} on ${on_string} Phylip all sequence as well as variable sites" from_work_dir="stacks_outputs/populations.var.phylip">
            <filter>populations_output['phylip_var']</filter>
        </data>
        <data format="tabular" name="out_phylip_all_loci_var" label="${tool.name} on ${on_string} Phylip (loci) all sequence as well as variable sites" from_work_dir="stacks_outputs/populations.var.phylip.log">
            <filter>populations_output['phylip_var']</filter>
        </data>

        <!-- genepop populations.haps.genepop populations.snps.genepop -->
        <data format="tabular" name="out_genepop_snps" label="${tool.name} on ${on_string} SNPs in GenePop format" from_work_dir="stacks_outputs/populations.snps.genepop">
            <filter>populations_output['genepop']</filter>
        </data>
        <data format="tabular" name="out_genepop_haps" label="${tool.name} on ${on_string} Haplotypes in GenePop format" from_work_dir="stacks_outputs/populations.haps.genepop">
            <filter>populations_output['genepop']</filter>
        </data>

        <!-- vcf populations.haps.vcf populations.snps.vcf -->
        <data format="vcf" name="out_vcf_haplotypes_snps" label="${tool.name} on ${on_string} SNPs in VCF format" from_work_dir="stacks_outputs/populations.snps.vcf">
            <filter>populations_output['vcf']</filter>
        </data>
        <data format="vcf" name="out_vcf_haplotypes_haps" label="${tool.name} on ${on_string} Haplotypes in VCF format" from_work_dir="stacks_outputs/populations.haps.vcf">
            <filter>populations_output['vcf']</filter>
        </data>

        <!--plink populations.plink.map populations.plink.ped-->
        <data format="tabular" name="out_plink_markers" label="${tool.name} on ${on_string} PLINK (markers)" from_work_dir="stacks_outputs/populations.plink.map">
            <filter>populations_output['plink']</filter>
        </data>
        <data format="tabular" name="out_plink_genotypes" label="${tool.name} on ${on_string} PLINK format (genotypes)" from_work_dir="stacks_outputs/populations.plink.ped">
            <filter>populations_output['plink']</filter>
        </data>

        <!-- hzar populations.hzar.csv -->
        <data format="tabular" name="out_hzar" label="${tool.name} on ${on_string} hzar format" from_work_dir="stacks_outputs/populations.hzar.csv">
            <filter>populations_output['hzar']</filter>
        </data>
        <!--structure populations.structure-->
        <data format="tabular" name="out_structure" label="${tool.name} on ${on_string} Structure format" from_work_dir="stacks_outputs/populations.structure">
            <filter>populations_output['structure']</filter>
        </data>

        <!-- radpainter populations.haps.radpainter -->
        <data format="tabular" name="out_radpainter" label="${tool.name} on ${on_string} Radpainter format" from_work_dir="stacks_outputs/populations.haps.radpainter">
            <filter>populations_output['radpainter']</filter>
        </data>

        <!-- treemix populations.treemix -->
        <data format="tabular" name="out_treemix" label="${tool.name} on ${on_string} Treemix format" from_work_dir="stacks_outputs/populations.treemix">
            <filter>populations_output['treemix']</filter>
        </data>

        <!-- gtf populations.gtf -->
        <data format="gtf" name="out_gtf" label="${tool.name} on ${on_string} gtf" from_work_dir="stacks_outputs/populations.gtf">
            <filter>populations_output['gtf']</filter>
        </data>
    </xml>

    <!-- fastq output for kmer/clone-filter
         - clean / clean_pair: data set for single input, paired collection for paired input
         - discarded / discarded_pair: the same for the discarded reads, only if capture is set
         the format token sets the format of all four outputs; the content passed by the
         expanding tool is inserted into each of the four outputs -->
    <xml name="fastq_output_filter" token_format="">
        <data name="clean" format="@FORMAT@" label="${tool.name} on ${on_string}">
            <filter>input_type['input_type_select'] == 'single'</filter>
            <yield/>
        </data>
        <collection name="clean_pair" type="paired" format="@FORMAT@" label="${tool.name} on ${on_string}">
            <filter>input_type['input_type_select'] == 'paired'</filter>
            <yield/>
        </collection>
        <data name="discarded" format="@FORMAT@" label="${tool.name} on ${on_string}: discarded reads">
            <filter>capture and input_type['input_type_select'] == 'single'</filter>
            <yield/>
        </data>
        <collection name="discarded_pair" format="@FORMAT@" type="paired" label="${tool.name} on ${on_string}: discarded reads">
            <filter>capture and input_type['input_type_select'] == 'paired'</filter>
            <yield/>
        </collection>
    </xml>

    <!-- select for the significance level alpha (0.1, 0.05, 0.01 or 0.001; preselected: 0.05) -->
    <xml name="snp_options_alpha">
        <param argument="--alpha"
               type="select"
               label="Chi square significance level required to call a heterozygote or homozygote">
            <option value="0.1">0.1</option>
            <option value="0.05" selected="True">0.05</option>
            <option value="0.01">0.01</option>
            <option value="0.001">0.001</option>
        </param>
    </xml>

    <!-- model selection: snp (alpha), bounded (error rate bounds and alpha) or fixed;
         the parameters of the fixed model are supplied by the expanding macro or tool -->
    <xml name="snp_options">
        <conditional name="select_model">
            <param argument="--model_type" type="select" label="Choose the model">
                <option value="snp" selected="true">SNP</option>
                <option value="bounded">Bounded SNP</option>
                <option value="fixed">Fixed</option>
            </param>
            <when value="snp">
                <expand macro="snp_options_alpha"/>
            </when>
            <when value="bounded">
                <param argument="--bound_low"
                       type="float"
                       value="0.0"
                       min="0.0"
                       max="1.0"
                       label="Lower bound for epsilon, the error rate"
                       help="between 0 and 1.0"/>
                <param argument="--bound_high"
                       type="float"
                       value="1.0"
                       min="0.0"
                       max="1.0"
                       label="Upper bound for epsilon, the error rate"
                       help="between 0 and 1.0"/>
                <expand macro="snp_options_alpha"/>
            </when>
            <when value="fixed">
                <yield/>
            </when>
        </conditional>
    </xml>
    <!-- snp_options with the barcode error frequency parameter passed as content,
         snp_options places it in the branch of the fixed model -->
    <xml name="snp_options_full">
        <expand macro="snp_options">
            <param argument="--bc_err_freq" type="float" value="0.1" min="0.0" max="1.0" label="Barcode error frequency" help="between 0 and 1.0"/>
        </expand>
    </xml>

    <!-- variant calling options for use in gstacks and denovomap
         default for var_alpha is 0.01 if model == marukilow (which is the case in denovomap and refmap)
         otherwise no default is available and gstacks will output an error
         "Error: No value was provided for \-\-var-alpha and there is no default for this model)"
    -->
    <xml name="variant_calling_options_vg" token_varalpha_default="">
        <!-- the initial value of var_alpha is taken from the varalpha_default token (empty by default) -->
        <param argument="--var-alpha"
               name="var_alpha"
               type="float"
               value="@VARALPHA_DEFAULT@"
               min="0"
               label="Alpha threshold for discovering SNPs"
               help="Default is 0.01 if the marukilow model is used (which is the case in refmap and denovomap), otherwise no default value is available."/>
        <param argument="--gt-alpha"
               name="gt_alpha"
               type="float"
               value="0.05"
               min="0"
               label="Alpha threshold for calling genotypes"/>
    </xml>

    <!-- barcode location options available for single-end data; the type token is the noun used in
         the option texts, further options can be appended by the expanding macro or tool -->
    <xml name="barcode_encoding_single" token_type="">
        <option value="--inline_null" selected="True">@TYPE@ is inline with sequence, occurs only on single-end read (--inline_null)</option>
        <option value="--index_null">@TYPE@ is provided in FASTQ header (Illumina i5 or i7 read) (--index_null)</option>
        <yield/>
    </xml>

    <!-- barcode location options for paired-end data: the options of barcode_encoding_single
         followed by the five options that involve the second read or the second index -->
    <xml name="barcode_encoding_pair" token_type="">
        <expand macro="barcode_encoding_single" type="@TYPE@">
            <option value="--null_index">@TYPE@ is provided in FASTQ header (Illumina i7 read if both i5 and i7 read are provided) (--null_index)</option>
            <option value="--inline_inline">@TYPE@ is inline with sequence, occurs on single and paired-end read (--inline_inline)</option>
            <option value="--index_index">@TYPE@ is provided in FASTQ header (Illumina i5 and i7 reads) (--index_index)</option>
            <option value="--inline_index">@TYPE@ is inline with sequence on single-end read and occurs in FASTQ header (from either i5 or i7 read) (--inline_index)</option>
            <option value="--index_inline">@TYPE@ occurs in FASTQ header (Illumina i5 or i7 read) and is inline with sequence on single-end read (if single read data) or paired-end read (if paired data) (--index_inline)</option>
        </expand>
    </xml>
    <!-- test helper: asserts that the collection element named by the element_name token
         contains the text "stacks completed on" -->
    <xml name="test_element_stacks_completed" token_element_name="">
        <element name="@ELEMENT_NAME@">
            <assert_contents>
                <has_text text="stacks completed on"/>
            </assert_contents>
        </element>
    </xml>
</macros>
author	iuc
date	Mon, 23 May 2022 17:47:23 +0000
parents	a5969aa9347e
children