view bayescan.xml @ 0:72f3a333f155 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bayescan/ commit 33637968e1e32c02d7765a6701e930a0ea0dd903
author iuc
date Fri, 17 Mar 2017 15:20:38 -0400
parents
children
line wrap: on
line source

<tool id="BayeScan" name="BayeScan" version="2.1">
    <description>Detecting natural selection from population-based genetic data</description>
    <!-- Software dependency resolved by Galaxy before the job runs; presumably
         the package that ships the bayescan2 executable called in the command
         section. NOTE(review): the wrapper is labelled 2.1 while the package is
         pinned at 2.0.1; confirm that this difference is intended. -->
    <requirements>
        <requirement version="2.0.1" type="package">bayescan</requirement>
    </requirements>
    <!-- Command line template (Cheetah). Creates the directory that receives
         the discovered result files, then runs bayescan2 on the selected input
         with every option taken from the tool form. Standard output of
         bayescan2 is redirected into the primary "output" dataset. -->
    <command>

        <![CDATA[
        ## Stop right away if the output directory cannot be created.
        mkdir 'output_dir' &&

        bayescan2 '$input'
        -od output_dir
        ## Only pass a discard list when the user asked for one.
        #if str($loci_file.loci) == "1"
            -d '$loci_file.input_loci'
        #end if
        ## Boolean parameters expand either to their flag or to an empty
        ## string. They are deliberately left unquoted so that an unchecked
        ## box adds nothing to the command line instead of an empty argument.
        $snp_genotypes_matrix
        $fstats
        $pilot_runs
        $allele_frequency
        ## Common prefix of all result files written into output_dir.
        -o bayescan
        -n '$sample_size'
        -thin '$thinning_interval'
        -nbp '$num_pilot_runs'
        -pilot '$length_pilot_run'
        -burn '$burn'
        -pr_odds '$prior_odds'
        -lb_fis '$lower_prior'
        -hb_fis '$higher_prior'
        -aflp_pc '$threshold' > '$output'
        ]]>

    </command>

    <!-- Tool form. Every parameter declared here is referenced by name in the
         command template: data inputs first, then the boolean flags, the MCMC
         chain settings, the prior settings and the optional extra outputs. -->
    <inputs>

        <!-- Genotype data passed to bayescan2 as its positional argument. -->
        <param name="input" type="data" format="tabular,txt" label="Input genotype data file" help="Must be a space/tab delimited plain text file" />
        <!-- The discard list selector is only shown when "Yes" is chosen. -->
        <conditional name="loci_file">
            <param name="loci" type="select" label="Discard loci?" help="Choose Yes to supply a file listing loci to leave out of the analysis" >
                <option value="0">No</option>
                <option value="1">Yes</option>
            </param>
            <when value="0"></when>
            <when value="1">
                <param name="input_loci" type="data" format="tabular,txt" label="Discard loci file" help="Optional input file containing list of loci to discard" />
            </when>
        </conditional>
        <!-- Boolean flags: truevalue is the literal bayescan2 option, and an
             unchecked box contributes an empty string. Each label must match
             the option it emits: -snp selects the SNP genotypes matrix format,
             -fstat restricts the run to F-stat estimation. -->
        <param name="snp_genotypes_matrix" type="boolean" checked="false" truevalue="-snp" falsevalue="" label="SNP genotypes matrix data" help="Use SNP genotypes matrix"/>

        <param name="fstats" type="boolean" checked="false" truevalue="-fstat" falsevalue="" label="Only estimate F-stats (no selection)"/>

        <!-- MCMC chain settings (options -n, -thin, -nbp, -pilot and -burn). -->
        <param name="sample_size" type="integer" value="5000" label="Number of outputted iterations"/>
        <param name="thinning_interval" type="integer" value="10" label="Thinning interval size"/>
        <param name="num_pilot_runs" type="integer" value="20" label="Number of pilot runs" />
        <param name="length_pilot_run" type="integer" value="5000" label="Length of pilot runs" />
        <param name="burn" type="integer" value="50000" label="Additional burn-in length" help="Number of MCMC iterations discarded as burn-in before samples are recorded"/>

        <!-- Prior settings (options -pr_odds, -lb_fis, -hb_fis and -aflp_pc). -->
        <param name="prior_odds" type="integer" value="10" label="Prior odds for the neutral model"/>
        <param name="lower_prior" type="float" value="0.0" label="Lower bound for uniform prior on Fis (dominant data)"/>
        <param name="higher_prior" type="float" value="1.0" label="Higher bound for uniform prior on Fis (dominant data)"/>
        <param name="threshold" type="float" value="0.1" label="Threshold for the recessive genotype as a fraction of maximum band intensity"/>

        <!-- Optional extra result files (options -out_pilot and -out_freq). -->
        <param name="pilot_runs" type="boolean" checked="false" truevalue="-out_pilot" falsevalue="" label="Optional output file for pilot runs"/>
        <param name="allele_frequency" type="boolean" checked="false" truevalue="-out_freq" falsevalue="" label="Optional output file for allele frequencies"/>


    </inputs>

    <!-- The primary dataset receives what bayescan2 prints to standard output
         (see the redirect at the end of the command). Every file the program
         writes into output_dir is picked up as an additional visible dataset;
         the pattern splits each file name into a designation and an extension. -->
    <outputs>
        <data name="output" format="txt">
            <discover_datasets directory="output_dir" pattern="__designation_and_ext__" visible="true" />
        </data>
    </outputs>

    <tests>
        <!-- Single end-to-end run on the bundled AFLP example: no loci are
             discarded, both boolean data options and both optional outputs are
             switched on, and every numeric setting equals its form default. -->
        <test>
            <!-- Data inputs -->
            <param name="input" value="test_binary_AFLP.txt" />
            <conditional name="loci_file">
                <param name="loci" value="0" />
            </conditional>

            <!-- Boolean data options -->
            <param name="snp_genotypes_matrix" value="true" />
            <param name="fstats" value="true" />

            <!-- MCMC chain settings -->
            <param name="sample_size" value="5000" />
            <param name="thinning_interval" value="10" />
            <param name="num_pilot_runs" value="20" />
            <param name="length_pilot_run" value="5000" />
            <param name="burn" value="50000" />

            <!-- Prior settings -->
            <param name="prior_odds" value="10" />
            <param name="lower_prior" value="0.0" />
            <param name="higher_prior" value="1.0" />
            <param name="threshold" value="0.1" />

            <!-- Optional extra outputs -->
            <param name="pilot_runs" value="true" />
            <param name="allele_frequency" value="true" />

            <!-- The captured standard output is compared with result.out; the
                 discovered files are checked either against a reference file
                 or for a characteristic header line. -->
            <output name="output" file="result.out" ftype="txt">
                <discovered_dataset designation="bayescan" ftype="sel">
                    <assert_contents>
                        <has_text text="logL Fis1 Fis2 Fis3 Fis4 Fis5 Fis6 Fis7 Fis8 Fis9 Fis10 Fst1 Fst2 Fst3 Fst4 Fst5 Fst6 Fst7 Fst8 Fst9 Fst10" />
                    </assert_contents>
                </discovered_dataset>
                <discovered_dataset designation="bayescan_Verif" ftype="txt" value="verif.txt" />
                <discovered_dataset designation="bayescan_AccRte" ftype="txt">
                    <assert_contents>
                        <has_line_matching expression="alpha beta  ances freq   a_p.*" />
                    </assert_contents>
                </discovered_dataset>
                <discovered_dataset designation="bayescan_prop" ftype="txt" value="pilot.txt" />
                <discovered_dataset designation="bayescan_freq" ftype="txt">
                    <assert_contents>
                        <has_text text="locus1 locus2 locus3 locus4 locus5 locus6 locus7 locus8 locus9 locus10" />
                    </assert_contents>
                </discovered_dataset>
            </output>
        </test>
    </tests>

    <help><![CDATA[

**What it does**

BayeScan aims to identify candidate loci under natural selection from genetic data, using differences in allele frequencies between populations.

`BayeScan`_ is based on the multinomial-Dirichlet model.

One of the simplest possible scenarios covered consists of an island model in which subpopulation allele frequencies are correlated through a common migrant gene pool from which they differ in varying degrees. The difference in allele frequency between this common gene pool and each subpopulation is measured by a subpopulation specific FST coefficient.

Therefore, this formulation can consider realistic ecological scenarios where the effective size and the immigration rate may differ among subpopulations.

.. _Bayescan: http://cmpg.unibe.ch/software/BayeScan/

.. class:: infomark

**Input file**

BayeScan uses its own input file formats, which depend on the type of data used. All input files are simply in text format.

Read the `manual`_ to create the input file.

.. _manual: http://cmpg.unibe.ch/software/BayeScan/files/BayeScan2.1_manual.pdf

]]>

    </help>
    <!-- Publication to cite for this tool, given as a DOI. -->
    <citations>
        <citation type="doi">10.1534/genetics.108.092221</citation>
    </citations>
</tool>