view hyphy_fubar.xml @ 24:c5ef306c2041 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
author iuc
date Tue, 20 Apr 2021 10:24:45 +0000
parents 3db56014322f
children cd41e7a4d35f
line wrap: on
line source

<?xml version="1.0"?>
<tool id="hyphy_fubar"
      name="HyPhy-FUBAR"
      version="@VERSION@+galaxy0"
      profile="19.09">
    <!-- Short description of the tool. -->
    <description>Fast Unconstrained Bayesian AppRoximation</description>
    <macros>
        <!-- Shared definitions; macros and tokens used in this file but not defined in it are expected to come from here. -->
        <import>macros.xml</import>
        <!-- Upper- and lower-case spellings of the analysis name. Nothing in this file references them directly; presumably macros.xml consumes them (TODO confirm). -->
        <token name="@OPERATION@">FUBAR</token>
        <token name="@operation@">fubar</token>
    </macros>
    <expand macro="requirements"/>
    <!--
        Runs "hyphy fubar" on the alignment and redirects its standard output to the
        fubar_log dataset. A tree is passed only when a tree dataset was supplied.
        @SYMLINK_FILES@, @posteriorEstimationMethod_cmd@ and @CATCH_ERROR@ are tokens that
        are not defined in this file; presumably they come from macros.xml (TODO confirm).
        The alignment path is single-quoted, like every other substituted value, so the
        shell always treats it as a single word.
    -->
    <command detect_errors="exit_code"><![CDATA[
        @SYMLINK_FILES@
        hyphy fubar
            --alignment './$input_file'
            #if $input_nhx:
                --tree ./input.nhx
            #end if
            --code '$gencodeid'
            --method '$posteriorEstimationMethod.method'
            --grid '$grid_points'
            @posteriorEstimationMethod_cmd@
            --concentration_parameter '$concentration'
            > '$fubar_log' ;
        @CATCH_ERROR@
    ]]></command>
    <inputs>
        <!-- Shared macros; presumably they define input_file, input_nhx and gencodeid, which the command template references (TODO confirm). -->
        <expand macro="inputs"/>
        <expand macro="gencode"/>
        <!-- Number of grid points: integer in [5, 50], default 20. -->
        <param name="grid_points"
               argument="--grid"
               type="integer"
               value="20"
               min="5"
               max="50"
               label="Grid points"/>
        <!-- Shared macro; the command reads posteriorEstimationMethod.method from it. -->
        <expand macro="conditional_posteriorEstimationMethod"/>
        <!-- Dirichlet prior concentration: float in [0.001, 1], default 0.5. -->
        <param name="concentration"
               argument="--concentration_parameter"
               type="float"
               value="0.5"
               min="0.001"
               max="1"
               label="Concentration parameter of the Dirichlet prior"/>
    </inputs>
    <outputs>
        <!-- Receives the standard output of the hyphy run (the command redirects stdout here). -->
        <data format="txt" name="fubar_log"/>
        <!-- JSON results. The command template in this file never writes to it directly; presumably a macro token wires it up (TODO confirm). -->
        <data format="hyphy_results.json" name="fubar_output"/>
    </outputs>
    <tests>
        <!-- Gzipped FASTA alignment plus tree, run with the Variational-Bayes method; the JSON result is compared by size only. -->
        <test>
            <param name="input_file" ftype="fasta.gz" value="fubar-in1.fa.gz"/>
            <param name="input_nhx" ftype="nhx" value="fubar-in1.nhx"/>
            <!-- The command reads the method from posteriorEstimationMethod.method, so the value is set through that conditional to make sure it reaches the method selector. -->
            <conditional name="posteriorEstimationMethod">
                <param name="method" value="Variational-Bayes"/>
            </conditional>
            <output name="fubar_output" file="fubar-out1.json" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[

FUBAR : Fast Unconstrained Bayesian AppRoximation
=================================================

What question does this method answer?
--------------------------------------

Which site(s) in a gene are subject to pervasive, i.e. consistently across the entire phylogeny, diversifying selection?

Recommended Applications
------------------------

The phenomenon of pervasive selection is generally most prevalent in pathogen evolution and any biological system influenced by evolutionary arms race dynamics
(or balancing selection), including adaptive immune escape by viruses. As such, FUBAR is ideally suited to identify sites under positive selection which
represent candidate sites subject to strong selective pressures across the entire phylogeny.

FUBAR is our recommended method for detecting pervasive selection at individual sites on large (> 500 sequences) datasets for which other methods have prohibitive runtimes, unless you have access to a computer cluster.

Brief description
-----------------

Perform a Fast Unconstrained Bayesian AppRoximation (FUBAR) analysis of a
coding sequence alignment to determine whether some sites have been
subject to pervasive purifying or diversifying selection. There are three methods
for estimating the posterior distribution of
grid weights: collapsed Gibbs MCMC (faster), 0-th order Variational
Bayes approximation (fastest), full Metropolis-Hastings (slowest).

Input
-----

1. A *FASTA* sequence alignment.
2. A phylogenetic tree in the *Newick* format

Note: the names of sequences in the alignment must match the names of the sequences in the tree.


Output
------

A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf).

A custom visualization module for viewing these results is available (see http://vision.hyphy.org/FUBAR for an example)

Further reading
---------------

http://hyphy.org/methods/selection-methods/#FUBAR


Tool options
------------
::


    --code             Which genetic code to use

    --grid             The number of grid points
                        Smaller : faster
                        Larger : more precise posterior estimation but slower
                        default value: 20

    --method           Inference method to use
                            Variational-Bayes : 0-th order Variational Bayes approximation; fastest [default]
                            Metropolis-Hastings : Full Metropolis-Hastings MCMC algorithm; original method [slowest]
                            Collapsed-Gibbs  : Collapsed Gibbs sampler [intermediate speed]


    --chains           How many MCMC chains to run (does not apply to Variational-Bayes)
                            default value: 5

    --chain-length     MCMC chain length (does not apply to Variational-Bayes)
                            default value: 2,000,000

    --burn-in          MCMC chain burn in (does not apply to Variational-Bayes)
                            default value: 1,000,000

    --samples          MCMC samples to draw (does not apply to Variational-Bayes)
                            default value: 1,000

    --concentration_parameter
                        The concentration parameter of the Dirichlet prior
                        default value: 0.5


    ]]></help>
    <!-- Expands the shared citations macro, passing the DOI below as nested content (presumably the FUBAR method publication; confirm). -->
    <expand macro="citations">
        <citation type="doi">10.1093/molbev/mst030</citation>
    </expand>
</tool>