comparison hyphy_fubar.xml @ 6:3285fd1f4bde draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 8d5ae1d04c43988fdcc458f4f08376a15e72db8e"
author iuc
date Thu, 20 Feb 2020 18:10:47 -0500
parents 93a0cf4ea5fc
children 3db56014322f
comparison
equal deleted inserted replaced
5:bece0bad8e89 6:3285fd1f4bde
10 ln -s '$input_nhx' fubar_input.nhx && 10 ln -s '$input_nhx' fubar_input.nhx &&
11 hyphy fubar 11 hyphy fubar
12 --alignment ./fubar_input.fa 12 --alignment ./fubar_input.fa
13 --tree ./fubar_input.nhx 13 --tree ./fubar_input.nhx
14 --code '$gencodeid' 14 --code '$gencodeid'
15 --method '$posterior' 15 --method '$posteriorEstimationMethod.method'
16 --grid '$grid_points' 16 --grid '$grid_points'
17 --chains '$mcmc' 17 @posteriorEstimationMethod_cmd@
18 --chain-length '$chain_length'
19 --burn-in '$samples'
20 --samples '$samples_per_chain'
21 --concentration_parameter '$concentration' 18 --concentration_parameter '$concentration'
22 > '$fubar_log' 19 > '$fubar_log'
23 ]]></command> 20 ]]></command>
24 <inputs> 21 <inputs>
25 <expand macro="inputs"/> 22 <expand macro="inputs"/>
26 <expand macro="gencode"/> 23 <expand macro="gencode"/>
27 <param name="grid_points" type="integer" value="20" min="5" max="50" label="Grid points"/> 24 <param argument="--grid" name="grid_points" type="integer" value="20" min="5" max="50" label="Grid points" />
28 <param name="posterior" type="select" label="Posterior estimation method"> 25 <expand macro="conditional_posteriorEstimationMethod" />
29 <option value="Metropolis-Hastings">Full Metropolis-Hastings MCMC algorithm</option> 26 <param argument="--concentration_parameter" name="concentration" type="float" value="0.5" min="0.001" max="1" label="Concentration parameter of the Dirichlet prior" />
30 <option value="Collapsed-Gibbs">Collapsed Gibbs sampler</option> 27
31 <option value="Variational-Bayes">0-th order Variational Bayes approximations</option>
32 </param>
33 <param name="mcmc" type="integer" value="5" min="2" max="20" label="Number of MCMC chains"/>
34 <param name="chain_length" type="integer" value="2000000" min="500000" max="50000000" label="Length of each chain"/>
35 <param name="samples" type="integer" value="1000000" min="100000" max="1900000" label="Samples to use for burn-in"/>
36 <param name="samples_per_chain" type="integer" value="100" min="50" max="1000000" label="Samples to draw from each chain"/>
37 <param name="concentration" type="float" value="0.5" min="0.001" max="1" label="Concentration parameter of the Dirichlet prior"/>
38 </inputs> 28 </inputs>
39 <outputs> 29 <outputs>
40 <data name="fubar_log" format="txt"/> 30 <data name="fubar_log" format="txt"/>
41 <data name="fubar_output" format="hyphy_results.json" from_work_dir="fubar_input.fa.FUBAR.json" /> 31 <data name="fubar_output" format="hyphy_results.json" from_work_dir="fubar_input.fa.FUBAR.json" />
42 </outputs> 32 </outputs>
47 <param name="posterior" value="Variational-Bayes"/> 37 <param name="posterior" value="Variational-Bayes"/>
48 <output name="fubar_output" file="fubar-out1.json" compare="sim_size"/> 38 <output name="fubar_output" file="fubar-out1.json" compare="sim_size"/>
49 </test> 39 </test>
50 </tests> 40 </tests>
51 <help><![CDATA[ 41 <help><![CDATA[
52 Model-based selection analyses (such as those performed by PAML and HyPhy) can be slow, becoming impractical for large alignments. We present a method to model and detect selection much faster than existing methods and to leverage Bayesian MCMC to robustly account for parameter estimation errors.
53 42
54 Results: By exploiting some commonly used approximations, FUBAR can perform detection of positive selection under a model that allows rich site- to-site rate variation about 30 to 50 times faster than existing random effects likelihood methods, and 10 to 30 times faster than existing fixed effects likelihood methods. We introduce an ultra-fast MCMC routine that allows a flexible prior specification, with no parametric constraints on the prior shape. Furthermore, our method allows us to visualize Bayesian inference for each site, revealing the model supported by the data. 43 FUBAR : Fast Unconstrained Bayesian AppRoximation
44 =================================================
55 45
56 See the online documentation_ for more information. 46 What question does this method answer?
47 --------------------------------------
57 48
58 .. _documentation: http://hyphy.org/methods/selection-methods/#fubar 49 Which site(s) in a gene are subject to pervasive, i.e. consistently across the entire phylogeny, diversifying selection?
50
51 Recommended Applications
52 ------------------------
53
54 The phenomenon of pervasive selection is generally most prevalent in pathogen evolution and any biological system influenced by evolutionary arms race dynamics
55 (or balancing selection), including adaptive immune escape by viruses. As such, FUBAR is ideally suited to identify sites under positive selection which
56 represent candidate sites subject to strong selective pressures across the entire phylogeny.
57
58 FUBAR is our recommended method for detecting pervasive selection at individual sites on large (> 500 sequences) datasets for which other methods have prohibitive runtimes, unless you have access to a computer cluster.
59
60 Brief description
61 -----------------
62
63 Perform a Fast Unconstrained Bayesian AppRoximation (FUBAR) analysis of a
64 coding sequence alignment to determine whether some sites have been
65 subject to pervasive purifying or diversifying selection. There are three methods
66 for estimating the posterior distribution of
67 grid weights: collapsed Gibbs MCMC (faster), 0-th order Variational
68 Bayes approximation (fastest), full Metropolis-Hastings (slowest).
69
70 Input
71 -----
72
73 1. A *FASTA* sequence alignment.
74 2. A phylogenetic tree in the *Newick* format.
75
76 Note: the names of sequences in the alignment must match the names of the sequences in the tree.
77
78
79 Output
80 ------
81
82 A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf).
83
84 A custom visualization module for viewing these results is available (see http://vision.hyphy.org/FUBAR for an example)
85
86 Further reading
87 ---------------
88
89 http://hyphy.org/methods/selection-methods/#FUBAR
90
91
92 Tool options
93 ------------
94 ::
95
96
97 --code Which genetic code to use
98
99 --grid The number of grid points
100 Smaller : faster
101 Larger : more precise posterior estimation but slower
102 default value: 20
103
104 --method Inference method to use
105 Variational-Bayes : 0-th order Variational Bayes approximation; fastest [default]
106 Metropolis-Hastings : Full Metropolis-Hastings MCMC algorithm; original method [slowest]
107 Collapsed-Gibbs : Collapsed Gibbs sampler [intermediate speed]
108
109
110 --chains How many MCMC chains to run (does not apply to Variational-Bayes)
111 default value: 5
112
113 --chain-length MCMC chain length (does not apply to Variational-Bayes)
114 default value: 2,000,000
115
116 --burn-in MCMC chain burn in (does not apply to Variational-Bayes)
117 default value: 1,000,000
118
119 --samples MCMC samples to draw (does not apply to Variational-Bayes)
120 default value: 100
121
122 --concentration_parameter
123 The concentration parameter of the Dirichlet prior
124 default value: 0.5
125
126
59 ]]></help> 127 ]]></help>
60 <expand macro="citations"> 128 <expand macro="citations">
61 <citation type="doi">10.1093/molbev/mst030</citation> 129 <citation type="doi">10.1093/molbev/mst030</citation>
62 </expand> 130 </expand>
63 </tool> 131 </tool>