diff hyphy_bgm.xml @ 5:823a5afee916 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 8d5ae1d04c43988fdcc458f4f08376a15e72db8e"
author iuc
date Thu, 20 Feb 2020 18:09:03 -0500
parents 1df765cc6bcb
children 26ad9a6b1293
line wrap: on
line diff
--- a/hyphy_bgm.xml	Mon Feb 17 14:51:07 2020 -0500
+++ b/hyphy_bgm.xml	Thu Feb 20 18:09:03 2020 -0500
@@ -47,8 +47,8 @@
         <param name="chain_length" type="integer" value="100000" min="0" max="1000000000" label="Length of MCMC chain"/>
         <param name="burn_in" type="integer" value="10000" min="0" max="1000000000" label="Number of samples to discard for burn-in"/>
         <param name="samples" type="integer" value="100" min="0" max="100" label="Number of steps to extract from chain sample"/>
-        <param name="parents" type="integer" value="1" min="1" max="3" label="Maximum number of parents allowed per node"/>
-        <param name="min_subs" type="integer" value="1" min="1" max="100000" label="Minimum number of ubstitutions per site to be included in the analysis"/>
+        <param argument="--max-parents" name="parents" type="integer" value="1" min="1" max="3" label="Maximum number of parents allowed per node" />
+        <param argument="--min-subs" name="min_subs" type="integer" value="1" min="1" max="100000" label="Minimum number of ubstitutions per site to be included in the analysis" />
     </inputs>
     <outputs>
         <data name="bgm_log" format="txt"/>
@@ -62,11 +62,91 @@
         </test>
     </tests>
     <help><![CDATA[
-The Bayesian Graphical Model (BGM) method is a tool for detecting coevolutionary interactions between amino acid positions in a protein.
+
+BGM : Bayesian Graphical Models
+===============================
+
+What does this do?
+------------------
+
+This tool identifies groups of sites in the alignment that experience substitutions along the same branches,
+i.e. *co-evolve*.
+
+Brief description
+-----------------
+
+BGM (Bayesian Graphical Model) uses a maximum likelihood ancestral state
+reconstruction to map substitution (non-synonymous only for coding data)
+events to branches in the phylogeny and then analyzes the joint
+distribution of the substitution map using a Bayesian graphical model
+(network). Next, a Markov chain Monte Carlo analysis is used to generate
+a random sample of network structures from the posterior distribution
+given the data. Each node in the network represents a site in the
+alignment, and links (edges) between nodes indicate high posterior
+support for correlated substitutions at the two sites over time, which
+implies coevolution.
+
+
+Input
+-----
+
+1. A *FASTA* sequence alignment.
+2. A phylogenetic tree in the *Newick* format.
+
+Note: the names of sequences in the alignment must match the names of the sequences in the tree.
+
+Output
+------
+
+A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf).
+
+A custom visualization module for viewing these results is available (see http://vision.hyphy.org/BGM for an example).
+
+Further reading
+---------------
 
-See the online documentation_ for more information.
+http://hyphy.org/methods/selection-methods/#BGM
+
+
+Tool options
+------------
+::
+
+    --branches          Which branches should be tested for selection?
+                            All [default] : test all branches
+
+                            Internal : test only internal branches (suitable for
+                            intra-host pathogen evolution for example, where terminal branches
+                            may contain polymorphism data)
+
+                            Leaves: test only terminal (leaf) branches
+
+                            Unlabeled: if the Newick string is labeled using the {} notation,
+                            test only branches without explicit labels
+                            (see http://hyphy.org/tutorials/phylotree/)
 
-.. _documentation: http://hyphy.org/methods/selection-methods/#bgm
+    --max-parents      The maximum number of parents allowed per node, i.e. how many sites
+                       can directly influence substitution patterns at another site
+                       Increasing this number scales complexity nonlinearly
+                            default value: 1
+
+    --min-subs         The minimum number of substitutions per site to include it in the analysis
+                       Filter low complexity (too few substitutions) sites
+                            default value: 1
+
+    --chains           How many MCMC chains to run (does not apply to Variational-Bayes)
+                            default value: 5
+
+    --steps            MCMC chain length (does not apply to Variational-Bayes)
+                            default value: 100,000
+
+    --burn-in          MCMC chain burn in (does not apply to Variational-Bayes)
+                            default value: 10,000
+
+    --samples          MCMC samples to draw (does not apply to Variational-Bayes)
+                            default value: 100
+
+
     ]]></help>
     <expand macro="citations">
         <citation type="doi">10.1371/journal.pcbi.0030231</citation>