changeset 2:681e9bb51cc4 draft default tip

Clean help, fix option descriptions, add genthreshfortopoterm, change filetypes to txt to make it more flexible.
author malex
date Thu, 05 Jul 2012 17:18:52 -0400
parents 9ce35d2d9937
children
files garli.xml
diffstat 1 files changed, 11 insertions(+), 460 deletions(-) [+]
line wrap: on
line diff
--- a/garli.xml	Fri Dec 02 17:07:27 2011 -0500
+++ b/garli.xml	Thu Jul 05 17:18:52 2012 -0400
@@ -5,14 +5,14 @@
 ## Arguments to the wrapper beyond the config file are just for Galaxy's benefit - all filenames are hardcoded
 <command interpreter="python">garli_wrapper.py $garli_conf $best_all_tre $best_tre $log00_log $screen_log </command>
   <inputs>
-    <param name="datafname" format="nexus" type="data" label="Nexus formated sequence file" force_select="true"/>
+    <param name="datafname" format="txt" type="data" label="Nexus formated sequence file" force_select="true"/>
     <conditional name="choose_search_type">
         <param name="search_type" type="select" label="Analysis Type">
             <option value="mlsearch" selected="true">ML Search</option>
             <option value="bootstrap">Bootstrap</option>
         </param>
             <when value="mlsearch">
-                <param name="searchreps" type="integer" size="4" value="1" label="Number of replicates">
+                <param name="searchreps" type="integer" size="4" value="1" label="Number of independent search replicates">
                     <validator type="in_range" message="(1-infinity)" min="1" max="inf"/>
                 </param>
                 <param name="bootstrapreps" type="hidden" value="0" />
@@ -25,7 +25,7 @@
             <when value="bootstrap">
                 <param name="searchreps" type="hidden" value="0" />
                 <param name="bootstrapreps" type="integer" size="4"
-                    value="1" label="Number of replicates">
+                    value="1" label="Number of bootstrap replicates">
                     <validator type="in_range" message="(1-infinity)" min="1" max="inf"/>
                 </param>
             </when>
@@ -44,7 +44,7 @@
             <param name="streefname" type="hidden" value="random"/>
         </when>
         <when value="file">
-            <param name="streefname" format="nexus" type="data" label="Starting Tree File"/>
+            <param name="streefname" format="txt" type="data" label="Starting Tree File"/>
         </when>
     </conditional>
     <param name="attachmentspertaxon" size="4" type="integer" value="50"
@@ -315,6 +315,10 @@
         label="Relative weight assigned to branch swaps based on locality">
         <validator type="in_range" message="(0.1-10.0)" min="0.1" max="10.0"/>
     </param>
+    <param name="genthreshfortopoterm" type="integer" size="5" value="20000"
+        label="Number of generations without topology improvement required for termination">
+        <validator type="in_range" message="(1-20000)" min="1" max="20000"/>
+    </param>
 </inputs>
 <outputs>
     <data format="nexus" name="best_tre" metadata_source="datafname" from_work_dir="garli.best.tre" label="${tool.name} on ${on_string}: garli.best.tre"/>
@@ -342,7 +346,7 @@
 outputeachbettertopology = 0
 outputcurrentbesttopology = 0
 enforcetermconditions = 1
-genthreshfortopoterm = 20000
+genthreshfortopoterm = ${genthreshfortopoterm}
 scorethreshforterm = 0.05
 significanttopochange = 0.01
 outputphyliptree = 0
@@ -408,461 +412,8 @@
 Garli is written and maintained by Derrick Zwickl
 
 Configuration options are adapted from
-https://www.nescent.org/wg_garli/GARLI_Configuration_Settings
-
------
-
-**Detailed description of the configuration options**
-
-
-**Analysis Type**
-
-    Specify whether to perform a maximum likelihood search for the best tree, or
-    a bootstrap analysis.
-
-
-**Number of replicates**
-
-    Number of independent search replicates to run.
-
-
-**Relative size of resample data**
-
-    This setting allows for bootstrap-like resampling, but with the
-    pseudoreplicate datasets having the number of alignment columns different
-    from the real data. Setting values below 1.0 is somewhat similar to
-    jackknifing, but not identical.
-
-
-**Attachment branches evaluated per taxon (min=1)**
-
-    The number of attachment branches evaluated for each taxon to be added to
-    the tree during the creation of an ML stepwise-addition starting tree.
-    Briefly, stepwise addition is an algorithm used to make a tree, and involves
-    adding taxa in a random order to a growing tree. For each taxon to be added,
-    a number of randomly chosen attachment branches are tried and scored, and
-    then the best scoring one is chosen as the location of that taxon. This
-    setting controls how many attachment points are evaluated for each taxon to
-    be added. A value of one is equivalent to a completely random tree (only one
-    randomly chosen location is evaluated). A value of greater than 2 times the
-    number of taxa in the dataset means that all attachment points will be
-    evaluated for each taxon, and will result in very good starting trees (but
-    may take a while on large datasets). Even fairly small values (less than 10)
-    can result in starting trees that are much, much better than random, but
-    still fairly different from one another.
-
-
-**Constraint file**
-
-     Select a file containing constraint specifications.
-
-
-**Random seed**
-
-    Random seed can have a value of -1 or a positive integer. The random number
-    seed used by the random number generator. Specify “–1” to have a seed chosen
-    for you. Specifying the same seed number in multiple runs will give exactly
-    identical results, if all other parameters and settings are also identical.
-
-
-**Available memory**
-
-    This lets GARLI determine how much system memory it may be able to use to
-    store computations for reuse.
-
-
-**Perform initial rough optimization**
-
-    Specifies whether some initial rough optimization is performed on the
-    starting branch lengths and rate heterogeneity parameters. This is always
-    recommended.
-
-
-**Outgroup taxa numbers**
-
-    The outgroup option allows for orienting tree topologies in a consistent way
-    when they are written to a file. Note that this has NO effect whatsoever on
-    the actual inference and the specified outgroup is NOT constrained to be
-    present in the inferred trees. If multiple outgroup taxa are specified and
-    they do not form a monophyletic group, this setting will be ignored. If you
-    specify a single outgroup taxon it will always be present, and the tree will
-    always be consistently oriented. To specify an outgroup consisting of taxa
-    1, 3 and 5 the format is this: outgroup = 1 3 5. Dashes are used for ranges
-    e.g. 1-3 5.
-
-
-**Collapse branches**
-
-    Before version 1.0, all trees that are returned were fully resolved. This is
-    true even if the maximum-likelihood estimate of some internal branch lengths
-    are effectively zero (or GARLI's minimum, which is 1e-8). In such cases,
-    collapsing the branch into a polytomy would be a better representation. Note
-    that GARLI will never return a tree with an actual branch length of zero,
-    but rather with its minimum value of 1.0e-8. The drawback of always
-    returning fully resolved trees is that what is effectively a polytomy can be
-    resolved in three ways, and different independent searches may randomly
-    return one of those resolutions. Thus, if you compare the trees by topology
-    only, they will look different. If you pay attention to the branch lengths
-    and likelihood scores of the trees it will be apparent that they are
-    effectively the same. I think that collapsing of branches is particularly
-    important when bootstrapping, since no support should be given to a branch
-    that doesn't really exist, i.e., that is a random resolution of a polytomy.
-    Collapsing is also good when calculating tree to tree distances such as the
-    symmetric tree distance, for example when calculating phylogenetic error to
-    a known target tree. Zero-length branches would add to the distances
-    (~error) although they really should not.
-
-
-**Model type**
-
-    The codon-aminoacid datatype means that the data will be supplied as a
-    nucleotide alignment, but will be internally translated and analyzed using
-    an amino acid model. The codon and codon-aminoacid datatypes require
-    nucleotide sequence that is aligned in the correct reading frame. In other
-    words, all gaps in the alignment should be a multiple of 3 in length, and
-    the alignment should start at the first position of a codon. If the
-    alignment has extra columns at the start, middle or end, they should be
-    removed or excluded with a Nexus exset (see the FAQ for an example of exset
-    usage). The correct Genetic Code must also be set.
-
-
-
-
-**Datatype - nucleotide**
-
-**Rate matrix**
-
-    The number of relative substitution rate parameters (note that the number of
-    free parameters is this value minus one). Equivalent to the “nst” setting in
-    PAUP* and MrBayes. 1rate assumes that substitutions between all pairs of
-    nucleotides occur at the same rate (JC model), 2rate allows different rates
-    for transitions and transversions (K2P or HKY models), and 6rate allows a
-    different rate between each nucleotide pair (GTR). These rates are estimated
-    unless the fixed option is chosen. Since version 0.96, parameters for any
-    submodel of the GTR model may be estimated. The format for specifying this
-    is very similar to that used in the “rclass” setting of PAUP*. Within
-    parentheses, six letters are specified, with spaces between them. The six
-    letters represent the rates of substitution between the six pairs of
-    nucleotides, with the order being A-C, A-G, A-T, C-G, C-T and G-T. Letters
-    within the parentheses that are the same mean that a single parameter is
-    shared by multiple nucleotide pairs.
-
-
-**State frequencies**
-
-    Specifies how the equilibrium state frequencies (A, C, G and T) are treated.
-    The empirical setting fixes the frequencies at their observed proportions,
-    and the other options should be self-explanatory.
-
-
-**Datatype - nucleotide or amino-acid**
-
-
-**Treatment of proportion of invariable sites parameter**
-
-    Specifies whether a parameter representing the proportion of sites that are
-    unable to change (i.e. have a substitution rate of zero) will be included.
-    This is typically referred to as 'invariant sites', but would better be
-    termed 'invariable sites'.
-
-
-**Rate heterogeneity type**
-
-    (none, gamma, gammafixed) – The model of rate heterogeneity assumed.
-    “gammafixed” requires that the alpha shape parameter is provided, and a
-    setting of “gamma” estimates it.
-
-
-**Number of discrete dN/dS categories**
-
-    The number of categories of variable rates (not including the invariant site
-    class if it is being used). Must be set to 1 if ratehetmodel is set to none.
-    Note that runtimes and memory usage scale linearly with this setting.
-
-
-**Datatype - amino-acid or codon-aminoacid**
-
-**Rate matrix**
-
-    (poisson, jones, dayhoff, wag, mtmam, mtrev) – The fixed amino acid rate
-    matrix to use. You should use the matrix that gives the best likelihood, and
-    could use a program like PROTTEST (very much like MODELTEST, but for amino
-    acid models) to determine which fits best for your data. Poisson assumes a
-    single rate of substitution between all amino acid pairs, and is a very poor
-    model.
-
-
-**Equilibrium Base Frequencies**
-
-    (equal, empirical, estimate, fixed, jones, dayhoff, wag, mtmam, mtrev) –
-    Specifies how the equilibrium state frequencies of the 20 amino acids are
-    treated. The “empirical” option fixes the frequencies at their observed
-    proportions (when describing a model this is often termed '+F').
-
-
-**Number of discrete dN/dS categories**
-
-    The number of categories of variable rates (not including the invariant site
-    class if it is being used). Must be set to 1 if ratehetmodel is set to none.
-    Note that runtimes and memory usage scale linearly with this setting.
-
-
-**Treatment of proportion of invariable sites parameter**
-
-    Specifies whether a parameter representing the proportion of sites that are
-    unable to change (i.e. have a substitution rate of zero) will be included.
-    This is typically referred to as 'invariant sites', but would better be
-    termed 'invariable sites'.
-
-
-**Datatype - codon**
-
-
-**Rate matrix**
-
-    (1rate, 2rate, 6rate, fixed, custom string) – This determines the relative
-    rates of nucleotide substitution assumed by the codon model. The options are
-    exactly the same as those allowed under a normal nucleotide model. A codon
-    model with ratematrix = 2rate specifies the standard Goldman and Yang (1994)
-    model, with different substitution rates for transitions and transversions.
-
-
-**State frequencies**
-
-    The options are to use equal codon frequencies (not a good option), the
-    frequencies observed in your dataset (termed “empirical” in GARLI), or the
-    codon frequencies implied by the “F1x4” or “F3x4” methods (using PAML
-    terminology). These last two options calculate the codon frequencies as the
-    product of the frequencies of the three nucleotides that make up each codon.
-    In the “F1x4” case the nucleotide frequencies are those observed in the
-    dataset across all codon positions, while the “F3x4” option uses the
-    nucleotide frequencies observed in the data at each codon position
-    separately.
-
-
-**Rate Heterogeneity Type**
-
-    For codon models, the default is to infer a single dN/dS parameter.
-    Alternatively, a model can be specified that infers a given number of dN/dS
-    categories, with the dN/dS values and proportions falling in each category
-    estimated (ratehetmodel = nonsynonymous). This is the 'discrete' or 'M3'
-    model of Yang et al., 2000.
-
-
-**Number of discrete dN/dS categories**
-
-    When ratehetmodel = nonsynonymous, this is the number of dN/dS parameter
-    categories.
-
-
-**Datatype - codon or codon-aminoacid**
-
-
-**Genetic code**
-
-    The genetic code to be used in translating codons into amino acids.
-
-
-**Population Settings**
-
-
-**Number of individuals in population**
-
-    The number of individuals in the population. This may be increased, but
-    doing so is generally not beneficial. Note that typical genetic algorithms
-    tend to have much, much larger population sizes than GARLI defaults.
-
-
-**Unmutated copies of best individual**
-
-    The number of times the best individual is copied to the next generation
-    with no chance of mutation. It is best not to mess with this setting.
-
-
-**Strength of selection**
-
-    Controls the strength of selection, with larger numbers denoting stronger
-    selection. The relative probability of reproduction of two individuals
-    depends on the difference in their log likelihoods (ΔlnL) and is formulated
-    very similarly to the procedure of calculating Akaike weights.
-
-
-**Fitness handicap for the best individual**
-
-    This can be used to bias the probability of reproduction of the best
-    individual downward. Because the best individual is automatically copied
-    into the next generation, it has a bit of an unfair advantage and can cause
-    all population variation to be lost due to genetic drift, especially with
-    small population sizes. The value specified here is subtracted from the
-    best individual’s lnL score before calculating the probabilities of
-    reproduction. It seems plausible that this might help maintain variation,
-    but I have not seen it cause a measurable effect.
-
-
-**Maximum number of generations to run**
-
-    Use if automatic termination is desired to prevent a runaway process.
-
-
-**Maximum time to run**
-
-    The maximum number of seconds for the run to continue. Use if automatic
-    termination is desired to prevent a runaway process.
-
-
-**Branch-length optimization settings**
-
-
-**Minimal optimization precision**
-
-    The minimum allowed value of the optimization precision - must not be larger
-    than the Starting optimization precision.
-
-
-**Number of steps down from Start Precision to Minimum Precision**
-
-    Specify the number of steps that it will take for the optimization precision
-    to decrease (linearly) from startoptprec to minoptprec.
-
-
-**Tree rejection threshold**
-
-    This setting controls which trees have more extensive branch-length
-    optimization applied to them. All trees created by a branch swap receive
-    optimization on a few branches that directly took part in the rearrangement.
-    If the difference in score between the partially optimized tree and the best
-    known tree is greater than treerejectionthreshold, no further optimization
-    is applied to the branches of that tree. Reducing this value can
-    significantly reduce runtimes, often with little or no effect on results.
-    However, it is possible that a better tree could be missed if this is set
-    too low. In cases in which obtaining the very best tree per search is not
-    critical (e.g., bootstrapping), setting this lower (~20) is probably safe.
-
-
-**Settings controlling the proportions of the mutation types**
-
-
-**Weight on topology mutations**
-
-    The prior weight assigned to the class of topology mutations (NNI, SPR and
-    limSPR). Note that setting this to 0.0 turns off topology mutations, meaning
-    that the tree topology is fixed for the run. This used to be a way to have
-    the program estimate only model parameters and branch-lengths, but the
-    optimizeinputonly setting is now a better way to go.
-
-
-**Weight on model parameter mutations**
-
-    The prior weight assigned to the class of model mutations. Note that setting
-    this at 0.0 fixes the model during the run.
-
-
-**Weight on branch-length parameter mutations**
-
-    The prior weight assigned to branch-length mutations. The same procedure
-    used above to determine the proportion of Topology:Model:Branch-Length
-    mutations is also used to determine the relative proportions of the three
-    types of topological mutations (NNI:SPR:limSPR), controlled by the following
-    three weights. Note that the proportion of mutations applied to each of the
-    model parameters is not user controlled.
-
-
-**Weight on NNI topology changes**
-
-    The prior weight assigned to NNI mutations
-
-
-**Weight on SPR topology changes**
-
-    The prior weight assigned to random SPR mutations. For very large datasets
-    it is often best to set this to 0.0, as random SPR mutations essentially
-    never result in score increases.
-
-
-**Weight on localized SPR topology changes**
-
-    The prior weight assigned to SPR mutations with the reconnection branch
-    limited to being a maximum of limsprrange branches away from where the
-    branch was detached.
-
-
-**Interval Length**
-
-    The number of generations in each interval during which the number and
-    benefit of each mutation type are stored.
-
-
-**Number of intervals to store**
-
-    The number of intervals to be stored. Thus, records of mutations are kept
-    for the last (intervallength x intervalstostore) generations. Every
-    intervallength generations the probabilities of the mutation types are
-    updated by the scheme described above.
-
-
-**Settings controlling mutation details**
-
-
-**Max range for localized SPR topology changes**
-
-    The maximum number of branches away from its original location that a branch
-    may be reattached during a limited SPR move. Setting this too high (&gt; 10)
-    can seriously degrade performance, but if you do so in conjunction with a
-    large increase in genthreshfortopoterm you might end up with better scores.
-
-
-**Settings controlling mutation details**
-
-    The mean of the binomial distribution from which the number of branch
-    lengths mutated is drawn during a branch length mutation.
-
-
-**Magnitude of branch-length mutations**
-
-    The shape parameter of the gamma distribution (with a mean of 1.0) from
-    which the branch-length multipliers are drawn for branch-length mutations.
-    Larger numbers cause smaller changes in branch lengths. (Note that this has
-    nothing to do with gamma rate heterogeneity.)
-
-
-**Magnitude of model parameter mutations**
-
-    The shape parameter of the gamma distribution (with a mean of 1.0) from
-    which the model mutation multipliers are drawn for model parameters
-    mutations. Larger numbers cause smaller changes in model parameters. (Note
-    that this has nothing to do with gamma rate heterogeneity.)
-
-
-**Relative weight assigned to already attempted branch swaps**
-
-    With version 0.95 and later, GARLI keeps track of which branch swaps it has
-    attempted on the current best tree. Because swaps are applied randomly, it
-    is possible that some swaps are tried twice before others are tried at all.
-    This option allows the program to bias the swaps applied toward those that
-    have not yet been attempted. Each swap is assigned a relative weight
-    depending on the number of times that it has been attempted on the current
-    best tree. This weight is equal to (uniqueswapbias) raised to the (# times
-    swap attempted) power. In other words, a value of 0.5 means that swaps that
-    have already been tried once will be half as likely as those not yet
-    attempted, swaps attempted twice will be ¼ as likely, etc. A value of 1.0
-    means no biasing. Use of this option may allow the use of somewhat larger
-    values of limsprrange.
-
-
-**Relative weight assigned to branch swaps based on locality**
-
-    This option is similar to uniqueswapbias, except that it biases toward
-    certain swaps based on the topological distance between the initial and
-    rearranged trees. The distance is measured as in the limsprrange, and is
-    half the Robinson-Foulds distance between the trees. As with
-    uniqueswapbias, distanceswapbias assigns a relative weight to each potential
-    swap. In this case the weight is (distanceswapbias) raised to the
-    (reconnection distance - 1) power. Thus, given a value of 0.5, the weight of
-    an NNI is 1.0, the weight of an SPR with distance 2 is 0.5, with distance 3
-    is 0.25, etc. Note that values less than 1.0 bias toward more localized
-    swaps, while values greater than 1.0 bias toward more extreme swaps. Also
-    note that this bias is only applied to limSPR rearrangements. Be careful in
-    setting this, as extreme values can have a very large effect.
+https://www.nescent.org/wg_garli/GARLI_Configuration_Settings. Please see that
+page for more details.
 
 </help>
 </tool>