comparison garli.xml @ 0:4025ba8b84d6

Uploaded Tool Config file
author malex
date Fri, 02 Dec 2011 17:06:40 -0500
parents
children 681e9bb51cc4
comparison
equal deleted inserted replaced
-1:000000000000 0:4025ba8b84d6
1 <tool id="Garli" name="Garli" version="2.0" force_history_refresh="True">
<description>phylogenetic inference using the maximum-likelihood criterion</description>
<!--
  The command below is a Cheetah template, which allows some Python based
  syntax; Galaxy turns newlines inside it into spaces.
  Arguments to the wrapper beyond the config file are just for Galaxy's
  benefit: all output file names are hard-coded.
  (These notes used to be Cheetah "hash hash" lines placed outside the
  command element, where they were plain character data of the tool element
  rather than template comments.)
-->
<command interpreter="python">garli_wrapper.py $garli_conf $best_all_tre $best_tre $log00_log $screen_log </command>
7 <inputs>
<!-- Input alignment; the garli_conf template writes it out as datafname. -->
<param name="datafname" type="data" format="nexus" force_select="true"
       label="Nexus formatted sequence file"/>
<!--
  Analysis type: plain ML search or bootstrap.
  The garli_conf template reads searchreps, bootstrapreps and
  resampleproportion from choose_search_type whichever branch is active, so
  every branch has to define all three names. The branch that is not in use
  pins its replicate count to 0 through a hidden parameter.
-->
<conditional name="choose_search_type">
  <param name="search_type" type="select" label="Analysis Type">
    <option value="mlsearch" selected="true">ML Search</option>
    <option value="bootstrap">Bootstrap</option>
  </param>
  <when value="mlsearch">
    <param name="searchreps" type="integer" size="4" value="1" label="Number of replicates">
      <validator type="in_range" message="(1-infinity)" min="1" max="inf"/>
    </param>
    <param name="bootstrapreps" type="hidden" value="0"/>
    <param name="resampleproportion" type="float" value="1.0"
           label="Relative size of resample data matrix (0.1-10.0)">
      <validator type="in_range" message="(0.1-10.0)" min="0.1" max="10.0"/>
    </param>
  </when>
  <when value="bootstrap">
    <!-- NOTE(review): searchreps is fixed at 0 in bootstrap mode; confirm that
         GARLI accepts 0 here rather than requiring at least 1. -->
    <param name="searchreps" type="hidden" value="0"/>
    <param name="bootstrapreps" type="integer" size="4" value="1" label="Number of replicates">
      <validator type="in_range" message="(1-infinity)" min="1" max="inf"/>
    </param>
    <!-- This branch used to omit resampleproportion, so the config template
         could not be rendered for bootstrap runs. -->
    <param name="resampleproportion" type="float" value="1.0"
           label="Relative size of resample data matrix (0.1-10.0)">
      <validator type="in_range" message="(0.1-10.0)" min="0.1" max="10.0"/>
    </param>
  </when>
</conditional>
<!--
  Optional constraint specification file, written to garli.conf as
  constraintfile. The format uses the "txt" extension, the same one the
  outputs of this tool declare, instead of the unregistered name "text".
-->
<param name="constraintfile" type="data" format="txt" optional="true" label="Constraint file"/>
<!--
  Where the starting tree comes from. Each branch defines a parameter called
  streefname, so the garli_conf template can always read
  choose_streefname.streefname: a fixed keyword for "stepwise" and "random",
  or the selected Nexus dataset for "file".
-->
<conditional name="choose_streefname">
  <param label="Source of starting tree and/or model" type="select" name="streefname_menu">
    <option selected="true" value="stepwise">Stepwise</option>
    <option value="random">Random</option>
    <option value="file">User Tree</option>
  </param>
  <when value="stepwise">
    <param value="stepwise" type="hidden" name="streefname"/>
  </when>
  <when value="random">
    <param value="random" type="hidden" name="streefname"/>
  </when>
  <when value="file">
    <param label="Starting Tree File" type="data" format="nexus" name="streefname"/>
  </when>
</conditional>
<!-- Attachment branches tried per taxon; must be at least 1. -->
<param label="Attachment branches evaluated per taxon (min=1)"
       name="attachmentspertaxon" type="integer" value="50" size="4">
  <validator message="(1-infinity)" type="in_range" min="1" max="inf"/>
</param>

<!-- Seed is collected as free text; the range validator is declared from -1 upwards. -->
<param label="Random Seed (-1 or int)" name="randseed" type="text" value="-1" size="4">
  <validator message="(-1 to infinity)" type="in_range" min="-1" max="inf"/>
</param>

<!-- Written to garli.conf as availablememory; no range check is declared. -->
<param label="Available Memory" name="availablememory" type="integer" value="512" size="4"/>

<!-- Yes/No choice written to garli.conf as refinestart = 1 or 0. -->
<param label="Perform initial rough optimization" name="refinestart" type="select">
  <option selected="true" value="1">Yes</option>
  <option value="0">No</option>
</param>
<!--
  Outgroup taxa, written to garli.conf as outgroup. Declared as text because
  the tool help documents space-separated lists and dash ranges
  (for example "1 3 5" or "1-3 5"), which an integer field cannot hold.
  The default of "1" is unchanged.
-->
<param name="outgroup" type="text" size="20" value="1" label="Outgroup taxa numbers"/>
<!-- Yes/No choice written to garli.conf as collapsebranches = 1 or 0. -->
<param label="Collapse Branches" type="select" name="collapsebranches">
  <option selected="true" value="1">Yes</option>
  <option value="0">No</option>
</param>
72
<!--
  Substitution model settings, one branch per GARLI datatype.

  The garli_conf template reads the same five values from whichever branch is
  active:
    choose_datatype.choose_ratematrix.ratematrix
    choose_datatype.statefrequencies
    choose_datatype.ratehetmodel
    choose_datatype.numratecats
    choose_datatype.invariantsites
  Every branch therefore defines all five under exactly those names.
  Previously the aminoacid and codon-aminoacid branches spelled
  "statefrequences", had no choose_ratematrix wrapper and lacked ratehetmodel
  (codon-aminoacid also lacked numratecats and invariantsites), so the config
  template could not be rendered for them. The invariantsites default was
  also written as select="true" instead of selected="true" and so was ignored.
-->
<conditional name="choose_datatype">
  <param name="datatype" type="select" label="Model Type">
    <option value="nucleotide" selected="true">Nucleotide</option>
    <option value="aminoacid">Amino Acid</option>
    <option value="codon-aminoacid">Codon-Amino Acid</option>
    <option value="codon">Codon</option>
  </param>

  <when value="nucleotide">
    <conditional name="choose_ratematrix">
      <param name="ratematrix" type="select" label="Rate Matrix">
        <option value="1rate">1rate</option>
        <option value="2rate">2rate</option>
        <option value="6rate" selected="true">6rate</option>
        <option value="fixed">fixed</option>
        <option value="custom">custom</option>
      </param>
      <when value="1rate"/>
      <when value="2rate"/>
      <when value="6rate"/>
      <when value="fixed"/>
      <when value="custom">
        <!-- NOTE(review): this text field reuses the name of the select above,
             presumably so the template picks up the custom string; confirm that
             Galaxy resolves the duplicate name as intended. -->
        <param name="ratematrix" type="text" size="20" value="(a b a a b a)" label="Custom Rate Matrix"/>
      </when>
    </conditional>
    <param name="statefrequencies" type="select" label="State Frequencies">
      <option value="equal">Equal</option>
      <option value="empirical">Empirical</option>
      <option value="estimate" selected="true">Estimate</option>
      <option value="fixed">Fixed</option>
    </param>
    <param name="ratehetmodel" type="select" label="Rate Heterogeneity Type">
      <option value="none">None</option>
      <option value="gamma" selected="true">Gamma</option>
      <option value="gammafixed">Gamma Fixed</option>
    </param>
    <!-- dN/dS only applies to codon models; here these are plain rate categories. -->
    <param name="numratecats" type="integer" size="2" value="4" label="Number of rate categories">
      <validator type="in_range" message="(1-20)" min="1" max="20"/>
    </param>
    <param name="invariantsites" type="select"
           label="Treatment of proportion of invariable sites parameter">
      <option value="none">None</option>
      <option value="estimate" selected="true">Estimate</option>
      <option value="fixed">Fixed</option>
    </param>
  </when>

  <when value="aminoacid">
    <!-- Wrapped in choose_ratematrix only so the value lives at the path the
         config template reads; no option needs extra inputs. -->
    <conditional name="choose_ratematrix">
      <param name="ratematrix" type="select" label="Rate Matrix">
        <option value="poisson">Poisson</option>
        <option value="jones" selected="true">Jones</option>
        <option value="dayhoff">Dayhoff</option>
        <option value="wag">WAG</option>
        <option value="mtmam">mtmam</option>
        <option value="mtrev">mtREV</option>
      </param>
      <when value="poisson"/>
      <when value="jones"/>
      <when value="dayhoff"/>
      <when value="wag"/>
      <when value="mtmam"/>
      <when value="mtrev"/>
    </conditional>
    <param name="statefrequencies" type="select" label="Equilibrium Base Frequencies">
      <option value="equal">Equal</option>
      <option value="empirical" selected="true">Empirical</option>
      <option value="estimate">Estimate</option>
      <option value="fixed">Fixed</option>
      <option value="jones">Jones</option>
      <option value="dayhoff">Dayhoff</option>
      <option value="wag">WAG</option>
      <option value="mtmam">mtmam</option>
      <option value="mtrev">mtREV</option>
    </param>
    <!-- Same choices as the nucleotide branch; gamma is the default so that it
         agrees with the default of 4 rate categories below. -->
    <param name="ratehetmodel" type="select" label="Rate Heterogeneity Type">
      <option value="none">None</option>
      <option value="gamma" selected="true">Gamma</option>
      <option value="gammafixed">Gamma Fixed</option>
    </param>
    <param name="numratecats" type="integer" size="2" value="4" label="Number of rate categories">
      <validator type="in_range" message="(1-20)" min="1" max="20"/>
    </param>
    <param name="invariantsites" type="select"
           label="Treatment of proportion of invariable sites parameter">
      <option value="none">None</option>
      <option value="estimate" selected="true">Estimate</option>
      <option value="fixed">Fixed</option>
    </param>
  </when>

  <when value="codon-aminoacid">
    <!-- Mirrors the aminoacid branch (the help describes this datatype as being
         analysed with an amino acid model) and adds the genetic code. -->
    <conditional name="choose_ratematrix">
      <param name="ratematrix" type="select" label="Rate Matrix">
        <option value="poisson">Poisson</option>
        <option value="jones" selected="true">Jones</option>
        <option value="dayhoff">Dayhoff</option>
        <option value="wag">WAG</option>
        <option value="mtmam">mtmam</option>
        <option value="mtrev">mtREV</option>
      </param>
      <when value="poisson"/>
      <when value="jones"/>
      <when value="dayhoff"/>
      <when value="wag"/>
      <when value="mtmam"/>
      <when value="mtrev"/>
    </conditional>
    <param name="statefrequencies" type="select" label="Equilibrium Base Frequencies">
      <option value="equal">Equal</option>
      <option value="empirical" selected="true">Empirical</option>
      <option value="estimate">Estimate</option>
      <option value="fixed">Fixed</option>
      <option value="jones">Jones</option>
      <option value="dayhoff">Dayhoff</option>
      <option value="wag">WAG</option>
      <option value="mtmam">mtmam</option>
      <option value="mtrev">mtREV</option>
    </param>
    <param name="ratehetmodel" type="select" label="Rate Heterogeneity Type">
      <option value="none">None</option>
      <option value="gamma" selected="true">Gamma</option>
      <option value="gammafixed">Gamma Fixed</option>
    </param>
    <param name="numratecats" type="integer" size="2" value="4" label="Number of rate categories">
      <validator type="in_range" message="(1-20)" min="1" max="20"/>
    </param>
    <param name="invariantsites" type="select"
           label="Treatment of proportion of invariable sites parameter">
      <option value="none">None</option>
      <option value="estimate" selected="true">Estimate</option>
      <option value="fixed">Fixed</option>
    </param>
    <param name="geneticcode" type="select" label="Genetic Code">
      <option value="standard" selected="true">Standard</option>
      <option value="vertmito">Vertmito</option>
      <option value="invertmito">Invertmito</option>
    </param>
  </when>

  <when value="codon">
    <conditional name="choose_ratematrix">
      <param name="ratematrix" type="select" label="Rate Matrix">
        <option value="1rate">1rate</option>
        <option value="2rate">2rate</option>
        <option value="6rate" selected="true">6rate</option>
        <option value="fixed">fixed</option>
        <option value="custom">custom</option>
      </param>
      <when value="1rate"/>
      <when value="2rate"/>
      <when value="6rate"/>
      <when value="fixed"/>
      <when value="custom">
        <!-- NOTE(review): same duplicate-name arrangement as in the nucleotide
             branch; confirm Galaxy resolves it as intended. -->
        <param name="ratematrix" type="text" size="20" value="(a b a a b a)" label="Custom Rate Matrix"/>
      </when>
    </conditional>
    <param name="statefrequencies" type="select" label="State Frequencies">
      <option value="equal">Equal</option>
      <option value="empirical" selected="true">Empirical</option>
      <option value="f1x4">F1x4</option>
      <option value="f3x4">F3x4</option>
    </param>
    <param name="ratehetmodel" type="select" label="Rate Heterogeneity Type">
      <option value="none" selected="true">None</option>
      <option value="nonsynonymous">Non-synonymous</option>
    </param>
    <param name="numratecats" type="integer" size="2" value="1"
           label="Number of discrete dN/dS categories">
      <validator type="in_range" message="(1-20)" min="1" max="20"/>
    </param>
    <!-- Not offered for codon models; pinned to "none" so the template still finds it. -->
    <param name="invariantsites" type="hidden" value="none"/>
    <param name="geneticcode" type="select" label="Genetic Code">
      <option value="standard" selected="true">Standard</option>
      <option value="vertmito">Vertmito</option>
      <option value="invertmito">Invertmito</option>
    </param>
  </when>
</conditional>
<!-- Population and run-length settings; each is written to the [master]
     section of garli.conf under its own name. -->
<param label="Number of individuals in population" name="nindivs"
       type="integer" value="4" size="3">
  <validator message="(2-100)" type="in_range" min="2" max="100"/>
</param>

<param label="Unmutated copies of best individual" name="holdover"
       type="integer" value="1" size="2">
  <validator message="(1-99)" type="in_range" min="1" max="99"/>
</param>

<param label="Strength of selection" name="selectionintensity"
       type="float" value="0.5" size="3">
  <validator message="(0.1-5.0)" type="in_range" min="0.1" max="5.0"/>
</param>

<param label="Fitness handicap for best individual" name="holdoverpenalty"
       type="integer" value="0" size="3">
  <validator message="(0-100)" type="in_range" min="0" max="100"/>
</param>

<!-- stopgen and stoptime share the same default and the same upper bound. -->
<param label="Maximum number of generations to run" name="stopgen"
       type="integer" value="5000000" size="10">
  <validator message="(0-50000000)" type="in_range" min="0" max="50000000"/>
</param>

<param label="Maximum time to run" name="stoptime"
       type="integer" value="5000000" size="10">
  <validator message="(0-50000000)" type="in_range" min="0" max="50000000"/>
</param>
<!-- Branch-length optimisation precision schedule and tree rejection
     threshold; each is written to garli.conf under its own name. -->
<param label="Starting optimization precision" name="startoptprec"
       type="float" value="0.5" size="5">
  <validator message="(0.005-5.0)" type="in_range" min="0.005" max="5.0"/>
</param>

<param label="Minimal optimization precision" name="minoptprec"
       type="float" value="0.01" size="5">
  <validator message="(0.001-5.0)" type="in_range" min="0.001" max="5.0"/>
</param>

<param label="Number of steps down from Start Precision to Minimum Precision"
       name="numberofprecreductions" type="integer" value="10" size="3">
  <validator message="(0-100)" type="in_range" min="0" max="100"/>
</param>

<param label="Tree Rejection Threshold" name="treerejectionthreshold"
       type="float" value="50.0" size="5">
  <validator message="(0-500.0)" type="in_range" min="0" max="500.0"/>
</param>
<!-- Mutation-type weights. All six are non-negative floats with no upper
     bound and are written to garli.conf under their own names. -->
<param label="Weight on topology mutations" name="topoweight"
       type="float" value="1.0" size="10">
  <validator message="(0-infinity)" type="in_range" min="0" max="inf"/>
</param>

<param label="Weight on model parameter mutations" name="modweight"
       type="float" value="0.05" size="10">
  <validator message="(0-infinity)" type="in_range" min="0" max="inf"/>
</param>

<param label="Weight on branch-length parameter mutations" name="brlenweight"
       type="float" value="0.2" size="10">
  <validator message="(0-infinity)" type="in_range" min="0" max="inf"/>
</param>

<param label="Weight on NNI topology changes" name="randnniweight"
       type="float" value="0.1" size="10">
  <validator message="(0-infinity)" type="in_range" min="0" max="inf"/>
</param>

<param label="Weight on SPR topology changes" name="randsprweight"
       type="float" value="0.3" size="10">
  <validator message="(0-infinity)" type="in_range" min="0" max="inf"/>
</param>

<param label="Weight on localized SPR topology changes" name="limsprweight"
       type="float" value="0.6" size="10">
  <validator message="(0-infinity)" type="in_range" min="0" max="inf"/>
</param>
<!--
  Detailed mutation settings. Each validator message now states the range
  that its min/max attributes actually enforce; previously intervalstostore
  and limsprrange both advertised "(0-1000)" while enforcing 0..10 and
  0..inf respectively. The enforced bounds themselves are unchanged.
-->
<param name="intervallength" type="integer" size="4" value="100" label="Interval Length">
  <validator type="in_range" message="(0-1000)" min="0" max="1000"/>
</param>

<param name="intervalstostore" type="integer" size="2" value="5"
       label="Number of intervals to store">
  <validator type="in_range" message="(0-10)" min="0" max="10"/>
</param>

<param name="limsprrange" type="integer" size="10" value="6"
       label="Max range for localized SPR topology changes">
  <validator type="in_range" message="(0-infinity)" min="0" max="inf"/>
</param>

<!-- The number of taxa is not known to the form, so the upper bound is just a
     very large constant. -->
<param name="meanbrlenmuts" type="integer" size="7" value="5"
       label="Mean number of branch lengths to change per mutation">
  <validator type="in_range" message="(0-# of taxa)" min="0" max="10000000000"/>
</param>

<param name="gammashapebrlen" type="integer" size="4" value="1000"
       label="Magnitude of branch-length mutations">
  <validator type="in_range" message="(50-2000)" min="50" max="2000"/>
</param>

<param name="gammashapemodel" type="integer" size="4" value="1000"
       label="Magnitude of model parameter mutations">
  <validator type="in_range" message="(50-2000)" min="50" max="2000"/>
</param>

<param name="uniqueswapbias" type="float" size="4" value="0.1"
       label="Relative weight assigned to already attempted branch swaps">
  <validator type="in_range" message="(0.01-1.0)" min="0.01" max="1.0"/>
</param>

<param name="distanceswapbias" type="float" size="3" value="1.0"
       label="Relative weight assigned to branch swaps based on locality">
  <validator type="in_range" message="(0.1-10.0)" min="0.1" max="10.0"/>
</param>
318 </inputs>
<!--
  Output datasets, each collected from the job working directory by file name
  (from_work_dir). The two tree outputs take their metadata from the input
  alignment. Each label ends with the name of the file the dataset is read
  from; the best_all_tre label previously said "garli.all.best.tre", which
  did not match its file garli.best.all.tre.
-->
<outputs>
  <data format="nexus" name="best_tre" metadata_source="datafname" from_work_dir="garli.best.tre" label="${tool.name} on ${on_string}: garli.best.tre"/>
  <data format="nexus" name="best_all_tre" metadata_source="datafname" from_work_dir="garli.best.all.tre" label="${tool.name} on ${on_string}: garli.best.all.tre"/>
  <data format="txt" name="garli_conf" from_work_dir="garli.conf" label="${tool.name} on ${on_string}: garli.conf"/>
  <data format="txt" name="log00_log" from_work_dir="garli.log00.log" label="${tool.name} on ${on_string}: garli.log00.log"/>
  <data format="txt" name="screen_log" from_work_dir="garli.screen.log" label="${tool.name} on ${on_string}: garli.screen.log"/>
</outputs>
<configfiles>
  <!--
    garli.conf, rendered as a Cheetah template and handed to the wrapper as
    $garli_conf. Values in braces come from the tool parameters; everything
    else is a fixed setting. The template body is kept flush left so that no
    indentation ends up in the generated file.

    Two points differ from a plain substitution:
      * constraintfile is optional. When no dataset is chosen Galaxy renders
        the parameter as the string "None", so the template writes GARLI's
        documented lowercase keyword "none" instead.
      * distanceswapbias is taken from the tool parameter of the same name;
        it used to be hard-coded to 1.0, which silently ignored the form.

    numratecats and invariantsites are only emitted when their guards hold.
  -->
  <configfile name="garli_conf">
[general]
datafname = ${datafname}
searchreps = ${choose_search_type.searchreps}
bootstrapreps = ${choose_search_type.bootstrapreps}
#if str($constraintfile) != 'None'
constraintfile = ${constraintfile}
#else
constraintfile = none
#end if
streefname = ${choose_streefname.streefname}
attachmentspertaxon = ${attachmentspertaxon}
ofprefix = garli
randseed = ${randseed}
availablememory = ${availablememory}
logevery = 10
writecheckpoints = 0
saveevery = 100
refinestart = ${refinestart}
outputeachbettertopology = 0
outputcurrentbesttopology = 0
enforcetermconditions = 1
genthreshfortopoterm = 20000
scorethreshforterm = 0.05
significanttopochange = 0.01
outputphyliptree = 0
outputmostlyuselessfiles = 0
restart = 0
outgroup = ${outgroup}
resampleproportion = ${choose_search_type.resampleproportion}
inferinternalstateprobs = 0
outputsitelikelihoods = 0
optimizeinputonly = 0
collapsebranches = ${collapsebranches}

[model1]
datatype = ${choose_datatype.datatype}
ratematrix = ${choose_datatype.choose_ratematrix.ratematrix}
statefrequencies = ${choose_datatype.statefrequencies}
ratehetmodel = ${choose_datatype.ratehetmodel}
#if $choose_datatype.numratecats > 0
numratecats = ${choose_datatype.numratecats}
#end if
#if $choose_datatype.invariantsites != ""
invariantsites = ${choose_datatype.invariantsites}
#end if

[master]
nindivs = $nindivs
holdover = $holdover
selectionintensity = $selectionintensity
holdoverpenalty = $holdoverpenalty
stopgen = $stopgen
stoptime = $stoptime

startoptprec = $startoptprec
minoptprec = $minoptprec
numberofprecreductions = $numberofprecreductions
treerejectionthreshold = $treerejectionthreshold
topoweight = $topoweight
modweight = $modweight
brlenweight = $brlenweight
randnniweight = $randnniweight
randsprweight = $randsprweight
limsprweight = $limsprweight
intervallength = $intervallength
intervalstostore = $intervalstostore
limsprrange = $limsprrange
meanbrlenmuts = $meanbrlenmuts
gammashapebrlen = $gammashapebrlen
gammashapemodel = $gammashapemodel
uniqueswapbias = $uniqueswapbias
distanceswapbias = $distanceswapbias
  </configfile>
</configfiles>
398
399 <help>
400
401 **What it does**
402
403 GARLI is a program that performs phylogenetic inference using the
404 maximum-likelihood criterion. Several sequence types are supported,
405 including nucleotide, amino acid and codon. Version 2.0 adds support
406 for partitioned models and morphology-like datatypes.
407
408 Garli is written and maintained by Derrick Zwickl
409
410 Configuration options are adapted from
411 https://www.nescent.org/wg_garli/GARLI_Configuration_Settings
412
413 -----
414
415 **Detailed description of the configuration options**
416
417
418 **Analysis Type**
419
420 Specify whether to perform a maximum likelihood search for the best tree, or
421 a bootstrap analysis.
422
423
424 **Number of replicates**
425
426 Number of independent search replicates to run.
427
428
429 **Relative size of resample data**
430
431 This setting allows for bootstrap-like resampling, but with the
432 pseudoreplicate datasets having the number of alignment columns different
433 from the real data. Setting values below 1.0 is somewhat similar to
434 jackknifing, but not identical.
435
436
437 **Attachment branches evaluated per taxon (min=1)**
438
439 The number of attachment branches evaluated for each taxon to be added to
440 the tree during the creation of an ML stepwise-addition starting tree.
441 Briefly, stepwise addition is an algorithm used to make a tree, and involves
442 adding taxa in a random order to a growing tree. For each taxon to be added,
443 a number of randomly chosen attachment branches are tried and scored, and
444 then the best scoring one is chosen as the location of that taxon. This
445 setting controls how many attachment points are evaluated for each taxon to
446 be added. A value of one is equivalent to a completely random tree (only one
447 randomly chosen location is evaluated). A value of greater than 2 times the
448 number of taxa in the dataset means that all attachment points will be
449 evaluated for each taxon, and will result in very good starting trees (but
450 may take a while on large datasets). Even fairly small values (less than 10)
451 can result in starting trees that are much, much better than random, but
452 still fairly different from one another.
453
454
455 **Constraint file**
456
457 Select a file containing constraint specifications.
458
459
460 **Random seed**
461
462 The random seed can have a value of -1 or a positive integer. It is the
463 seed used by the random number generator. Specify “-1” to have a seed chosen
464 for you. Specifying the same seed number in multiple runs will give exactly
465 identical results, if all other parameters and settings are also identical.
466
467
468 **Available memory**
469
470 This lets GARLI determine how much system memory it may be able to use to
471 store computations for reuse.
472
473
474 **Perform initial rough optimization**
475
476 Specifies whether some initial rough optimization is performed on the
477 starting branch lengths and rate heterogeneity parameters. This is always
478 recommended.
479
480
481 **Outgroup taxa numbers**
482
483 The outgroup option allows for orienting tree topologies in a consistent way
484 when they are written to a file. Note that this has NO effect whatsoever on
485 the actual inference and the specified outgroup is NOT constrained to be
486 present in the inferred trees. If multiple outgroup taxa are specified and
487 they do not form a monophyletic group, this setting will be ignored. If you
488 specify a single outgroup taxon it will always be present, and the tree will
489 always be consistently oriented. To specify an outgroup consisting of taxa
490 1, 3 and 5 the format is this: outgroup = 1 3 5. Dashes are used for ranges
491 e.g. 1-3 5.
492
493
494 **Collapse branches**
495
496 Before version 1.0, all trees that are returned were fully resolved. This is
497 true even if the maximum-likelihood estimate of some internal branch lengths
498 are effectively zero (or GARLI's minimum, which is 1e-8). In such cases,
499 collapsing the branch into a polytomy would be a better representation. Note
500 that GARLI will never return a tree with an actual branch length of zero,
501 but rather with its minimum value of 1.0e-8. The drawback of always
502 returning fully resolved trees is that what is effectively a polytomy can be
503 resolved in three ways, and different independent searches may randomly
504 return one of those resolutions. Thus, if you compare the trees by topology
505 only, they will look different. If you pay attention to the branch lengths
506 and likelihood scores of the trees it will be apparent that they are
507 effectively the same. I think that collapsing of branches is particularly
508 important when bootstrapping, since no support should be given to a branch
509 that doesn't really exist, i.e., that is a random resolution of a polytomy.
510 Collapsing is also good when calculating tree to tree distances such as the
511 symmetric tree distance, for example when calculating phylogenetic error to
512 a known target tree. Zero-length branches would add to the distances
513 (~error) although they really should not.
514
515
516 **Model type**
517
518 The codon-aminoacid datatype means that the data will be supplied as a
519 nucleotide alignment, but will be internally translated and analyzed using
520 an amino acid model. The codon and codon-aminoacid datatypes require
521 nucleotide sequence that is aligned in the correct reading frame. In other
522 words, all gaps in the alignment should be a multiple of 3 in length, and
523 the alignment should start at the first position of a codon. If the
524 alignment has extra columns at the start, middle or end, they should be
525 removed or excluded with a Nexus exset (see the FAQ for an example of exset
526 usage). The correct Genetic Code must also be set.
527
528
529
530
531 **Datatype - nucleotide**
532
533 **Rate matrix**
534
535 The number of relative substitution rate parameters (note that the number of
536 free parameters is this value minus one). Equivalent to the “nst” setting in
537 PAUP* and MrBayes. 1rate assumes that substitutions between all pairs of
538 nucleotides occur at the same rate (JC model), 2rate allows different rates
539 for transitions and transversions (K2P or HKY models), and 6rate allows a
540 different rate between each nucleotide pair (GTR). These rates are estimated
541 unless the fixed option is chosen. Since version 0.96, parameters for any
542 submodel of the GTR model may be estimated. The format for specifying this
543 is very similar to that used in the “rclass” setting of PAUP*. Within
544 parentheses, six letters are specified, with spaces between them. The six
545 letters represent the rates of substitution between the six pairs of
546 nucleotides, with the order being A-C, A-G, A-T, C-G, C-T and G-T. Letters
547 within the parentheses that are the same mean that a single parameter is
548 shared by multiple nucleotide pairs.
549
550
551 **State frequencies**
552
553 Specifies how the equilibrium state frequencies (A, C, G and T) are treated.
554 The empirical setting fixes the frequencies at their observed proportions,
555 and the other options should be self-explanatory.
556
557
558 **Datatype - nucleotide or amino-acid**
559
560
561 **Treatment of proportion of invariable sites parameter**
562
563 Specifies whether a parameter representing the proportion of sites that are
564 unable to change (i.e. have a substitution rate of zero) will be included.
565 This is typically referred to as 'invariant sites', but would better be
566 termed 'invariable sites'.
567
568
569 **Rate heterogeneity type**
570
571 (none, gamma, gammafixed) – The model of rate heterogeneity assumed.
572 “gammafixed” requires that the alpha shape parameter is provided, and a
573 setting of “gamma” estimates it.
574
575
576 **Number of rate categories**
577
578 The number of categories of variable rates (not including the invariant site
579 class if it is being used). Must be set to 1 if ratehetmodel is set to none.
580 Note that runtimes and memory usage scale linearly with this setting.
581
582
583 **Datatype - amino-acid or codon-aminoacid**
584
585 **Rate matrix**
586
587 (poisson, jones, dayhoff, wag, mtmam, mtrev) – The fixed amino acid rate
588 matrix to use. You should use the matrix that gives the best likelihood, and
589 could use a program like PROTTEST (very much like MODELTEST, but for amino
590 acid models) to determine which fits best for your data. Poisson assumes a
591 single rate of substitution between all amino acid pairs, and is a very poor
592 model.
593
594
595 **Equilibrium Base Frequencies**
596
597 (equal, empirical, estimate, fixed, jones, dayhoff, wag, mtmam, mtrev) –
598 Specifies how the equilibrium state frequencies of the 20 amino acids are
599 treated. The “empirical” option fixes the frequencies at their observed
600 proportions (when describing a model this is often termed '+F').
601
602
603 **Number of rate categories**
604
605 The number of categories of variable rates (not including the invariant site
606 class if it is being used). Must be set to 1 if ratehetmodel is set to none.
607 Note that runtimes and memory usage scale linearly with this setting.
608
609
610 **Treatment of proportion of invariable sites parameter**
611
612 Specifies whether a parameter representing the proportion of sites that are
613 unable to change (i.e. have a substitution rate of zero) will be included.
614 This is typically referred to as 'invariant sites', but would better be
615 termed 'invariable sites'.
616
617
618 **Datatype - codon**
619
620
621 **Rate matrix**
622
623 (1rate, 2rate, 6rate, fixed, custom string) – This determines the relative
624 rates of nucleotide substitution assumed by the codon model. The options are
625 exactly the same as those allowed under a normal nucleotide model. A codon
626 model with ratematrix = 2rate specifies the standard Goldman and Yang (1994)
627 model, with different substitution rates for transitions and transversions.
628
629
630 **State frequencies**
631
632 The options are to use equal codon frequencies (not a good option), the
633 frequencies observed in your dataset (termed “empirical” in GARLI), or the
634 codon frequencies implied by the “F1x4” or “F3x4” methods (using PAML
635 terminology). These last two options calculate the codon frequencies as the
636 product of the frequencies of the three nucleotides that make up each codon.
637 In the “F1x4” case the nucleotide frequencies are those observed in the
638 dataset across all codon positions, while the “F3x4” option uses the
639 nucleotide frequencies observed in the data at each codon position
640 separately.
641
642
643 **Rate Heterogeneity Type**
644
645 For codon models, the default is to infer a single dN/dS parameter.
646 Alternatively, a model can be specified that infers a given number of dN/dS
647 categories, with the dN/dS values and proportions falling in each category
648 estimated (ratehetmodel = nonsynonymous). This is the 'discrete' or 'M3'
649 model of Yang et al., 2000.
650
651
652 **Number of discrete dN/dS categories**
653
654 When ratehetmodel = nonsynonymous, this is the number of dN/dS parameter
655 categories.
656
657
658 **Datatype - codon or codon-aminoacid**
659
660
661 **Genetic code**
662
663 The genetic code to be used in translating codons into amino acids.
664
665
666 **Population Settings**
667
668
669 **Number of individuals in population**
670
671 The number of individuals in the population. This may be increased, but
672 doing so is generally not beneficial. Note that typical genetic algorithms
673 tend to have much, much larger population sizes than GARLI defaults.
674
675
676 **Unmutated copies of best individual**
677
678 The number of times the best individual is copied to the next generation
679 with no chance of mutation. It is best not to mess with this setting.
680
681
682 **Strength of selection**
683
684 Controls the strength of selection, with larger numbers denoting stronger
685 selection. The relative probability of reproduction of two individuals
686 depends on the difference in their log likelihoods (ΔlnL) and is formulated
687 very similarly to the procedure of calculating Akaike weights.
688
689
690 **Fitness handicap for the best individual**
691
692 This can be used to bias the probability of reproduction of the best
693 individual downward. Because the best individual is automatically copied
694 into the next generation, it has a bit of an unfair advantage and can cause
695 all population variation to be lost due to genetic drift, especially with
696 small population sizes. The value specified here is subtracted from the
697 best individual’s lnL score before calculating the probabilities of
698 reproduction. It seems plausible that this might help maintain variation,
699 but I have not seen it cause a measurable effect.
700
701
702 **Maximum number of generations to run**
703
704 Use if automatic termination is desired to prevent a runaway process.
705
706
707 **Maximum time to run**
708
709 The maximum number of seconds for the run to continue. Use if automatic
710 termination is desired to prevent a runaway process.
711
712
713 **Branch-length optimization settings**
714
715
716 **Minimal optimization precision**
717
718 The minimum allowed value of the optimization precision - must not be larger
719 than the Starting optimization precision.
720
721
722 **Number of steps down from Start Precision to Minimum Precision**
723
724 Specify the number of steps that it will take for the optimization precision
725 to decrease (linearly) from startoptprec to minoptprec.
726
727
728 **Tree rejection threshold**
729
730 This setting controls which trees have more extensive branch-length
731 optimization applied to them. All trees created by a branch swap receive
732 optimization on a few branches that directly took part in the rearrangement.
733 If the difference in score between the partially optimized tree and the best
734 known tree is greater than treerejectionthreshold, no further optimization
735 is applied to the branches of that tree. Reducing this value can
736 significantly reduce runtimes, often with little or no effect on results.
737 However, it is possible that a better tree could be missed if this is set
738 too low. In cases in which obtaining the very best tree per search is not
739 critical (e.g., bootstrapping), setting this lower (~20) is probably safe.
740
741
742 **Settings controlling the proportions of the mutation types**
743
744
745 **Weight on topology mutations**
746
747 The prior weight assigned to the class of topology mutations (NNI, SPR and
748 limSPR). Note that setting this to 0.0 turns off topology mutations, meaning
749 that the tree topology is fixed for the run. This used to be a way to have
750 the program estimate only model parameters and branch-lengths, but the
751 optimizeinputonly setting is now a better way to go.
752
753
754 **Weight on model parameter mutations**
755
756 The prior weight assigned to the class of model mutations. Note that setting
757 this at 0.0 fixes the model during the run.
758
759
760 **Weight on branch-length parameter mutations**
761
762 The prior weight assigned to branch-length mutations. The same procedure
763 used above to determine the proportion of Topology:Model:Branch-Length
764 mutations is also used to determine the relative proportions of the three
765 types of topological mutations (NNI:SPR:limSPR), controlled by the following
766 three weights. Note that the proportion of mutations applied to each of the
767 model parameters is not user controlled.
768
769
770 **Weight on NNI topology changes**
771
772 The prior weight assigned to NNI mutations.
773
774
775 **Weight on SPR topology changes**
776
777 The prior weight assigned to random SPR mutations. For very large datasets
778 it is often best to set this to 0.0, as random SPR mutations essentially
779 never result in score increases.
780
781
782 **Weight on localized SPR topology changes**
783
784 The prior weight assigned to SPR mutations with the reconnection branch
785 limited to being a maximum of limsprrange branches away from where the
786 branch was detached.
787
788
789 **Interval Length**
790
791 The number of generations in each interval during which the number and
792 benefit of each mutation type are stored.
793
794
795 **Number of intervals to store**
796
797 The number of intervals to be stored. Thus, records of mutations are kept
798 for the last (intervallength x intervalstostore) generations. Every
799 intervallength generations the probabilities of the mutation types are
800 updated by the scheme described above.
801
802
803 **Settings controlling mutation details**
804
805
806 **Max range for localized SPR topology changes**
807
808 The maximum number of branches away from its original location that a branch
809 may be reattached during a limited SPR move. Setting this too high (&gt; 10)
810 can seriously degrade performance, but if you do so in conjunction with a
811 large increase in genthreshfortopoterm you might end up with better scores.
812
813
814 **Mean number of branch lengths mutated**
815
816 The mean of the binomial distribution from which the number of branch
817 lengths mutated is drawn during a branch length mutation.
818
819
820 **Magnitude of branch-length mutations**
821
822 The shape parameter of the gamma distribution (with a mean of 1.0) from
823 which the branch-length multipliers are drawn for branch-length mutations.
824 Larger numbers cause smaller changes in branch lengths. (Note that this has
825 nothing to do with gamma rate heterogeneity.)
826
827
828 **Magnitude of model parameter mutations**
829
830 The shape parameter of the gamma distribution (with a mean of 1.0) from
831 which the model mutation multipliers are drawn for model parameters
832 mutations. Larger numbers cause smaller changes in model parameters. (Note
833 that this has nothing to do with gamma rate heterogeneity.)
834
835
836 **Relative weight assigned to already attempted branch swaps**
837
838 With version 0.95 and later, GARLI keeps track of which branch swaps it has
839 attempted on the current best tree. Because swaps are applied randomly, it
840 is possible that some swaps are tried twice before others are tried at all.
841 This option allows the program to bias the swaps applied toward those that
842 have not yet been attempted. Each swap is assigned a relative weight
843 depending on the number of times that it has been attempted on the current
844 best tree. This weight is equal to (uniqueswapbias) raised to the (# times
845 swap attempted) power. In other words, a value of 0.5 means that swaps that
846 have already been tried once will be half as likely as those not yet
847 attempted, swaps attempted twice will be ¼ as likely, etc. A value of 1.0
848 means no biasing. Use of this option may allow the use of somewhat larger
849 values of limsprrange.
850
851
852 **Relative weight assigned to branch swaps based on locality**
853
854 This option is similar to uniqueswapbias, except that it biases toward
855 certain swaps based on the topological distance between the initial and
856 rearranged trees. The distance is measured as in the limsprrange, and is
857 half the Robinson-Foulds distance between the trees. As with
858 uniqueswapbias, distanceswapbias assigns a relative weight to each potential
859 swap. In this case the weight is (distanceswapbias) raised to the
860 (reconnection distance - 1) power. Thus, given a value of 0.5, the weight of
861 an NNI is 1.0, the weight of an SPR with distance 2 is 0.5, with distance 3
862 is 0.25, etc. Note that values less than 1.0 bias toward more localized
863 swaps, while values greater than 1.0 bias toward more extreme swaps. Also
864 note that this bias is only applied to limSPR rearrangements. Be careful in
865 setting this, as extreme values can have a very large effect.
866
867 </help>
868 </tool>