0
|
1 <tool id="Garli" name="Garli" version="2.0" force_history_refresh="True">
|
|
<!-- One-line summary of the tool; completes the previously truncated phrase. -->
<description>phylogenetic inference using the maximum-likelihood criterion</description>
|
|
<!--
  The command below is a Cheetah template, which allows some Python based
  syntax; Galaxy turns newlines in it into spaces. (Inside the template, lines
  starting with two hash characters are Cheetah comments; out here they would be
  plain text, hence this XML comment.)
  Arguments to the wrapper beyond the config file are just for Galaxy's
  benefit: all file names are hard-coded.
-->
<command interpreter="python">garli_wrapper.py $garli_conf $best_all_tre $best_tre $log00_log $screen_log </command>
|
|
7 <inputs>
|
|
<!-- Input alignment; rendered into the "datafname" entry of the garli.conf template. -->
<param name="datafname" type="data" format="nexus" label="Nexus formatted sequence file" force_select="true"/>
|
|
<!--
  Analysis mode. The garli.conf template reads searchreps, bootstrapreps and
  resampleproportion from this conditional whichever case is selected, so every
  case must define all three names.
-->
<conditional name="choose_search_type">
    <param name="search_type" type="select" label="Analysis Type">
        <option value="mlsearch" selected="true">ML Search</option>
        <option value="bootstrap">Bootstrap</option>
    </param>
    <when value="mlsearch">
        <param name="searchreps" type="integer" size="4" value="1" label="Number of replicates">
            <validator type="in_range" message="(1-infinity)" min="1" max="inf"/>
        </param>
        <param name="bootstrapreps" type="hidden" value="0"/>
        <!-- Resampling is a bootstrap setting; keep the neutral value for plain ML searches. -->
        <param name="resampleproportion" type="hidden" value="1.0"/>
    </when>
    <when value="bootstrap">
        <!-- NOTE(review): GARLI documents searchreps as 1 or greater (here: searches per
             bootstrap replicate), so 1 is used instead of the previous 0; please confirm. -->
        <param name="searchreps" type="hidden" value="1"/>
        <param name="bootstrapreps" type="integer" size="4" value="1" label="Number of replicates">
            <validator type="in_range" message="(1-infinity)" min="1" max="inf"/>
        </param>
        <!-- Previously missing from this case, which left the config template unrenderable. -->
        <param name="resampleproportion" type="float" value="1.0" label="Relative size of resample data matrix (0.1-10.0)">
            <validator type="in_range" message="(0.1-10.0)" min="0.1" max="10.0"/>
        </param>
    </when>
</conditional>
|
|
<!-- Optional topology constraints. Uses the same plain-text datatype name ("txt") as the outputs. -->
<param name="constraintfile" type="data" format="txt" label="Constraint file" optional="true"/>
|
|
<!-- Origin of the starting tree. Every case provides a value called "streefname". -->
<conditional name="choose_streefname">
    <param type="select" name="streefname_menu" label="Source of starting tree and/or model">
        <option selected="true" value="stepwise">Stepwise</option>
        <option value="random">Random</option>
        <option value="file">User Tree</option>
    </param>
    <!-- The two built-in strategies are forwarded as literal keywords. -->
    <when value="stepwise">
        <param type="hidden" name="streefname" value="stepwise"/>
    </when>
    <when value="random">
        <param type="hidden" name="streefname" value="random"/>
    </when>
    <!-- A Nexus dataset chosen by the user takes the place of the keyword. -->
    <when value="file">
        <param type="data" name="streefname" format="nexus" label="Starting Tree File"/>
    </when>
</conditional>
|
|
<!-- Attachment points tried per taxon while building the stepwise starting tree. -->
<param type="integer" name="attachmentspertaxon" value="50" size="4" label="Attachment branches evaluated per taxon (min=1)">
    <validator type="in_range" min="1" max="inf" message="(1-infinity)"/>
</param>
|
|
<!--
  Random number seed; -1 asks GARLI to choose one. Declared as an integer so the
  numeric range validator applies to a number rather than to free text.
-->
<param name="randseed" type="integer" size="4" value="-1" label="Random Seed (-1 or int)">
    <validator type="in_range" message="(-1 to infinity)" min="-1" max="inf"/>
</param>
|
|
<!-- Memory budget rendered into the "availablememory" entry of garli.conf. -->
<param type="integer" name="availablememory" label="Available Memory" value="512" size="4"/>
|
|
<!-- Toggle for the initial rough optimization (1 = yes, 0 = no). Label kept on one line
     so no line break ends up inside the attribute value. -->
<param name="refinestart" type="select" label="Perform initial rough optimization">
    <option value="1" selected="true">Yes</option>
    <option value="0">No</option>
</param>
|
|
<!--
  Outgroup taxa. The help text documents space separated numbers and dashed
  ranges (for example "1 3 5" or "1-3 5"), which an integer field cannot hold,
  so this is a free-text field with the same default of "1".
-->
<param name="outgroup" type="text" size="20" value="1" label="Outgroup taxa numbers"/>
|
|
<!-- Toggle for collapsing branches (1 = yes, 0 = no). Label kept on one line so no
     line break ends up inside the attribute value. -->
<param name="collapsebranches" type="select" label="Collapse Branches">
    <option value="1" selected="true">Yes</option>
    <option value="0">No</option>
</param>
|
|
72
|
|
<!--
  Substitution model settings, grouped by datatype.

  The garli.conf template reads statefrequencies, ratehetmodel, numratecats and
  invariantsites from whichever case is active, so every case defines all four
  (spelled exactly like that). The two codon based cases additionally define
  geneticcode.

  NOTE(review): in the nested choose_ratematrix conditionals the "custom" case
  re-uses the name "ratematrix" for its text field, apparently so that the
  custom string replaces the keyword in the rendered config; confirm Galaxy
  resolves the duplicate name as intended.
-->
<conditional name="choose_datatype">
    <param name="datatype" type="select" label="Model Type">
        <option value="nucleotide" selected="true">Nucleotide</option>
        <option value="aminoacid">Amino Acid</option>
        <option value="codon-aminoacid">Codon-Amino Acid</option>
        <option value="codon">Codon</option>
    </param>

    <when value="nucleotide">
        <conditional name="choose_ratematrix">
            <param name="ratematrix" type="select" label="Rate Matrix">
                <option value="1rate">1rate</option>
                <option value="2rate">2rate</option>
                <option value="6rate" selected="true">6rate</option>
                <option value="fixed">fixed</option>
                <option value="custom">custom</option>
            </param>
            <when value="1rate"/>
            <when value="2rate"/>
            <when value="6rate"/>
            <when value="fixed"/>
            <when value="custom">
                <param name="ratematrix" type="text" size="20" value="(a b a a b a)" label="Custom Rate Matrix"/>
            </when>
        </conditional>
        <param name="statefrequencies" type="select" label="State Frequencies">
            <option value="equal">Equal</option>
            <option value="empirical">Empirical</option>
            <option value="estimate" selected="true">Estimate</option>
            <option value="fixed">Fixed</option>
        </param>
        <param name="ratehetmodel" type="select" label="Rate Heterogeneity Type">
            <option value="none">None</option>
            <option value="gamma" selected="true">Gamma</option>
            <option value="gammafixed">Gamma Fixed</option>
        </param>
        <!-- dN/dS only exists for codon models; here these are plain rate categories. -->
        <param name="numratecats" type="integer" size="2" value="4" label="Number of discrete rate categories">
            <validator type="in_range" message="(1-20)" min="1" max="20"/>
        </param>
        <param name="invariantsites" type="select" label="Treatment of proportion of invariable sites parameter">
            <option value="none">None</option>
            <!-- "selected" (was misspelled "select", so the default was never applied). -->
            <option value="estimate" selected="true">Estimate</option>
            <option value="fixed">Fixed</option>
        </param>
    </when>

    <when value="aminoacid">
        <param name="ratematrix" type="select" label="Rate Matrix">
            <option value="poisson">Poisson</option>
            <option value="jones" selected="true">Jones</option>
            <option value="dayhoff">Dayhoff</option>
            <option value="wag">WAG</option>
            <option value="mtmam">mtmam</option>
            <option value="mtrev">mtREV</option>
        </param>
        <!-- Name corrected from "statefrequences" to the name the config template reads. -->
        <param name="statefrequencies" type="select" label="Equilibrium Base Frequencies">
            <option value="equal">Equal</option>
            <option value="empirical" selected="true">Empirical</option>
            <option value="estimate">Estimate</option>
            <option value="fixed">Fixed</option>
            <option value="jones">Jones</option>
            <option value="dayhoff">Dayhoff</option>
            <option value="wag">WAG</option>
            <option value="mtmam">mtmam</option>
            <option value="mtrev">mtREV</option>
        </param>
        <!-- Added: the config template reads ratehetmodel for every datatype. -->
        <param name="ratehetmodel" type="select" label="Rate Heterogeneity Type">
            <option value="none">None</option>
            <option value="gamma" selected="true">Gamma</option>
            <option value="gammafixed">Gamma Fixed</option>
        </param>
        <param name="numratecats" type="integer" size="2" value="4" label="Number of discrete rate categories">
            <validator type="in_range" message="(1-20)" min="1" max="20"/>
        </param>
        <param name="invariantsites" type="select" label="Treatment of proportion of invariable sites parameter">
            <option value="none">None</option>
            <option value="estimate" selected="true">Estimate</option>
            <option value="fixed">Fixed</option>
        </param>
    </when>

    <when value="codon-aminoacid">
        <param name="ratematrix" type="select" label="Rate Matrix">
            <option value="poisson">Poisson</option>
            <option value="jones" selected="true">Jones</option>
            <option value="dayhoff">Dayhoff</option>
            <option value="wag">WAG</option>
            <option value="mtmam">mtmam</option>
            <option value="mtrev">mtREV</option>
        </param>
        <!-- Name corrected from "statefrequences" to the name the config template reads. -->
        <param name="statefrequencies" type="select" label="Equilibrium Base Frequencies">
            <option value="equal">Equal</option>
            <option value="empirical" selected="true">Empirical</option>
            <option value="estimate">Estimate</option>
            <option value="fixed">Fixed</option>
            <option value="jones">Jones</option>
            <option value="dayhoff">Dayhoff</option>
            <option value="wag">WAG</option>
            <option value="mtmam">mtmam</option>
            <option value="mtrev">mtREV</option>
        </param>
        <!-- Added: ratehetmodel, numratecats and invariantsites were missing from this
             case although the config template reads them; they mirror the amino acid case. -->
        <param name="ratehetmodel" type="select" label="Rate Heterogeneity Type">
            <option value="none">None</option>
            <option value="gamma" selected="true">Gamma</option>
            <option value="gammafixed">Gamma Fixed</option>
        </param>
        <param name="numratecats" type="integer" size="2" value="4" label="Number of discrete rate categories">
            <validator type="in_range" message="(1-20)" min="1" max="20"/>
        </param>
        <param name="invariantsites" type="select" label="Treatment of proportion of invariable sites parameter">
            <option value="none">None</option>
            <option value="estimate" selected="true">Estimate</option>
            <option value="fixed">Fixed</option>
        </param>
        <param name="geneticcode" type="select" label="Genetic Code">
            <option value="standard" selected="true">Standard</option>
            <option value="vertmito">Vertmito</option>
            <option value="invertmito">Invertmito</option>
        </param>
    </when>

    <when value="codon">
        <conditional name="choose_ratematrix">
            <param name="ratematrix" type="select" label="Rate Matrix">
                <option value="1rate">1rate</option>
                <option value="2rate">2rate</option>
                <option value="6rate" selected="true">6rate</option>
                <option value="fixed">fixed</option>
                <option value="custom">custom</option>
            </param>
            <when value="1rate"/>
            <when value="2rate"/>
            <when value="6rate"/>
            <when value="fixed"/>
            <when value="custom">
                <param name="ratematrix" type="text" size="20" value="(a b a a b a)" label="Custom Rate Matrix"/>
            </when>
        </conditional>
        <param name="statefrequencies" type="select" label="State Frequencies">
            <option value="equal">Equal</option>
            <option value="empirical" selected="true">Empirical</option>
            <option value="f1x4">F1x4</option>
            <option value="f3x4">F3x4</option>
        </param>
        <param name="ratehetmodel" type="select" label="Rate Heterogeneity Type">
            <option value="none" selected="true">None</option>
            <option value="nonsynonymous">Non-synonymous</option>
        </param>
        <param name="numratecats" type="integer" size="2" value="1" label="Number of discrete dN/dS categories">
            <validator type="in_range" message="(1-20)" min="1" max="20"/>
        </param>
        <!-- Not user selectable for codon models; fixed to "none". -->
        <param name="invariantsites" type="hidden" value="none"/>
        <param name="geneticcode" type="select" label="Genetic Code">
            <option value="standard" selected="true">Standard</option>
            <option value="vertmito">Vertmito</option>
            <option value="invertmito">Invertmito</option>
        </param>
    </when>
</conditional>
|
|
<!--
  Population and termination settings, rendered into the [master] section of
  garli.conf. Labels are kept on a single line so that no line break (and the
  indentation after it) ends up inside the attribute value.
-->
<param name="nindivs" type="integer" size="3" value="4" label="Number of individuals in population">
    <validator type="in_range" message="(2-100)" min="2" max="100"/>
</param>
<param name="holdover" type="integer" size="2" value="1" label="Unmutated copies of best individual">
    <validator type="in_range" message="(1-99)" min="1" max="99"/>
</param>
<param name="selectionintensity" type="float" size="3" value="0.5" label="Strength of selection">
    <validator type="in_range" message="(0.1-5.0)" min="0.1" max="5.0"/>
</param>
<param name="holdoverpenalty" type="integer" size="3" value="0" label="Fitness handicap for best individual">
    <validator type="in_range" message="(0-100)" min="0" max="100"/>
</param>
<param name="stopgen" type="integer" size="10" value="5000000" label="Maximum number of generations to run">
    <validator type="in_range" message="(0-50000000)" min="0" max="50000000"/>
</param>
<!-- The help text describes this value as a number of seconds. -->
<param name="stoptime" type="integer" size="10" value="5000000" label="Maximum time to run">
    <validator type="in_range" message="(0-50000000)" min="0" max="50000000"/>
</param>
|
|
<!--
  Optimization precision schedule and tree rejection threshold. Labels are kept
  on a single line so that no line break ends up inside the attribute value.
  The help text requires minoptprec not to exceed startoptprec; that relation
  is not enforced by these validators.
-->
<param name="startoptprec" type="float" size="5" value="0.5" label="Starting optimization precision">
    <validator type="in_range" message="(0.005-5.0)" min="0.005" max="5.0"/>
</param>
<param name="minoptprec" type="float" size="5" value="0.01" label="Minimal optimization precision">
    <validator type="in_range" message="(0.001-5.0)" min="0.001" max="5.0"/>
</param>
<param name="numberofprecreductions" type="integer" size="3" value="10" label="Number of steps down from Start Precision to Minimum Precision">
    <validator type="in_range" message="(0-100)" min="0" max="100"/>
</param>
<param name="treerejectionthreshold" type="float" size="5" value="50.0" label="Tree Rejection Threshold">
    <validator type="in_range" message="(0-500.0)" min="0" max="500.0"/>
</param>
|
|
<!-- Relative weights of the mutation types; each accepts any non-negative float. -->
<param type="float" name="topoweight" value="1.0" size="10" label="Weight on topology mutations">
    <validator type="in_range" min="0" max="inf" message="(0-infinity)"/>
</param>
<param type="float" name="modweight" value="0.05" size="10" label="Weight on model parameter mutations">
    <validator type="in_range" min="0" max="inf" message="(0-infinity)"/>
</param>
<param type="float" name="brlenweight" value="0.2" size="10" label="Weight on branch-length parameter mutations">
    <validator type="in_range" min="0" max="inf" message="(0-infinity)"/>
</param>
<!-- Split of topology mutations between NNI, SPR and localized SPR moves. -->
<param type="float" name="randnniweight" value="0.1" size="10" label="Weight on NNI topology changes">
    <validator type="in_range" min="0" max="inf" message="(0-infinity)"/>
</param>
<param type="float" name="randsprweight" value="0.3" size="10" label="Weight on SPR topology changes">
    <validator type="in_range" min="0" max="inf" message="(0-infinity)"/>
</param>
<param type="float" name="limsprweight" value="0.6" size="10" label="Weight on localized SPR topology changes">
    <validator type="in_range" min="0" max="inf" message="(0-infinity)"/>
</param>
|
|
<!--
  Remaining [master] settings. Validator messages now state the bounds that the
  min/max attributes actually enforce (intervalstostore and limsprrange
  previously advertised "(0-1000)").
-->
<param name="intervallength" type="integer" size="4" value="100" label="Interval Length">
    <validator type="in_range" message="(0-1000)" min="0" max="1000"/>
</param>
<param name="intervalstostore" type="integer" size="2" value="5" label="Number of intervals to store">
    <validator type="in_range" message="(0-10)" min="0" max="10"/>
</param>
<param name="limsprrange" type="integer" size="10" value="6" label="Max range for localized SPR topology changes">
    <validator type="in_range" message="(0-infinity)" min="0" max="inf"/>
</param>
<!-- The upper bound is documented as the number of taxa, which is not known here,
     so a very large constant stands in for it. -->
<param name="meanbrlenmuts" type="integer" size="7" value="5" label="Mean number of branch lengths to change per mutation">
    <validator type="in_range" message="(0-# of taxa)" min="0" max="10000000000"/>
</param>
<param name="gammashapebrlen" type="integer" size="4" value="1000" label="Magnitude of branch-length mutations">
    <validator type="in_range" message="(50-2000)" min="50" max="2000"/>
</param>
<param name="gammashapemodel" type="integer" size="4" value="1000" label="Magnitude of model parameter mutations">
    <validator type="in_range" message="(50-2000)" min="50" max="2000"/>
</param>
<param name="uniqueswapbias" type="float" size="4" value="0.1" label="Relative weight assigned to already attempted branch swaps">
    <validator type="in_range" message="(0.01-1.0)" min="0.01" max="1.0"/>
</param>
<param name="distanceswapbias" type="float" size="3" value="1.0" label="Relative weight assigned to branch swaps based on locality">
    <validator type="in_range" message="(0.1-10.0)" min="0.1" max="10.0"/>
</param>
|
|
318 </inputs>
|
|
<!--
  Output datasets. Each one is collected from the job working directory under
  the fixed name given in from_work_dir, and its history label ends with that
  same file name.
-->
<outputs>
    <data format="nexus" name="best_tre" metadata_source="datafname" from_work_dir="garli.best.tre" label="${tool.name} on ${on_string}: garli.best.tre"/>
    <!-- Label corrected from "garli.all.best.tre" to match the collected file. -->
    <data format="nexus" name="best_all_tre" metadata_source="datafname" from_work_dir="garli.best.all.tre" label="${tool.name} on ${on_string}: garli.best.all.tre"/>
    <data format="txt" name="garli_conf" from_work_dir="garli.conf" label="${tool.name} on ${on_string}: garli.conf"/>
    <data format="txt" name="log00_log" from_work_dir="garli.log00.log" label="${tool.name} on ${on_string}: garli.log00.log"/>
    <data format="txt" name="screen_log" from_work_dir="garli.screen.log" label="${tool.name} on ${on_string}: garli.screen.log"/>
</outputs>
|
|
<configfiles>
    <!--
      garli.conf, rendered by Cheetah from the tool parameters. Entries without
      a template variable are fixed constants not exposed in the form. Template
      lines start at column 0 so the generated file carries no leading
      whitespace.
    -->
    <configfile name="garli_conf">
## Plain-string copy of the selected datatype, used for the comparisons below.
#set $dtype = str($choose_datatype.datatype)
[general]
datafname = ${datafname}
searchreps = ${choose_search_type.searchreps}
bootstrapreps = ${choose_search_type.bootstrapreps}
## An unset optional dataset renders as "None"; write the lowercase keyword instead.
#if str($constraintfile) == "None"
constraintfile = none
#else
constraintfile = ${constraintfile}
#end if
streefname = ${choose_streefname.streefname}
attachmentspertaxon = ${attachmentspertaxon}
ofprefix = garli
randseed = ${randseed}
availablememory = ${availablememory}
logevery = 10
writecheckpoints = 0
saveevery = 100
refinestart = ${refinestart}
outputeachbettertopology = 0
outputcurrentbesttopology = 0
enforcetermconditions = 1
genthreshfortopoterm = 20000
scorethreshforterm = 0.05
significanttopochange = 0.01
outputphyliptree = 0
outputmostlyuselessfiles = 0
restart = 0
outgroup = ${outgroup}
resampleproportion = ${choose_search_type.resampleproportion}
inferinternalstateprobs = 0
outputsitelikelihoods = 0
optimizeinputonly = 0
collapsebranches = ${collapsebranches}

[model1]
datatype = ${choose_datatype.datatype}
## Only the nucleotide and codon cases nest the rate matrix in choose_ratematrix.
#if $dtype in ("nucleotide", "codon")
ratematrix = ${choose_datatype.choose_ratematrix.ratematrix}
#else
ratematrix = ${choose_datatype.ratematrix}
#end if
statefrequencies = ${choose_datatype.statefrequencies}
ratehetmodel = ${choose_datatype.ratehetmodel}
## Compare as a plain integer rather than relying on the parameter wrapper object.
#if int(str($choose_datatype.numratecats)) > 0
numratecats = ${choose_datatype.numratecats}
#end if
#if str($choose_datatype.invariantsites) != ""
invariantsites = ${choose_datatype.invariantsites}
#end if
## The genetic code selector exists only for the two codon based datatypes.
#if $dtype in ("codon", "codon-aminoacid")
geneticcode = ${choose_datatype.geneticcode}
#end if

[master]
nindivs = $nindivs
holdover = $holdover
selectionintensity = $selectionintensity
holdoverpenalty = $holdoverpenalty
stopgen = $stopgen
stoptime = $stoptime

startoptprec = $startoptprec
minoptprec = $minoptprec
numberofprecreductions = $numberofprecreductions
treerejectionthreshold = $treerejectionthreshold
topoweight = $topoweight
modweight = $modweight
brlenweight = $brlenweight
randnniweight = $randnniweight
randsprweight = $randsprweight
limsprweight = $limsprweight
intervallength = $intervallength
intervalstostore = $intervalstostore
limsprrange = $limsprrange
meanbrlenmuts = $meanbrlenmuts
gammashapebrlen = $gammashapebrlen
gammashapemodel = $gammashapemodel
uniqueswapbias = $uniqueswapbias
## Was hard-coded to 1.0, which silently ignored the value entered in the form.
distanceswapbias = $distanceswapbias
    </configfile>
</configfiles>
|
|
398
|
|
399 <help>
|
|
400
|
|
401 **What it does**
|
|
402
|
|
403 GARLI is a program that performs phylogenetic inference using the
|
|
404 maximum-likelihood criterion. Several sequence types are supported,
|
|
405 including nucleotide, amino acid and codon. Version 2.0 adds support
|
|
406 for partitioned models and morphology-like datatypes.
|
|
407
|
|
408 Garli is written and maintained by Derrick Zwickl
|
|
409
|
|
410 Configuration options are adapted from
|
|
411 https://www.nescent.org/wg_garli/GARLI_Configuration_Settings
|
|
412
|
|
413 -----
|
|
414
|
|
415 **Detailed description of the configuration options**
|
|
416
|
|
417
|
|
418 **Analysis Type**
|
|
419
|
|
420 Specify whether to perform a maximum likelihood search for the best tree, or
|
|
421 a bootstrap analysis.
|
|
422
|
|
423
|
|
424 **Number of replicates**
|
|
425
|
|
426 Number of independent search replicates to run.
|
|
427
|
|
428
|
|
429 **Relative size of resample data**
|
|
430
|
|
431 This setting allows for bootstrap-like resampling, but with the
|
|
432 pseudoreplicate datasets having the number of alignment columns different
|
|
433 from the real data. Setting values below 1.0 is somewhat similar to
|
|
434 jackknifing, but not identical.
|
|
435
|
|
436
|
|
437 **Attachment branches evaluated per taxon (min=1)**
|
|
438
|
|
439 The number of attachment branches evaluated for each taxon to be added to
|
|
440 the tree during the creation of an ML stepwise-addition starting tree.
|
|
441 Briefly, stepwise addition is an algorithm used to make a tree, and involves
|
|
442 adding taxa in a random order to a growing tree. For each taxon to be added,
|
|
443 a number of randomly chosen attachment branches are tried and scored, and
|
|
444 then the best scoring one is chosen as the location of that taxon. This
|
|
445 setting controls how many attachment points are evaluated for each taxon to
|
|
446 be added. A value of one is equivalent to a completely random tree (only one
|
|
447 randomly chosen location is evaluated). A value of greater than 2 times the
|
|
448 number of taxa in the dataset means that all attachment points will be
|
|
449 evaluated for each taxon, and will result in very good starting trees (but
|
|
450 may take a while on large datasets). Even fairly small values (less than 10)
|
|
451 can result in starting trees that are much, much better than random, but
|
|
452 still fairly different from one another.
|
|
453
|
|
454
|
|
455 **Constraint file**
|
|
456
|
|
457 Select a file containing constraint specifications.
|
|
458
|
|
459
|
|
460 **Random seed**
|
|
461
|
|
462 Random seed can have a value of -1 or a positive integer. The random number
|
|
463 seed used by the random number generator. Specify "-1" to have a seed chosen
|
|
464 for you. Specifying the same seed number in multiple runs will give exactly
|
|
465 identical results, if all other parameters and settings are also identical.
|
|
466
|
|
467
|
|
468 **Available memory**
|
|
469
|
|
470 This lets GARLI determine how much system memory it may be able to use to
|
|
471 store computations for reuse.
|
|
472
|
|
473
|
|
474 **Perform initial rough optimization**
|
|
475
|
|
476 Specifies whether some initial rough optimization is performed on the
|
|
477 starting branch lengths and rate heterogeneity parameters. This is always
|
|
478 recommended.
|
|
479
|
|
480
|
|
481 **Outgroup taxa numbers**
|
|
482
|
|
483 The outgroup option allows for orienting tree topologies in a consistent way
|
|
484 when they are written to a file. Note that this has NO effect whatsoever on
|
|
485 the actual inference and the specified outgroup is NOT constrained to be
|
|
486 present in the inferred trees. If multiple outgroup taxa are specified and
|
|
487 they do not form a monophyletic group, this setting will be ignored. If you
|
|
488 specify a single outgroup taxon it will always be present, and the tree will
|
|
489 always be consistently oriented. To specify an outgroup consisting of taxa
|
|
490 1, 3 and 5 the format is this: outgroup = 1 3 5. Dashes are used for ranges
|
|
491 e.g. 1-3 5.
|
|
492
|
|
493
|
|
494 **Collapse branches**
|
|
495
|
|
496 Before version 1.0, all trees that are returned were fully resolved. This is
|
|
497 true even if the maximum-likelihood estimate of some internal branch lengths
|
|
498 are effectively zero (or GARLI's minimum, which is 1e-8). In such cases,
|
|
499 collapsing the branch into a polytomy would be a better representation. Note
|
|
500 that GARLI will never return a tree with an actual branch length of zero,
|
|
501 but rather with its minimum value of 1.0e-8. The drawback of always
|
|
502 returning fully resolved trees is that what is effectively a polytomy can be
|
|
503 resolved in three ways, and different independent searches may randomly
|
|
504 return one of those resolutions. Thus, if you compare the trees by topology
|
|
505 only, they will look different. If you pay attention to the branch lengths
|
|
506 and likelihood scores of the trees it will be apparent that they are
|
|
507 effectively the same. I think that collapsing of branches is particularly
|
|
508 important when bootstrapping, since no support should be given to a branch
|
|
509 that doesn't really exist, i.e., that is a random resolution of a polytomy.
|
|
510 Collapsing is also good when calculating tree to tree distances such as the
|
|
511 symmetric tree distance, for example when calculating phylogenetic error to
|
|
512 a known target tree. Zero-length branches would add to the distances
|
|
513 (~error) although they really should not.
|
|
514
|
|
515
|
|
516 **Model type**
|
|
517
|
|
518 The codon-aminoacid datatype means that the data will be supplied as a
|
|
519 nucleotide alignment, but will be internally translated and analyzed using
|
|
520 an amino acid model. The codon and codon-aminoacid datatypes require
|
|
521 nucleotide sequence that is aligned in the correct reading frame. In other
|
|
522 words, all gaps in the alignment should be a multiple of 3 in length, and
|
|
523 the alignment should start at the first position of a codon. If the
|
|
524 alignment has extra columns at the start, middle or end, they should be
|
|
525 removed or excluded with a Nexus exset (see the FAQ for an example of exset
|
|
526 usage). The correct Genetic Code must also be set.
|
|
527
|
|
528
|
|
529
|
|
530
|
|
531 **Datatype - nucleotide**
|
|
532
|
|
533 **Rate matrix**
|
|
534
|
|
535 The number of relative substitution rate parameters (note that the number of
|
|
536 free parameters is this value minus one). Equivalent to the "nst" setting in
|
|
537 PAUP* and MrBayes. 1rate assumes that substitutions between all pairs of
|
|
538 nucleotides occur at the same rate (JC model), 2rate allows different rates
|
|
539 for transitions and transversions (K2P or HKY models), and 6rate allows a
|
|
540 different rate between each nucleotide pair (GTR). These rates are estimated
|
|
541 unless the fixed option is chosen. Since version 0.96, parameters for any
|
|
542 submodel of the GTR model may be estimated. The format for specifying this
|
|
543 is very similar to that used in the "rclass" setting of PAUP*. Within
|
|
544 parentheses, six letters are specified, with spaces between them. The six
|
|
545 letters represent the rates of substitution between the six pairs of
|
|
546 nucleotides, with the order being A-C, A-G, A-T, C-G, C-T and G-T. Letters
|
|
547 within the parentheses that are the same mean that a single parameter is
|
|
548 shared by multiple nucleotide pairs.
|
|
549
|
|
550
|
|
551 **State frequencies**
|
|
552
|
|
553 Specifies how the equilibrium state frequencies (A, C, G and T) are treated.
|
|
554 The empirical setting fixes the frequencies at their observed proportions,
|
|
555 and the other options should be self-explanatory.
|
|
556
|
|
557
|
|
558 **Datatype - nucleotide or amino-acid**
|
|
559
|
|
560
|
|
561 **Treatment of proportion of invariable sites parameter**
|
|
562
|
|
563 Specifies whether a parameter representing the proportion of sites that are
|
|
564 unable to change (i.e. have a substitution rate of zero) will be included.
|
|
565 This is typically referred to as 'invariant sites', but would better be
|
|
566 termed 'invariable sites'.
|
|
567
|
|
568
|
|
569 **Rate heterogeneity type**
|
|
570
|
|
571 (none, gamma, gammafixed) - The model of rate heterogeneity assumed.
|
|
572 "gammafixed" requires that the alpha shape parameter is provided, and a
|
|
573 setting of "gamma" estimates it.
|
|
574
|
|
575
|
|
576 **Number of discrete dN/dS categories**
|
|
577
|
|
578 The number of categories of variable rates (not including the invariant site
|
|
579 class if it is being used). Must be set to 1 if ratehetmodel is set to none.
|
|
580 Note that runtimes and memory usage scale linearly with this setting.
|
|
581
|
|
582
|
|
583 **Datatype - amino-acid or codon-aminoacid**
|
|
584
|
|
585 **Rate matrix**
|
|
586
|
|
587 (poisson, jones, dayhoff, wag, mtmam, mtrev) - The fixed amino acid rate
|
|
588 matrix to use. You should use the matrix that gives the best likelihood, and
|
|
589 could use a program like PROTTEST (very much like MODELTEST, but for amino
|
|
590 acid models) to determine which fits best for your data. Poisson assumes a
|
|
591 single rate of substitution between all amino acid pairs, and is a very poor
|
|
592 model.
|
|
593
|
|
594
|
|
595 **Equilibrium Base Frequencies**
|
|
596
|
|
597 (equal, empirical, estimate, fixed, jones, dayhoff, wag, mtmam, mtrev) -
|
|
598 Specifies how the equilibrium state frequencies of the 20 amino acids are
|
|
599 treated. The "empirical" option fixes the frequencies at their observed
|
|
600 proportions (when describing a model this is often termed '+F').
|
|
601
|
|
602
|
|
603 **Number of discrete dN/dS categories**
|
|
604
|
|
605 The number of categories of variable rates (not including the invariant site
|
|
606 class if it is being used). Must be set to 1 if ratehetmodel is set to none.
|
|
607 Note that runtimes and memory usage scale linearly with this setting.
|
|
608
|
|
609
|
|
610 **Treatment of proportion of invariable sites parameter**
|
|
611
|
|
612 Specifies whether a parameter representing the proportion of sites that are
|
|
613 unable to change (i.e. have a substitution rate of zero) will be included.
|
|
614 This is typically referred to as 'invariant sites', but would better be
|
|
615 termed 'invariable sites'.
|
|
616
|
|
617
|
|
618 **Datatype - codon**
|
|
619
|
|
620
|
|
621 **Rate matrix**
|
|
622
|
|
623 (1rate, 2rate, 6rate, fixed, custom string) - This determines the relative
|
|
624 rates of nucleotide substitution assumed by the codon model. The options are
|
|
625 exactly the same as those allowed under a normal nucleotide model. A codon
|
|
626 model with ratematrix = 2rate specifies the standard Goldman and Yang (1994)
|
|
627 model, with different substitution rates for transitions and transversions.
|
|
628
|
|
629
|
|
630 **State frequencies**
|
|
631
|
|
632 The options are to use equal codon frequencies (not a good option), the
|
|
633 frequencies observed in your dataset (termed "empirical" in GARLI), or the
|
|
634 codon frequencies implied by the "F1x4" or "F3x4" methods (using PAML
|
|
635 terminology). These last two options calculate the codon frequencies as the
|
|
636 product of the frequencies of the three nucleotides that make up each codon.
|
|
637 In the "F1x4" case the nucleotide frequencies are those observed in the
|
|
638 dataset across all codon positions, while the "F3x4" option uses the
|
|
639 nucleotide frequencies observed in the data at each codon position
|
|
640 separately.
|
|
641
|
|
642
|
|
643 **Rate Heterogeneity Type**
|
|
644
|
|
645 For codon models, the default is to infer a single dN/dS parameter.
|
|
646 Alternatively, a model can be specified that infers a given number of dN/dS
|
|
647 categories, with the dN/dS values and proportions falling in each category
|
|
648 estimated (ratehetmodel = nonsynonymous). This is the 'discrete' or 'M3'
|
|
649 model of Yang et al., 2000.
|
|
650
|
|
651
|
|
652 **Number of discrete dN/dS categories**
|
|
653
|
|
654 When ratehetmodel = nonsynonymous, this is the number of dN/dS parameter
|
|
655 categories.
|
|
656
|
|
657
|
|
658 **Datatype - codon or codon-aminoacid**
|
|
659
|
|
660
|
|
661 **Genetic code**
|
|
662
|
|
663 The genetic code to be used in translating codons into amino acids.
|
|
664
|
|
665
|
|
666 **Population Settings**
|
|
667
|
|
668
|
|
669 **Number of individuals in population**
|
|
670
|
|
671 The number of individuals in the population. This may be increased, but
|
|
672 doing so is generally not beneficial. Note that typical genetic algorithms
|
|
673 tend to have much, much larger population sizes than GARLI defaults.
|
|
674
|
|
675
|
|
676 **Unmutated copies of best individual**
|
|
677
|
|
678 The number of times the best individual is copied to the next generation
|
|
679 with no chance of mutation. It is best not to mess with this setting.
|
|
680
|
|
681
|
|
682 **Strength of selection**
|
|
683
|
|
684 Controls the strength of selection, with larger numbers denoting stronger
|
|
685 selection. The relative probability of reproduction of two individuals
|
|
686 depends on the difference in their log likelihoods (ΔlnL) and is formulated
|
|
687 very similarly to the procedure of calculating Akaike weights.
|
|
688
|
|
689
|
|
690 **Fitness handicap for the best individual**
|
|
691
|
|
692 This can be used to bias the probability of reproduction of the best
|
|
693 individual downward. Because the best individual is automatically copied
|
|
694 into the next generation, it has a bit of an unfair advantage and can cause
|
|
695 all population variation to be lost due to genetic drift, especially with
|
|
696 small population sizes. The value specified here is subtracted from the
|
|
697 best individual's lnL score before calculating the probabilities of
|
|
698 reproduction. It seems plausible that this might help maintain variation,
|
|
699 but I have not seen it cause a measurable effect.
|
|
700
|
|
701
|
|
702 **Maximum number of generations to run**
|
|
703
|
|
704 Use if automatic termination is desired to prevent a runaway process.
|
|
705
|
|
706
|
|
707 **Maximum time to run**
|
|
708
|
|
709 The maximum number of seconds for the run to continue. Use if automatic
|
|
710 termination is desired to prevent a runaway process.
|
|
711
|
|
712
|
|
713 **Branch-length optimization settings**
|
|
714
|
|
715
|
|
716 **Minimal optimization precision**
|
|
717
|
|
718 The minimum allowed value of the optimization precision - must not be larger
|
|
719 than the Starting optimization precision.
|
|
720
|
|
721
|
|
722 **Number of steps down from Start Precision to Minimum Precision**
|
|
723
|
|
724 Specify the number of steps that it will take for the optimization precision
|
|
725 to decrease (linearly) from startoptprec to minoptprec.
|
|
726
|
|
727
|
|
728 **Tree rejection threshold**
|
|
729
|
|
730 This setting controls which trees have more extensive branch-length
|
|
731 optimization applied to them. All trees created by a branch swap receive
|
|
732 optimization on a few branches that directly took part in the rearrangement.
|
|
733 If the difference in score between the partially optimized tree and the best
|
|
734 known tree is greater than treerejectionthreshold, no further optimization
|
|
735 is applied to the branches of that tree. Reducing this value can
|
|
736 significantly reduce runtimes, often with little or no effect on results.
|
|
737 However, it is possible that a better tree could be missed if this is set
|
|
738 too low. In cases in which obtaining the very best tree per search is not
|
|
739 critical (e.g., bootstrapping), setting this lower (~20) is probably safe.
|
|
740
|
|
741
|
|
742 **Settings controlling the proportions of the mutation types**
|
|
743
|
|
744
|
|
745 **Weight on topology mutations**
|
|
746
|
|
747 The prior weight assigned to the class of topology mutations (NNI, SPR and
|
|
748 limSPR). Note that setting this to 0.0 turns off topology mutations, meaning
|
|
749 that the tree topology is fixed for the run. This used to be a way to have
|
|
750 the program estimate only model parameters and branch-lengths, but the
|
|
751 optimizeinputonly setting is now a better way to go.
|
|
752
|
|
753
|
|
754 **Weight on model parameter mutations**
|
|
755
|
|
756 The prior weight assigned to the class of model mutations. Note that setting
|
|
757 this at 0.0 fixes the model during the run.
|
|
758
|
|
759
|
|
760 **Weight on branch-length parameter mutations**
|
|
761
|
|
762 The prior weight assigned to branch-length mutations. The same procedure
|
|
763 used above to determine the proportion of Topology:Model:Branch-Length
|
|
764 mutations is also used to determine the relative proportions of the three
|
|
765 types of topological mutations (NNI:SPR:limSPR), controlled by the following
|
|
766 three weights. Note that the proportion of mutations applied to each of the
|
|
767 model parameters is not user controlled.
|
|
768
|
|
769
|
|
770 **Weight on NNI topology changes**
|
|
771
|
|
772 The prior weight assigned to NNI mutations.
|
|
773
|
|
774
|
|
775 **Weight on SPR topology changes**
|
|
776
|
|
777 The prior weight assigned to random SPR mutations. For very large datasets
|
|
778 it is often best to set this to 0.0, as random SPR mutations essentially
|
|
779 never result in score increases.
|
|
780
|
|
781
|
|
782 **Weight on localized SPR topology changes**
|
|
783
|
|
784 The prior weight assigned to SPR mutations with the reconnection branch
|
|
785 limited to being a maximum of limsprrange branches away from where the
|
|
786 branch was detached.
|
|
787
|
|
788
|
|
789 **Interval Length**
|
|
790
|
|
791 The number of generations in each interval during which the number and
|
|
792 benefit of each mutation type are stored.
|
|
793
|
|
794
|
|
795 **Number of intervals to store**
|
|
796
|
|
797 The number of intervals to be stored. Thus, records of mutations are kept
|
|
798 for the last (intervallength x intervalstostore) generations. Every
|
|
799 intervallength generations the probabilities of the mutation types are
|
|
800 updated by the scheme described above.
|
|
801
|
|
802
|
|
803 **Settings controlling mutation details**
|
|
804
|
|
805
|
|
806 **Max range for localized SPR topology changes**
|
|
807
|
|
808 The maximum number of branches away from its original location that a branch
|
|
809 may be reattached during a limited SPR move. Setting this too high (> 10)
|
|
810 can seriously degrade performance, but if you do so in conjunction with a
|
|
811 large increase in genthreshfortopoterm you might end up with better scores.
|
|
812
|
|
813
|
|
814 **Mean number of branch lengths to change per mutation**
|
|
815
|
|
816 The mean of the binomial distribution from which the number of branch
|
|
817 lengths mutated is drawn during a branch length mutation.
|
|
818
|
|
819
|
|
820 **Magnitude of branch-length mutations**
|
|
821
|
|
822 The shape parameter of the gamma distribution (with a mean of 1.0) from
|
|
823 which the branch-length multipliers are drawn for branch-length mutations.
|
|
824 Larger numbers cause smaller changes in branch lengths. (Note that this has
|
|
825 nothing to do with gamma rate heterogeneity.)
|
|
826
|
|
827
|
|
828 **Magnitude of model parameter mutations**
|
|
829
|
|
830 The shape parameter of the gamma distribution (with a mean of 1.0) from
|
|
831 which the model mutation multipliers are drawn for model parameter
|
|
832 mutations. Larger numbers cause smaller changes in model parameters. (Note
|
|
833 that this has nothing to do with gamma rate heterogeneity.)
|
|
834
|
|
835
|
|
836 **Relative weight assigned to already attempted branch swaps**
|
|
837
|
|
838 With version 0.95 and later, GARLI keeps track of which branch swaps it has
|
|
839 attempted on the current best tree. Because swaps are applied randomly, it
|
|
840 is possible that some swaps are tried twice before others are tried at all.
|
|
841 This option allows the program to bias the swaps applied toward those that
|
|
842 have not yet been attempted. Each swap is assigned a relative weight
|
|
843 depending on the number of times that it has been attempted on the current
|
|
844 best tree. This weight is equal to (uniqueswapbias) raised to the (# times
|
|
845 swap attempted) power. In other words, a value of 0.5 means that swaps that
|
|
846 have already been tried once will be half as likely as those not yet
|
|
847 attempted, swaps attempted twice will be ¼ as likely, etc. A value of 1.0
|
|
848 means no biasing. Use of this option may allow the use of somewhat larger
|
|
849 values of limsprrange.
|
|
850
|
|
851
|
|
852 **Relative weight assigned to branch swaps based on locality**
|
|
853
|
|
854 This option is similar to uniqueswapbias, except that it biases toward
|
|
855 certain swaps based on the topological distance between the initial and
|
|
856 rearranged trees. The distance is measured as in the limsprrange, and is
|
|
857 half the Robinson-Foulds distance between the trees. As with
|
|
858 uniqueswapbias, distanceswapbias assigns a relative weight to each potential
|
|
859 swap. In this case the weight is (distanceswapbias) raised to the
|
|
860 (reconnection distance - 1) power. Thus, given a value of 0.5, the weight of
|
|
861 an NNI is 1.0, the weight of an SPR with distance 2 is 0.5, with distance 3
|
|
862 is 0.25, etc. Note that values less than 1.0 bias toward more localized
|
|
863 swaps, while values greater than 1.0 bias toward more extreme swaps. Also
|
|
864 note that this bias is only applied to limSPR rearrangements. Be careful in
|
|
865 setting this, as extreme values can have a very large effect.
|
|
866
|
|
867 </help>
|
|
868 </tool>
|