0
|
1
|
|
2 #
|
|
3 # Trees.py
|
|
4 #
|
|
5 # Copyright 2005 by Frank Kauff & Cymon J. Cox. All rights reserved.
|
|
6 # This code is part of the Biopython distribution and governed by its
|
|
7 # license. Please see the LICENSE file that should have been included
|
|
8 # as part of this package.
|
|
9 #
|
|
10 # Tree class handles phylogenetic trees. Provides a set of methods to read and write newick-format tree
|
|
11 # descriptions, get information about trees (monphyly of taxon sets, congruence between trees, common ancestors,...)
|
|
12 # and to manipulate trees (reroot trees, split terminal nodes).
|
|
13 #
|
|
14 # Bug reports welcome: fkauff@duke.edu
|
|
15 #
|
|
16
|
|
17 import sys, random, sets
|
|
18 import Nodes
|
|
19
|
|
20 PRECISION_BRANCHLENGTH=6
|
|
21 PRECISION_SUPPORT=6
|
|
22
|
|
23 class TreeError(Exception): pass
|
|
24
|
|
25 class NodeData:
|
|
26 """Stores tree-relevant data associated with nodes (e.g. branches or otus)."""
|
|
27 def __init__(self,taxon=None,branchlength=0.0,support=None):
|
|
28 self.taxon=taxon
|
|
29 self.branchlength=branchlength
|
|
30 self.support=support
|
|
31
|
|
32 class Tree(Nodes.Chain):
|
|
33 """Represents a tree using a chain of nodes with on predecessor (=ancestor)
|
|
34 and multiple successors (=subclades).
|
|
35 """
|
|
36 # A newick tree is parsed into nested list and then converted to a node list in two stages
|
|
37 # mostly due to historical reasons. This could be done in one swoop). Note: parentheses ( ) and
|
|
38 # colon : are not allowed in taxon names. This is against NEXUS standard, but makes life much
|
|
39 # easier when parsing trees.
|
|
40
|
|
41 ## NOTE: Tree should store its data class in something like self.dataclass=data,
|
|
42 ## so that nodes that are generated have easy access to the data class
|
|
43 ## Some routines use automatically NodeData, this needs to be more concise
|
|
44
|
|
45 def __init__(self,tree=None,weight=1.0,rooted=False,name='',data=NodeData,values_are_support=False,max_support=1.0):
|
|
46 """Ntree(self,tree)."""
|
|
47 Nodes.Chain.__init__(self)
|
|
48 self.dataclass=data
|
|
49 self.__values_are_support=values_are_support
|
|
50 self.max_support=max_support
|
|
51 self.weight=weight
|
|
52 self.rooted=rooted
|
|
53 self.name=name
|
|
54 root=Nodes.Node(data())
|
|
55 self.add(root)
|
|
56 self.root=root.id
|
|
57 if tree: # use the tree we have
|
|
58 # if Tree is called from outside Nexus parser, we need to get rid of linebreaks, etc
|
|
59 tree=tree.strip().replace('\n','').replace('\r','')
|
|
60 # there's discrepancy whether newick allows semicolons et the end
|
|
61 tree=tree.rstrip(';')
|
|
62 self._add_subtree(parent_id=root.id,tree=self._parse(tree)[0])
|
|
63
|
|
64 def _parse(self,tree):
|
|
65 """Parses (a,b,c...)[[[xx]:]yy] into subcomponents and travels down recursively."""
|
|
66
|
|
67 if tree.count('(')!=tree.count(')'):
|
|
68 raise TreeError, 'Parentheses do not match in (sub)tree: '+tree
|
|
69 if tree.count('(')==0: # a leaf
|
|
70 colon=tree.rfind(':')
|
|
71 if colon>-1:
|
|
72 return [tree[:colon],self._get_values(tree[colon+1:])]
|
|
73 else:
|
|
74 return [tree,[None]]
|
|
75 else:
|
|
76 closing=tree.rfind(')')
|
|
77 val=self._get_values(tree[closing+1:])
|
|
78 if not val:
|
|
79 val=[None]
|
|
80 subtrees=[]
|
|
81 plevel=0
|
|
82 prev=1
|
|
83 for p in range(1,closing):
|
|
84 if tree[p]=='(':
|
|
85 plevel+=1
|
|
86 elif tree[p]==')':
|
|
87 plevel-=1
|
|
88 elif tree[p]==',' and plevel==0:
|
|
89 subtrees.append(tree[prev:p])
|
|
90 prev=p+1
|
|
91 subtrees.append(tree[prev:closing])
|
|
92 subclades=[self._parse(subtree) for subtree in subtrees]
|
|
93 return [subclades,val]
|
|
94
|
|
95 def _add_subtree(self,parent_id=None,tree=None):
|
|
96 """Adds leaf or tree (in newick format) to a parent_id. (self,parent_id,tree)."""
|
|
97
|
|
98 if parent_id is None:
|
|
99 raise TreeError('Need node_id to connect to.')
|
|
100 for st in tree:
|
|
101 if type(st[0])==list: # it's a subtree
|
|
102 nd=self.dataclass()
|
|
103 if len(st[1])>=2: # if there's two values, support comes first. Is that always so?
|
|
104 nd.support=st[1][0]
|
|
105 if st[1][1] is not None:
|
|
106 nd.branchlength=st[1][1]
|
|
107 elif len(st[1])==1: # otherwise it could be real branchlengths or support as branchlengths
|
|
108 if not self.__values_are_support: # default
|
|
109 if st[1][0] is not None:
|
|
110 nd.branchlength=st[1][0]
|
|
111 else:
|
|
112 nd.support=st[1][0]
|
|
113 sn=Nodes.Node(nd)
|
|
114 self.add(sn,parent_id)
|
|
115 self._add_subtree(sn.id,st[0])
|
|
116 else: # it's a leaf
|
|
117 nd=self.dataclass()
|
|
118 nd.taxon=st[0]
|
|
119 if len(st)>1:
|
|
120 if len(st[1])>=2: # if there's two values, support comes first. Is that always so?
|
|
121 nd.support=st[1][0]
|
|
122 if st[1][1] is not None:
|
|
123 nd.branchlength=st[1][1]
|
|
124 elif len(st[1])==1: # otherwise it could be real branchlengths or support as branchlengths
|
|
125 if not self.__values_are_support: # default
|
|
126 if st[1][0] is not None:
|
|
127 nd.branchlength=st[1][0]
|
|
128 else:
|
|
129 nd.support=st[1][0]
|
|
130 leaf=Nodes.Node(nd)
|
|
131 self.add(leaf,parent_id)
|
|
132
|
|
133 def _get_values(self, text):
|
|
134 """Extracts values (support/branchlength) from xx[:yyy], xx."""
|
|
135
|
|
136 if text=='':
|
|
137 return None
|
|
138 return [float(t) for t in text.split(':') if t.strip()]
|
|
139
|
|
140 def _walk(self,node=None):
|
|
141 """Return all node_ids downwards from a node."""
|
|
142
|
|
143 if node is None:
|
|
144 node=self.root
|
|
145 for n in self.node(node).succ:
|
|
146 yield n
|
|
147 for sn in self._walk(n):
|
|
148 yield sn
|
|
149
|
|
150 def node(self,node_id):
|
|
151 """Return the instance of node_id.
|
|
152
|
|
153 node = node(self,node_id)
|
|
154 """
|
|
155 if node_id not in self.chain:
|
|
156 raise TreeError('Unknown node_id: %d' % node_id)
|
|
157 return self.chain[node_id]
|
|
158
|
|
159 def split(self,parent_id=None,n=2,branchlength=1.0):
|
|
160 """Speciation: generates n (default two) descendants of a node.
|
|
161
|
|
162 [new ids] = split(self,parent_id=None,n=2,branchlength=1.0):
|
|
163 """
|
|
164 if parent_id is None:
|
|
165 raise TreeError('Missing node_id.')
|
|
166 ids=[]
|
|
167 parent_data=self.chain[parent_id].data
|
|
168 for i in range(n):
|
|
169 node=Nodes.Node()
|
|
170 if parent_data:
|
|
171 node.data=self.dataclass()
|
|
172 # each node has taxon and branchlength attribute
|
|
173 if parent_data.taxon:
|
|
174 node.data.taxon=parent_data.taxon+str(i)
|
|
175 node.data.branchlength=branchlength
|
|
176 ids.append(self.add(node,parent_id))
|
|
177 return ids
|
|
178
|
|
179 def search_taxon(self,taxon):
|
|
180 """Returns the first matching taxon in self.data.taxon. Not restricted to terminal nodes.
|
|
181
|
|
182 node_id = search_taxon(self,taxon)
|
|
183 """
|
|
184 for id,node in self.chain.items():
|
|
185 if node.data.taxon==taxon:
|
|
186 return id
|
|
187 return None
|
|
188
|
|
189 def prune(self,taxon):
|
|
190 """Prunes a terminal taxon from the tree.
|
|
191
|
|
192 id_of_previous_node = prune(self,taxon)
|
|
193 If taxon is from a bifurcation, the connectiong node will be collapsed
|
|
194 and its branchlength added to remaining terminal node. This might be no
|
|
195 longer a meaningful value'
|
|
196 """
|
|
197
|
|
198 id=self.search_taxon(taxon)
|
|
199 if id is None:
|
|
200 raise TreeError('Taxon not found: %s' % taxon)
|
|
201 elif id not in self.get_terminals():
|
|
202 raise TreeError('Not a terminal taxon: %s' % taxon)
|
|
203 else:
|
|
204 prev=self.unlink(id)
|
|
205 self.kill(id)
|
|
206 if not prev==self.root and len(self.node(prev).succ)==1:
|
|
207 succ=self.node(prev).succ[0]
|
|
208 new_bl=self.node(prev).data.branchlength+self.node(succ).data.branchlength
|
|
209 self.collapse(prev)
|
|
210 self.node(succ).data.branchlength=new_bl
|
|
211 return prev
|
|
212
|
|
213 def get_taxa(self,node_id=None):
|
|
214 """Return a list of all otus downwards from a node (self, node_id).
|
|
215
|
|
216 nodes = get_taxa(self,node_id=None)
|
|
217 """
|
|
218
|
|
219 if node_id is None:
|
|
220 node_id=self.root
|
|
221 if node_id not in self.chain:
|
|
222 raise TreeError('Unknown node_id: %d.' % node_id)
|
|
223 if self.chain[node_id].succ==[]:
|
|
224 if self.chain[node_id].data:
|
|
225 return [self.chain[node_id].data.taxon]
|
|
226 else:
|
|
227 return None
|
|
228 else:
|
|
229 list=[]
|
|
230 for succ in self.chain[node_id].succ:
|
|
231 list.extend(self.get_taxa(succ))
|
|
232 return list
|
|
233
|
|
234 def get_terminals(self):
|
|
235 """Return a list of all terminal nodes."""
|
|
236 return [i for i in self.all_ids() if self.node(i).succ==[]]
|
|
237
|
|
238 def sum_branchlength(self,root=None,node=None):
|
|
239 """Adds up the branchlengths from root (default self.root) to node.
|
|
240
|
|
241 sum = sum_branchlength(self,root=None,node=None)
|
|
242 """
|
|
243
|
|
244 if root is None:
|
|
245 root=self.root
|
|
246 if node is None:
|
|
247 raise TreeError('Missing node id.')
|
|
248 blen=0.0
|
|
249 while node is not None and node is not root:
|
|
250 blen+=self.node(node).data.branchlength
|
|
251 node=self.node(node).prev
|
|
252 return blen
|
|
253
|
|
254 def set_subtree(self,node):
|
|
255 """Return subtree as a set of nested sets.
|
|
256
|
|
257 sets = set_subtree(self,node)
|
|
258 """
|
|
259
|
|
260 if self.node(node).succ==[]:
|
|
261 return self.node(node).data.taxon
|
|
262 else:
|
|
263 return sets.Set([self.set_subtree(n) for n in self.node(node).succ])
|
|
264
|
|
265 def is_identical(self,tree2):
|
|
266 """Compare tree and tree2 for identity.
|
|
267
|
|
268 result = is_identical(self,tree2)
|
|
269 """
|
|
270 return self.set_subtree(self.root)==tree2.set_subtree(tree2.root)
|
|
271
|
|
272 def is_compatible(self,tree2,threshold,strict=True):
|
|
273 """Compares branches with support>threshold for compatibility.
|
|
274
|
|
275 result = is_compatible(self,tree2,threshold)
|
|
276 """
|
|
277
|
|
278 # check if both trees have the same set of taxa. strict=True enforces this.
|
|
279 missing2=sets.Set(self.get_taxa())-sets.Set(tree2.get_taxa())
|
|
280 missing1=sets.Set(tree2.get_taxa())-sets.Set(self.get_taxa())
|
|
281 if strict and (missing1 or missing2):
|
|
282 if missing1:
|
|
283 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing1) , self.name)
|
|
284 if missing2:
|
|
285 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing2) , tree2.name)
|
|
286 raise TreeError, 'Can\'t compare trees with different taxon compositions.'
|
|
287 t1=[(sets.Set(self.get_taxa(n)),self.node(n).data.support) for n in self.all_ids() if \
|
|
288 self.node(n).succ and\
|
|
289 (self.node(n).data and self.node(n).data.support and self.node(n).data.support>=threshold)]
|
|
290 t2=[(sets.Set(tree2.get_taxa(n)),tree2.node(n).data.support) for n in tree2.all_ids() if \
|
|
291 tree2.node(n).succ and\
|
|
292 (tree2.node(n).data and tree2.node(n).data.support and tree2.node(n).data.support>=threshold)]
|
|
293 conflict=[]
|
|
294 for (st1,sup1) in t1:
|
|
295 for (st2,sup2) in t2:
|
|
296 if not st1.issubset(st2) and not st2.issubset(st1): # don't hiccup on upstream nodes
|
|
297 intersect,notin1,notin2=st1 & st2, st2-st1, st1-st2 # all three are non-empty sets
|
|
298 # if notin1==missing1 or notin2==missing2 <==> st1.issubset(st2) or st2.issubset(st1) ???
|
|
299 if intersect and not (notin1.issubset(missing1) or notin2.issubset(missing2)): # omit conflicts due to missing taxa
|
|
300 conflict.append((st1,sup1,st2,sup2,intersect,notin1,notin2))
|
|
301 return conflict
|
|
302
|
|
303 def common_ancestor(self,node1,node2):
|
|
304 """Return the common ancestor that connects to nodes.
|
|
305
|
|
306 node_id = common_ancestor(self,node1,node2)
|
|
307 """
|
|
308
|
|
309 l1=[self.root]+self.trace(self.root,node1)
|
|
310 l2=[self.root]+self.trace(self.root,node2)
|
|
311 return [n for n in l1 if n in l2][-1]
|
|
312
|
|
313
|
|
314 def distance(self,node1,node2):
|
|
315 """Add and return the sum of the branchlengths between two nodes.
|
|
316 dist = distance(self,node1,node2)
|
|
317 """
|
|
318
|
|
319 ca=self.common_ancestor(node1,node2)
|
|
320 return self.sum_branchlength(ca,node1)+self.sum_branchlength(ca,node2)
|
|
321
|
|
322 def is_monophyletic(self,taxon_list):
|
|
323 """Return node_id of common ancestor if taxon_list is monophyletic, -1 otherwise.
|
|
324
|
|
325 result = is_monophyletic(self,taxon_list)
|
|
326 """
|
|
327 if isinstance(taxon_list,str):
|
|
328 taxon_set=sets.Set([taxon_list])
|
|
329 else:
|
|
330 taxon_set=sets.Set(taxon_list)
|
|
331 node_id=self.root
|
|
332 while 1:
|
|
333 subclade_taxa=sets.Set(self.get_taxa(node_id))
|
|
334 if subclade_taxa==taxon_set: # are we there?
|
|
335 return node_id
|
|
336 else: # check subnodes
|
|
337 for subnode in self.chain[node_id].succ:
|
|
338 if sets.Set(self.get_taxa(subnode)).issuperset(taxon_set): # taxon_set is downstream
|
|
339 node_id=subnode
|
|
340 break # out of for loop
|
|
341 else:
|
|
342 return -1 # taxon set was not with successors, for loop exhausted
|
|
343
|
|
344 def is_bifurcating(self,node=None):
|
|
345 """Return True if tree downstream of node is strictly bifurcating."""
|
|
346 if not node:
|
|
347 node=self.root
|
|
348 if node==self.root and len(self.node(node).succ)==3: #root can be trifurcating, because it has no ancestor
|
|
349 return self.is_bifurcating(self.node(node).succ[0]) and \
|
|
350 self.is_bifurcating(self.node(node).succ[1]) and \
|
|
351 self.is_bifurcating(self.node(node).succ[2])
|
|
352 if len(self.node(node).succ)==2:
|
|
353 return self.is_bifurcating(self.node(node).succ[0]) and self.is_bifurcating(self.node(node).succ[1])
|
|
354 elif len(self.node(node).succ)==0:
|
|
355 return True
|
|
356 else:
|
|
357 return False
|
|
358
|
|
359
|
|
360
|
|
361 def branchlength2support(self):
|
|
362 """Move values stored in data.branchlength to data.support, and set branchlength to 0.0
|
|
363
|
|
364 This is necessary when support has been stored as branchlength (e.g. paup), and has thus
|
|
365 been read in as branchlength.
|
|
366 """
|
|
367
|
|
368 for n in self.chain.keys():
|
|
369 self.node(n).data.support=self.node(n).data.branchlength
|
|
370 self.node(n).data.branchlength=0.0
|
|
371
|
|
372 def convert_absolute_support(self,nrep):
|
|
373 """Convert absolute support (clade-count) to rel. frequencies.
|
|
374
|
|
375 Some software (e.g. PHYLIP consense) just calculate how often clades appear, instead of
|
|
376 calculating relative frequencies."""
|
|
377
|
|
378 for n in self._walk():
|
|
379 if self.node(n).data.support:
|
|
380 self.node(n).data.support/=float(nrep)
|
|
381
|
|
382 def randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True):
|
|
383 """Generates a random tree with ntax taxa and/or taxa from taxlabels.
|
|
384
|
|
385 new_tree = randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True)
|
|
386 Trees are bifurcating by default. (Polytomies not yet supported).
|
|
387 """
|
|
388
|
|
389 if not ntax and taxon_list:
|
|
390 ntax=len(taxon_list)
|
|
391 elif not taxon_list and ntax:
|
|
392 taxon_list=['taxon'+str(i+1) for i in range(ntax)]
|
|
393 elif not ntax and not taxon_list:
|
|
394 raise TreeError('Either numer of taxa or list of taxa must be specified.')
|
|
395 elif ntax<>len(taxon_list):
|
|
396 raise TreeError('Length of taxon list must correspond to ntax.')
|
|
397 # initiate self with empty root
|
|
398 self.__init__()
|
|
399 terminals=self.get_terminals()
|
|
400 # bifurcate randomly at terminal nodes until ntax is reached
|
|
401 while len(terminals)<ntax:
|
|
402 newsplit=random.choice(terminals)
|
|
403 new_terminals=self.split(parent_id=newsplit,branchlength=branchlength)
|
|
404 # if desired, give some variation to the branch length
|
|
405 if branchlength_sd:
|
|
406 for nt in new_terminals:
|
|
407 bl=random.gauss(branchlength,branchlength_sd)
|
|
408 if bl<0:
|
|
409 bl=0
|
|
410 self.node(nt).data.branchlength=bl
|
|
411 terminals.extend(new_terminals)
|
|
412 terminals.remove(newsplit)
|
|
413 # distribute taxon labels randomly
|
|
414 random.shuffle(taxon_list)
|
|
415 for (node,name) in zip(terminals,taxon_list):
|
|
416 self.node(node).data.taxon=name
|
|
417
|
|
418 def display(self):
|
|
419 """Quick and dirty lists of all nodes."""
|
|
420 table=[('#','taxon','prev','succ','brlen','blen (sum)','support')]
|
|
421 for i in self.all_ids():
|
|
422 n=self.node(i)
|
|
423 if not n.data:
|
|
424 table.append((str(i),'-',str(n.prev),str(n.succ),'-','-','-'))
|
|
425 else:
|
|
426 tx=n.data.taxon
|
|
427 if not tx:
|
|
428 tx='-'
|
|
429 blength=n.data.branchlength
|
|
430 if blength is None:
|
|
431 blength='-'
|
|
432 sum_blength='-'
|
|
433 else:
|
|
434 sum_blength=self.sum_branchlength(node=i)
|
|
435 support=n.data.support
|
|
436 if support is None:
|
|
437 support='-'
|
|
438 table.append((str(i),tx,str(n.prev),str(n.succ),blength,sum_blength,support))
|
|
439 print '\n'.join(['%3s %32s %15s %15s %8s %10s %8s' % l for l in table])
|
|
440 print '\nRoot: ',self.root
|
|
441
|
|
442 def to_string(self,support_as_branchlengths=False,branchlengths_only=False,plain=True,plain_newick=False):
|
|
443 """Return a paup compatible tree line.
|
|
444
|
|
445 to_string(self,support_as_branchlengths=False,branchlengths_only=False,plain=True)
|
|
446 """
|
|
447 # if there's a conflict in the arguments, we override plain=True
|
|
448 if support_as_branchlengths or branchlengths_only:
|
|
449 plain=False
|
|
450 self.support_as_branchlengths=support_as_branchlengths
|
|
451 self.branchlengths_only=branchlengths_only
|
|
452 self.plain=plain
|
|
453
|
|
454 def make_info_string(data,terminal=False):
|
|
455 """Creates nicely formatted support/branchlengths."""
|
|
456 # CHECK FORMATTING
|
|
457 if self.plain: # plain tree only. That's easy.
|
|
458 return ''
|
|
459 elif self.support_as_branchlengths: # support as branchlengths (eg. PAUP), ignore actual branchlengths
|
|
460 if terminal: # terminal branches have 100% support
|
|
461 return ':%1.2f' % self.max_support
|
|
462 else:
|
|
463 return ':%1.2f' % (data.support)
|
|
464 elif self.branchlengths_only: # write only branchlengths, ignore support
|
|
465 return ':%1.5f' % (data.branchlength)
|
|
466 else: # write suport and branchlengths (e.g. .con tree of mrbayes)
|
|
467 if terminal:
|
|
468 return ':%1.5f' % (data.branchlength)
|
|
469 else:
|
|
470 if data.branchlength is not None and data.support is not None: # we have blen and suppport
|
|
471 return '%1.2f:%1.5f' % (data.support,data.branchlength)
|
|
472 elif data.branchlength is not None: # we have only blen
|
|
473 return '0.00000:%1.5f' % (data.branchlength)
|
|
474 elif data.support is not None: # we have only support
|
|
475 return '%1.2f:0.00000' % (data.support)
|
|
476 else:
|
|
477 return '0.00:0.00000'
|
|
478
|
|
479 def newickize(node):
|
|
480 """Convert a node tree to a newick tree recursively."""
|
|
481
|
|
482 if not self.node(node).succ: #terminal
|
|
483 return self.node(node).data.taxon+make_info_string(self.node(node).data,terminal=True)
|
|
484 else:
|
|
485 return '(%s)%s' % (','.join(map(newickize,self.node(node).succ)),make_info_string(self.node(node).data))
|
|
486 return subtree
|
|
487
|
|
488 treeline=['tree']
|
|
489 if self.name:
|
|
490 treeline.append(self.name)
|
|
491 else:
|
|
492 treeline.append('a_tree')
|
|
493 treeline.append('=')
|
|
494 if self.weight<>1:
|
|
495 treeline.append('[&W%s]' % str(round(float(self.weight),3)))
|
|
496 if self.rooted:
|
|
497 treeline.append('[&R]')
|
|
498 treeline.append('(%s)' % ','.join(map(newickize,self.node(self.root).succ)))
|
|
499 treeline.append(';')
|
|
500 if plain_newick:
|
|
501 return treeline[-2]
|
|
502 else:
|
|
503 return ' '.join(treeline)
|
|
504
|
|
505 def __str__(self):
|
|
506 """Short version of to_string(), gives plain tree"""
|
|
507 return self.to_string(plain=True)
|
|
508
|
|
509 def unroot(self):
|
|
510 """Defines a unrooted Tree structure, using data of a rooted Tree."""
|
|
511
|
|
512 # travel down the rooted tree structure and save all branches and the nodes they connect
|
|
513
|
|
514 def _get_branches(node):
|
|
515 branches=[]
|
|
516 for b in self.node(node).succ:
|
|
517 branches.append([node,b,self.node(b).data.branchlength,self.node(b).data.support])
|
|
518 branches.extend(_get_branches(b))
|
|
519 return branches
|
|
520
|
|
521 self.unrooted=_get_branches(self.root)
|
|
522 # if root is bifurcating, then it is eliminated
|
|
523 if len(self.node(self.root).succ)==2:
|
|
524 # find the two branches that connect to root
|
|
525 rootbranches=[b for b in self.unrooted if self.root in b[:2]]
|
|
526 b1=self.unrooted.pop(self.unrooted.index(rootbranches[0]))
|
|
527 b2=self.unrooted.pop(self.unrooted.index(rootbranches[1]))
|
|
528 # Connect them two each other. If both have support, it should be identical (or one set to None?).
|
|
529 # If both have branchlengths, they will be added
|
|
530 newbranch=[b1[1],b2[1],b1[2]+b2[2]]
|
|
531 if b1[3] is None:
|
|
532 newbranch.append(b2[3]) # either None (both rootbranches are unsupported) or some support
|
|
533 elif b2[3] is None:
|
|
534 newbranch.append(b1[3]) # dito
|
|
535 elif b1[3]==b2[3]:
|
|
536 newbranch.append(b1[3]) # identical support
|
|
537 elif b1[3]==0 or b2[3]==0:
|
|
538 newbranch.append(b1[3]+b2[3]) # one is 0, take the other
|
|
539 else:
|
|
540 raise TreeError, 'Support mismatch in bifurcating root: %f, %f' % (float(b1[3]),float(b2[3]))
|
|
541 self.unrooted.append(newbranch)
|
|
542
|
|
543 def root_with_outgroup(self,outgroup=None):
|
|
544
|
|
545 def _connect_subtree(parent,child):
|
|
546 """Hook subtree starting with node child to parent."""
|
|
547 for i,branch in enumerate(self.unrooted):
|
|
548 if parent in branch[:2] and child in branch[:2]:
|
|
549 branch=self.unrooted.pop(i)
|
|
550 break
|
|
551 else:
|
|
552 raise TreeError, 'Unable to connect nodes for rooting: nodes %d and %d are not connected' % (parent,child)
|
|
553 self.link(parent,child)
|
|
554 self.node(child).data.branchlength=branch[2]
|
|
555 self.node(child).data.support=branch[3]
|
|
556 #now check if there are more branches connected to the child, and if so, connect them
|
|
557 child_branches=[b for b in self.unrooted if child in b[:2]]
|
|
558 for b in child_branches:
|
|
559 if child==b[0]:
|
|
560 succ=b[1]
|
|
561 else:
|
|
562 succ=b[0]
|
|
563 _connect_subtree(child,succ)
|
|
564
|
|
565 # check the outgroup we're supposed to root with
|
|
566 if outgroup is None:
|
|
567 return self.root
|
|
568 outgroup_node=self.is_monophyletic(outgroup)
|
|
569 if outgroup_node==-1:
|
|
570 return -1
|
|
571 # if tree is already rooted with outgroup on a bifurcating root,
|
|
572 # or the outgroup includes all taxa on the tree, then we're fine
|
|
573 if (len(self.node(self.root).succ)==2 and outgroup_node in self.node(self.root).succ) or outgroup_node==self.root:
|
|
574 return self.root
|
|
575
|
|
576 self.unroot()
|
|
577 # now we find the branch that connects outgroup and ingroup
|
|
578 #print self.node(outgroup_node).prev
|
|
579 for i,b in enumerate(self.unrooted):
|
|
580 if outgroup_node in b[:2] and self.node(outgroup_node).prev in b[:2]:
|
|
581 root_branch=self.unrooted.pop(i)
|
|
582 break
|
|
583 else:
|
|
584 raise TreeError, 'Unrooted and rooted Tree do not match'
|
|
585 if outgroup_node==root_branch[1]:
|
|
586 ingroup_node=root_branch[0]
|
|
587 else:
|
|
588 ingroup_node=root_branch[1]
|
|
589 # now we destroy the old tree structure, but keep node data. Nodes will be reconnected according to new outgroup
|
|
590 for n in self.all_ids():
|
|
591 self.node(n).prev=None
|
|
592 self.node(n).succ=[]
|
|
593 # now we just add both subtrees (outgroup and ingroup) branch for branch
|
|
594 root=Nodes.Node(data=NodeData()) # new root
|
|
595 self.add(root) # add to tree description
|
|
596 self.root=root.id # set as root
|
|
597 self.unrooted.append([root.id,ingroup_node,root_branch[2],root_branch[3]]) # add branch to ingroup to unrooted tree
|
|
598 self.unrooted.append([root.id,outgroup_node,0.0,0.0]) # add branch to outgroup to unrooted tree
|
|
599 _connect_subtree(root.id,ingroup_node) # add ingroup
|
|
600 _connect_subtree(root.id,outgroup_node) # add outgroup
|
|
601 # if theres still a lonely node in self.chain, then it's the old root, and we delete it
|
|
602 oldroot=[i for i in self.all_ids() if self.node(i).prev is None and i!=self.root]
|
|
603 if len(oldroot)>1:
|
|
604 raise TreeError, 'Isolated nodes in tree description: %s' % ','.join(oldroot)
|
|
605 elif len(oldroot)==1:
|
|
606 self.kill(oldroot[0])
|
|
607 return self.root
|
|
608
|
|
609
|
|
610 def consensus(trees, threshold=0.5,outgroup=None):
|
|
611 """Compute a majority rule consensus tree of all clades with relative frequency>=threshold from a list of trees."""
|
|
612
|
|
613 total=len(trees)
|
|
614 if total==0:
|
|
615 return None
|
|
616 # shouldn't we make sure that it's NodeData or subclass??
|
|
617 dataclass=trees[0].dataclass
|
|
618 max_support=trees[0].max_support
|
|
619 clades={}
|
|
620 #countclades={}
|
|
621 alltaxa=sets.Set(trees[0].get_taxa())
|
|
622 # calculate calde frequencies
|
|
623 c=0
|
|
624 for t in trees:
|
|
625 c+=1
|
|
626 #if c%50==0:
|
|
627 # print c
|
|
628 if alltaxa!=sets.Set(t.get_taxa()):
|
|
629 raise TreeError, 'Trees for consensus must contain the same taxa'
|
|
630 t.root_with_outgroup(outgroup=outgroup)
|
|
631 for st_node in t._walk(t.root):
|
|
632 subclade_taxa=t.get_taxa(st_node)
|
|
633 subclade_taxa.sort()
|
|
634 subclade_taxa=str(subclade_taxa) # lists are not hashable
|
|
635 if subclade_taxa in clades:
|
|
636 clades[subclade_taxa]+=float(t.weight)/total
|
|
637 else:
|
|
638 clades[subclade_taxa]=float(t.weight)/total
|
|
639 #if subclade_taxa in countclades:
|
|
640 # countclades[subclade_taxa]+=t.weight
|
|
641 #else:
|
|
642 # countclades[subclade_taxa]=t.weight
|
|
643 # weed out clades below threshold
|
|
644 for (c,p) in clades.items():
|
|
645 if p<threshold:
|
|
646 del clades[c]
|
|
647 # create a tree with a root node
|
|
648 consensus=Tree(name='consensus_%2.1f' % float(threshold),data=dataclass)
|
|
649 # each clade needs a node in the new tree, add them as isolated nodes
|
|
650 for (c,s) in clades.items():
|
|
651 node=Nodes.Node(data=dataclass())
|
|
652 node.data.support=s
|
|
653 node.data.taxon=sets.Set(eval(c))
|
|
654 consensus.add(node)
|
|
655 # set root node data
|
|
656 consensus.node(consensus.root).data.support=None
|
|
657 consensus.node(consensus.root).data.taxon=alltaxa
|
|
658 # we sort the nodes by no. of taxa in the clade, so root will be the last
|
|
659 consensus_ids=consensus.all_ids()
|
|
660 consensus_ids.sort(lambda x,y:len(consensus.node(x).data.taxon)-len(consensus.node(y).data.taxon))
|
|
661 # now we just have to hook each node to the next smallest node that includes all taxa of the current
|
|
662 for i,current in enumerate(consensus_ids[:-1]): # skip the last one which is the root
|
|
663 #print '----'
|
|
664 #print 'current: ',consensus.node(current).data.taxon
|
|
665 # search remaining nodes
|
|
666 for parent in consensus_ids[i+1:]:
|
|
667 #print 'parent: ',consensus.node(parent).data.taxon
|
|
668 if consensus.node(parent).data.taxon.issuperset(consensus.node(current).data.taxon):
|
|
669 break
|
|
670 else:
|
|
671 sys.exit('corrupt tree structure?')
|
|
672 # internal nodes don't have taxa
|
|
673 if len(consensus.node(current).data.taxon)==1:
|
|
674 consensus.node(current).data.taxon=consensus.node(current).data.taxon.pop()
|
|
675 # reset the support for terminal nodes to maximum
|
|
676 #consensus.node(current).data.support=max_support
|
|
677 else:
|
|
678 consensus.node(current).data.taxon=None
|
|
679 consensus.link(parent,current)
|
|
680 # eliminate root taxon name
|
|
681 consensus.node(consensus_ids[-1]).data.taxon=None
|
|
682 return consensus
|
|
683
|
|
684
|
|
685
|
|
686 |