annotate corebio/seq_io/_nexus/Trees.py @ 4:4d47ab2b7bcc

Uploaded
author davidmurphy
date Fri, 13 Jan 2012 07:18:19 -0500
parents c55bdc2fb9fa
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
1
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
2 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
3 # Trees.py
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
4 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
5 # Copyright 2005 by Frank Kauff & Cymon J. Cox. All rights reserved.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
6 # This code is part of the Biopython distribution and governed by its
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
7 # license. Please see the LICENSE file that should have been included
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
8 # as part of this package.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
9 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
10 # Tree class handles phylogenetic trees. Provides a set of methods to read and write newick-format tree
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
11 # descriptions, get information about trees (monphyly of taxon sets, congruence between trees, common ancestors,...)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
12 # and to manipulate trees (reroot trees, split terminal nodes).
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
13 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
14 # Bug reports welcome: fkauff@duke.edu
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
15 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
16
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
17 import sys, random, sets
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
18 import Nodes
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
19
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
20 PRECISION_BRANCHLENGTH=6
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
21 PRECISION_SUPPORT=6
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
22
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
23 class TreeError(Exception): pass
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
24
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
25 class NodeData:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
26 """Stores tree-relevant data associated with nodes (e.g. branches or otus)."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
27 def __init__(self,taxon=None,branchlength=0.0,support=None):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
28 self.taxon=taxon
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
29 self.branchlength=branchlength
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
30 self.support=support
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
31
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
32 class Tree(Nodes.Chain):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
33 """Represents a tree using a chain of nodes with on predecessor (=ancestor)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
34 and multiple successors (=subclades).
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
35 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
36 # A newick tree is parsed into nested list and then converted to a node list in two stages
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
37 # mostly due to historical reasons. This could be done in one swoop). Note: parentheses ( ) and
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
38 # colon : are not allowed in taxon names. This is against NEXUS standard, but makes life much
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
39 # easier when parsing trees.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
40
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
41 ## NOTE: Tree should store its data class in something like self.dataclass=data,
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
42 ## so that nodes that are generated have easy access to the data class
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
43 ## Some routines use automatically NodeData, this needs to be more concise
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
44
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
45 def __init__(self,tree=None,weight=1.0,rooted=False,name='',data=NodeData,values_are_support=False,max_support=1.0):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
46 """Ntree(self,tree)."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
47 Nodes.Chain.__init__(self)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
48 self.dataclass=data
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
49 self.__values_are_support=values_are_support
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
50 self.max_support=max_support
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
51 self.weight=weight
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
52 self.rooted=rooted
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
53 self.name=name
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
54 root=Nodes.Node(data())
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
55 self.add(root)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
56 self.root=root.id
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
57 if tree: # use the tree we have
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
58 # if Tree is called from outside Nexus parser, we need to get rid of linebreaks, etc
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
59 tree=tree.strip().replace('\n','').replace('\r','')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
60 # there's discrepancy whether newick allows semicolons et the end
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
61 tree=tree.rstrip(';')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
62 self._add_subtree(parent_id=root.id,tree=self._parse(tree)[0])
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
63
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
64 def _parse(self,tree):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
65 """Parses (a,b,c...)[[[xx]:]yy] into subcomponents and travels down recursively."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
66
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
67 if tree.count('(')!=tree.count(')'):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
68 raise TreeError, 'Parentheses do not match in (sub)tree: '+tree
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
69 if tree.count('(')==0: # a leaf
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
70 colon=tree.rfind(':')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
71 if colon>-1:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
72 return [tree[:colon],self._get_values(tree[colon+1:])]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
73 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
74 return [tree,[None]]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
75 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
76 closing=tree.rfind(')')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
77 val=self._get_values(tree[closing+1:])
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
78 if not val:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
79 val=[None]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
80 subtrees=[]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
81 plevel=0
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
82 prev=1
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
83 for p in range(1,closing):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
84 if tree[p]=='(':
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
85 plevel+=1
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
86 elif tree[p]==')':
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
87 plevel-=1
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
88 elif tree[p]==',' and plevel==0:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
89 subtrees.append(tree[prev:p])
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
90 prev=p+1
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
91 subtrees.append(tree[prev:closing])
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
92 subclades=[self._parse(subtree) for subtree in subtrees]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
93 return [subclades,val]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
94
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
95 def _add_subtree(self,parent_id=None,tree=None):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
96 """Adds leaf or tree (in newick format) to a parent_id. (self,parent_id,tree)."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
97
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
98 if parent_id is None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
99 raise TreeError('Need node_id to connect to.')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
100 for st in tree:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
101 if type(st[0])==list: # it's a subtree
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
102 nd=self.dataclass()
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
103 if len(st[1])>=2: # if there's two values, support comes first. Is that always so?
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
104 nd.support=st[1][0]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
105 if st[1][1] is not None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
106 nd.branchlength=st[1][1]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
107 elif len(st[1])==1: # otherwise it could be real branchlengths or support as branchlengths
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
108 if not self.__values_are_support: # default
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
109 if st[1][0] is not None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
110 nd.branchlength=st[1][0]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
111 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
112 nd.support=st[1][0]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
113 sn=Nodes.Node(nd)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
114 self.add(sn,parent_id)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
115 self._add_subtree(sn.id,st[0])
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
116 else: # it's a leaf
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
117 nd=self.dataclass()
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
118 nd.taxon=st[0]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
119 if len(st)>1:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
120 if len(st[1])>=2: # if there's two values, support comes first. Is that always so?
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
121 nd.support=st[1][0]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
122 if st[1][1] is not None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
123 nd.branchlength=st[1][1]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
124 elif len(st[1])==1: # otherwise it could be real branchlengths or support as branchlengths
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
125 if not self.__values_are_support: # default
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
126 if st[1][0] is not None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
127 nd.branchlength=st[1][0]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
128 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
129 nd.support=st[1][0]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
130 leaf=Nodes.Node(nd)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
131 self.add(leaf,parent_id)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
132
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
133 def _get_values(self, text):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
134 """Extracts values (support/branchlength) from xx[:yyy], xx."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
135
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
136 if text=='':
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
137 return None
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
138 return [float(t) for t in text.split(':') if t.strip()]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
139
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
140 def _walk(self,node=None):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
141 """Return all node_ids downwards from a node."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
142
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
143 if node is None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
144 node=self.root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
145 for n in self.node(node).succ:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
146 yield n
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
147 for sn in self._walk(n):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
148 yield sn
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
149
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
150 def node(self,node_id):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
151 """Return the instance of node_id.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
152
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
153 node = node(self,node_id)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
154 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
155 if node_id not in self.chain:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
156 raise TreeError('Unknown node_id: %d' % node_id)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
157 return self.chain[node_id]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
158
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
159 def split(self,parent_id=None,n=2,branchlength=1.0):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
160 """Speciation: generates n (default two) descendants of a node.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
161
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
162 [new ids] = split(self,parent_id=None,n=2,branchlength=1.0):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
163 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
164 if parent_id is None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
165 raise TreeError('Missing node_id.')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
166 ids=[]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
167 parent_data=self.chain[parent_id].data
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
168 for i in range(n):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
169 node=Nodes.Node()
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
170 if parent_data:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
171 node.data=self.dataclass()
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
172 # each node has taxon and branchlength attribute
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
173 if parent_data.taxon:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
174 node.data.taxon=parent_data.taxon+str(i)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
175 node.data.branchlength=branchlength
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
176 ids.append(self.add(node,parent_id))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
177 return ids
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
178
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
179 def search_taxon(self,taxon):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
180 """Returns the first matching taxon in self.data.taxon. Not restricted to terminal nodes.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
181
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
182 node_id = search_taxon(self,taxon)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
183 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
184 for id,node in self.chain.items():
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
185 if node.data.taxon==taxon:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
186 return id
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
187 return None
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
188
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
189 def prune(self,taxon):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
190 """Prunes a terminal taxon from the tree.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
191
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
192 id_of_previous_node = prune(self,taxon)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
193 If taxon is from a bifurcation, the connectiong node will be collapsed
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
194 and its branchlength added to remaining terminal node. This might be no
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
195 longer a meaningful value'
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
196 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
197
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
198 id=self.search_taxon(taxon)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
199 if id is None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
200 raise TreeError('Taxon not found: %s' % taxon)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
201 elif id not in self.get_terminals():
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
202 raise TreeError('Not a terminal taxon: %s' % taxon)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
203 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
204 prev=self.unlink(id)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
205 self.kill(id)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
206 if not prev==self.root and len(self.node(prev).succ)==1:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
207 succ=self.node(prev).succ[0]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
208 new_bl=self.node(prev).data.branchlength+self.node(succ).data.branchlength
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
209 self.collapse(prev)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
210 self.node(succ).data.branchlength=new_bl
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
211 return prev
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
212
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
213 def get_taxa(self,node_id=None):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
214 """Return a list of all otus downwards from a node (self, node_id).
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
215
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
216 nodes = get_taxa(self,node_id=None)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
217 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
218
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
219 if node_id is None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
220 node_id=self.root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
221 if node_id not in self.chain:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
222 raise TreeError('Unknown node_id: %d.' % node_id)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
223 if self.chain[node_id].succ==[]:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
224 if self.chain[node_id].data:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
225 return [self.chain[node_id].data.taxon]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
226 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
227 return None
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
228 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
229 list=[]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
230 for succ in self.chain[node_id].succ:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
231 list.extend(self.get_taxa(succ))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
232 return list
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
233
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
234 def get_terminals(self):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
235 """Return a list of all terminal nodes."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
236 return [i for i in self.all_ids() if self.node(i).succ==[]]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
237
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
238 def sum_branchlength(self,root=None,node=None):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
239 """Adds up the branchlengths from root (default self.root) to node.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
240
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
241 sum = sum_branchlength(self,root=None,node=None)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
242 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
243
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
244 if root is None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
245 root=self.root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
246 if node is None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
247 raise TreeError('Missing node id.')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
248 blen=0.0
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
249 while node is not None and node is not root:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
250 blen+=self.node(node).data.branchlength
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
251 node=self.node(node).prev
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
252 return blen
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
253
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
254 def set_subtree(self,node):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
255 """Return subtree as a set of nested sets.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
256
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
257 sets = set_subtree(self,node)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
258 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
259
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
260 if self.node(node).succ==[]:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
261 return self.node(node).data.taxon
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
262 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
263 return sets.Set([self.set_subtree(n) for n in self.node(node).succ])
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
264
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
265 def is_identical(self,tree2):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
266 """Compare tree and tree2 for identity.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
267
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
268 result = is_identical(self,tree2)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
269 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
270 return self.set_subtree(self.root)==tree2.set_subtree(tree2.root)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
271
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
272 def is_compatible(self,tree2,threshold,strict=True):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
273 """Compares branches with support>threshold for compatibility.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
274
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
275 result = is_compatible(self,tree2,threshold)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
276 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
277
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
278 # check if both trees have the same set of taxa. strict=True enforces this.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
279 missing2=sets.Set(self.get_taxa())-sets.Set(tree2.get_taxa())
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
280 missing1=sets.Set(tree2.get_taxa())-sets.Set(self.get_taxa())
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
281 if strict and (missing1 or missing2):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
282 if missing1:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
283 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing1) , self.name)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
284 if missing2:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
285 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing2) , tree2.name)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
286 raise TreeError, 'Can\'t compare trees with different taxon compositions.'
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
287 t1=[(sets.Set(self.get_taxa(n)),self.node(n).data.support) for n in self.all_ids() if \
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
288 self.node(n).succ and\
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
289 (self.node(n).data and self.node(n).data.support and self.node(n).data.support>=threshold)]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
290 t2=[(sets.Set(tree2.get_taxa(n)),tree2.node(n).data.support) for n in tree2.all_ids() if \
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
291 tree2.node(n).succ and\
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
292 (tree2.node(n).data and tree2.node(n).data.support and tree2.node(n).data.support>=threshold)]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
293 conflict=[]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
294 for (st1,sup1) in t1:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
295 for (st2,sup2) in t2:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
296 if not st1.issubset(st2) and not st2.issubset(st1): # don't hiccup on upstream nodes
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
297 intersect,notin1,notin2=st1 & st2, st2-st1, st1-st2 # all three are non-empty sets
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
298 # if notin1==missing1 or notin2==missing2 <==> st1.issubset(st2) or st2.issubset(st1) ???
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
299 if intersect and not (notin1.issubset(missing1) or notin2.issubset(missing2)): # omit conflicts due to missing taxa
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
300 conflict.append((st1,sup1,st2,sup2,intersect,notin1,notin2))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
301 return conflict
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
302
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
303 def common_ancestor(self,node1,node2):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
304 """Return the common ancestor that connects to nodes.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
305
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
306 node_id = common_ancestor(self,node1,node2)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
307 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
308
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
309 l1=[self.root]+self.trace(self.root,node1)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
310 l2=[self.root]+self.trace(self.root,node2)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
311 return [n for n in l1 if n in l2][-1]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
312
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
313
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
314 def distance(self,node1,node2):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
315 """Add and return the sum of the branchlengths between two nodes.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
316 dist = distance(self,node1,node2)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
317 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
318
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
319 ca=self.common_ancestor(node1,node2)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
320 return self.sum_branchlength(ca,node1)+self.sum_branchlength(ca,node2)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
321
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
322 def is_monophyletic(self,taxon_list):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
323 """Return node_id of common ancestor if taxon_list is monophyletic, -1 otherwise.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
324
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
325 result = is_monophyletic(self,taxon_list)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
326 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
327 if isinstance(taxon_list,str):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
328 taxon_set=sets.Set([taxon_list])
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
329 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
330 taxon_set=sets.Set(taxon_list)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
331 node_id=self.root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
332 while 1:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
333 subclade_taxa=sets.Set(self.get_taxa(node_id))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
334 if subclade_taxa==taxon_set: # are we there?
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
335 return node_id
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
336 else: # check subnodes
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
337 for subnode in self.chain[node_id].succ:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
338 if sets.Set(self.get_taxa(subnode)).issuperset(taxon_set): # taxon_set is downstream
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
339 node_id=subnode
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
340 break # out of for loop
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
341 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
342 return -1 # taxon set was not with successors, for loop exhausted
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
343
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
344 def is_bifurcating(self,node=None):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
345 """Return True if tree downstream of node is strictly bifurcating."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
346 if not node:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
347 node=self.root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
348 if node==self.root and len(self.node(node).succ)==3: #root can be trifurcating, because it has no ancestor
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
349 return self.is_bifurcating(self.node(node).succ[0]) and \
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
350 self.is_bifurcating(self.node(node).succ[1]) and \
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
351 self.is_bifurcating(self.node(node).succ[2])
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
352 if len(self.node(node).succ)==2:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
353 return self.is_bifurcating(self.node(node).succ[0]) and self.is_bifurcating(self.node(node).succ[1])
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
354 elif len(self.node(node).succ)==0:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
355 return True
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
356 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
357 return False
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
358
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
359
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
360
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
361 def branchlength2support(self):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
362 """Move values stored in data.branchlength to data.support, and set branchlength to 0.0
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
363
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
364 This is necessary when support has been stored as branchlength (e.g. paup), and has thus
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
365 been read in as branchlength.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
366 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
367
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
368 for n in self.chain.keys():
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
369 self.node(n).data.support=self.node(n).data.branchlength
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
370 self.node(n).data.branchlength=0.0
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
371
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
372 def convert_absolute_support(self,nrep):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
373 """Convert absolute support (clade-count) to rel. frequencies.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
374
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
375 Some software (e.g. PHYLIP consense) just calculate how often clades appear, instead of
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
376 calculating relative frequencies."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
377
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
378 for n in self._walk():
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
379 if self.node(n).data.support:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
380 self.node(n).data.support/=float(nrep)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
381
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
382 def randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
383 """Generates a random tree with ntax taxa and/or taxa from taxlabels.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
384
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
385 new_tree = randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
386 Trees are bifurcating by default. (Polytomies not yet supported).
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
387 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
388
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
389 if not ntax and taxon_list:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
390 ntax=len(taxon_list)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
391 elif not taxon_list and ntax:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
392 taxon_list=['taxon'+str(i+1) for i in range(ntax)]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
393 elif not ntax and not taxon_list:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
394 raise TreeError('Either numer of taxa or list of taxa must be specified.')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
395 elif ntax<>len(taxon_list):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
396 raise TreeError('Length of taxon list must correspond to ntax.')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
397 # initiate self with empty root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
398 self.__init__()
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
399 terminals=self.get_terminals()
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
400 # bifurcate randomly at terminal nodes until ntax is reached
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
401 while len(terminals)<ntax:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
402 newsplit=random.choice(terminals)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
403 new_terminals=self.split(parent_id=newsplit,branchlength=branchlength)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
404 # if desired, give some variation to the branch length
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
405 if branchlength_sd:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
406 for nt in new_terminals:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
407 bl=random.gauss(branchlength,branchlength_sd)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
408 if bl<0:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
409 bl=0
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
410 self.node(nt).data.branchlength=bl
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
411 terminals.extend(new_terminals)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
412 terminals.remove(newsplit)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
413 # distribute taxon labels randomly
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
414 random.shuffle(taxon_list)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
415 for (node,name) in zip(terminals,taxon_list):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
416 self.node(node).data.taxon=name
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
417
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
418 def display(self):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
419 """Quick and dirty lists of all nodes."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
420 table=[('#','taxon','prev','succ','brlen','blen (sum)','support')]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
421 for i in self.all_ids():
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
422 n=self.node(i)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
423 if not n.data:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
424 table.append((str(i),'-',str(n.prev),str(n.succ),'-','-','-'))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
425 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
426 tx=n.data.taxon
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
427 if not tx:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
428 tx='-'
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
429 blength=n.data.branchlength
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
430 if blength is None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
431 blength='-'
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
432 sum_blength='-'
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
433 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
434 sum_blength=self.sum_branchlength(node=i)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
435 support=n.data.support
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
436 if support is None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
437 support='-'
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
438 table.append((str(i),tx,str(n.prev),str(n.succ),blength,sum_blength,support))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
439 print '\n'.join(['%3s %32s %15s %15s %8s %10s %8s' % l for l in table])
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
440 print '\nRoot: ',self.root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
441
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
442 def to_string(self,support_as_branchlengths=False,branchlengths_only=False,plain=True,plain_newick=False):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
443 """Return a paup compatible tree line.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
444
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
445 to_string(self,support_as_branchlengths=False,branchlengths_only=False,plain=True)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
446 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
447 # if there's a conflict in the arguments, we override plain=True
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
448 if support_as_branchlengths or branchlengths_only:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
449 plain=False
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
450 self.support_as_branchlengths=support_as_branchlengths
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
451 self.branchlengths_only=branchlengths_only
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
452 self.plain=plain
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
453
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
454 def make_info_string(data,terminal=False):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
455 """Creates nicely formatted support/branchlengths."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
456 # CHECK FORMATTING
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
457 if self.plain: # plain tree only. That's easy.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
458 return ''
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
459 elif self.support_as_branchlengths: # support as branchlengths (eg. PAUP), ignore actual branchlengths
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
460 if terminal: # terminal branches have 100% support
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
461 return ':%1.2f' % self.max_support
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
462 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
463 return ':%1.2f' % (data.support)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
464 elif self.branchlengths_only: # write only branchlengths, ignore support
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
465 return ':%1.5f' % (data.branchlength)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
466 else: # write suport and branchlengths (e.g. .con tree of mrbayes)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
467 if terminal:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
468 return ':%1.5f' % (data.branchlength)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
469 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
470 if data.branchlength is not None and data.support is not None: # we have blen and suppport
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
471 return '%1.2f:%1.5f' % (data.support,data.branchlength)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
472 elif data.branchlength is not None: # we have only blen
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
473 return '0.00000:%1.5f' % (data.branchlength)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
474 elif data.support is not None: # we have only support
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
475 return '%1.2f:0.00000' % (data.support)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
476 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
477 return '0.00:0.00000'
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
478
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
479 def newickize(node):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
480 """Convert a node tree to a newick tree recursively."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
481
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
482 if not self.node(node).succ: #terminal
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
483 return self.node(node).data.taxon+make_info_string(self.node(node).data,terminal=True)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
484 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
485 return '(%s)%s' % (','.join(map(newickize,self.node(node).succ)),make_info_string(self.node(node).data))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
486 return subtree
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
487
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
488 treeline=['tree']
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
489 if self.name:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
490 treeline.append(self.name)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
491 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
492 treeline.append('a_tree')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
493 treeline.append('=')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
494 if self.weight<>1:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
495 treeline.append('[&W%s]' % str(round(float(self.weight),3)))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
496 if self.rooted:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
497 treeline.append('[&R]')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
498 treeline.append('(%s)' % ','.join(map(newickize,self.node(self.root).succ)))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
499 treeline.append(';')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
500 if plain_newick:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
501 return treeline[-2]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
502 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
503 return ' '.join(treeline)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
504
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
505 def __str__(self):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
506 """Short version of to_string(), gives plain tree"""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
507 return self.to_string(plain=True)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
508
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
509 def unroot(self):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
510 """Defines a unrooted Tree structure, using data of a rooted Tree."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
511
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
512 # travel down the rooted tree structure and save all branches and the nodes they connect
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
513
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
514 def _get_branches(node):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
515 branches=[]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
516 for b in self.node(node).succ:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
517 branches.append([node,b,self.node(b).data.branchlength,self.node(b).data.support])
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
518 branches.extend(_get_branches(b))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
519 return branches
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
520
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
521 self.unrooted=_get_branches(self.root)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
522 # if root is bifurcating, then it is eliminated
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
523 if len(self.node(self.root).succ)==2:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
524 # find the two branches that connect to root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
525 rootbranches=[b for b in self.unrooted if self.root in b[:2]]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
526 b1=self.unrooted.pop(self.unrooted.index(rootbranches[0]))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
527 b2=self.unrooted.pop(self.unrooted.index(rootbranches[1]))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
528 # Connect them two each other. If both have support, it should be identical (or one set to None?).
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
529 # If both have branchlengths, they will be added
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
530 newbranch=[b1[1],b2[1],b1[2]+b2[2]]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
531 if b1[3] is None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
532 newbranch.append(b2[3]) # either None (both rootbranches are unsupported) or some support
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
533 elif b2[3] is None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
534 newbranch.append(b1[3]) # dito
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
535 elif b1[3]==b2[3]:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
536 newbranch.append(b1[3]) # identical support
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
537 elif b1[3]==0 or b2[3]==0:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
538 newbranch.append(b1[3]+b2[3]) # one is 0, take the other
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
539 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
540 raise TreeError, 'Support mismatch in bifurcating root: %f, %f' % (float(b1[3]),float(b2[3]))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
541 self.unrooted.append(newbranch)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
542
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
543 def root_with_outgroup(self,outgroup=None):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
544
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
545 def _connect_subtree(parent,child):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
546 """Hook subtree starting with node child to parent."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
547 for i,branch in enumerate(self.unrooted):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
548 if parent in branch[:2] and child in branch[:2]:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
549 branch=self.unrooted.pop(i)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
550 break
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
551 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
552 raise TreeError, 'Unable to connect nodes for rooting: nodes %d and %d are not connected' % (parent,child)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
553 self.link(parent,child)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
554 self.node(child).data.branchlength=branch[2]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
555 self.node(child).data.support=branch[3]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
556 #now check if there are more branches connected to the child, and if so, connect them
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
557 child_branches=[b for b in self.unrooted if child in b[:2]]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
558 for b in child_branches:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
559 if child==b[0]:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
560 succ=b[1]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
561 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
562 succ=b[0]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
563 _connect_subtree(child,succ)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
564
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
565 # check the outgroup we're supposed to root with
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
566 if outgroup is None:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
567 return self.root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
568 outgroup_node=self.is_monophyletic(outgroup)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
569 if outgroup_node==-1:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
570 return -1
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
571 # if tree is already rooted with outgroup on a bifurcating root,
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
572 # or the outgroup includes all taxa on the tree, then we're fine
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
573 if (len(self.node(self.root).succ)==2 and outgroup_node in self.node(self.root).succ) or outgroup_node==self.root:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
574 return self.root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
575
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
576 self.unroot()
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
577 # now we find the branch that connects outgroup and ingroup
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
578 #print self.node(outgroup_node).prev
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
579 for i,b in enumerate(self.unrooted):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
580 if outgroup_node in b[:2] and self.node(outgroup_node).prev in b[:2]:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
581 root_branch=self.unrooted.pop(i)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
582 break
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
583 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
584 raise TreeError, 'Unrooted and rooted Tree do not match'
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
585 if outgroup_node==root_branch[1]:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
586 ingroup_node=root_branch[0]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
587 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
588 ingroup_node=root_branch[1]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
589 # now we destroy the old tree structure, but keep node data. Nodes will be reconnected according to new outgroup
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
590 for n in self.all_ids():
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
591 self.node(n).prev=None
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
592 self.node(n).succ=[]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
593 # now we just add both subtrees (outgroup and ingroup) branch for branch
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
594 root=Nodes.Node(data=NodeData()) # new root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
595 self.add(root) # add to tree description
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
596 self.root=root.id # set as root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
597 self.unrooted.append([root.id,ingroup_node,root_branch[2],root_branch[3]]) # add branch to ingroup to unrooted tree
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
598 self.unrooted.append([root.id,outgroup_node,0.0,0.0]) # add branch to outgroup to unrooted tree
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
599 _connect_subtree(root.id,ingroup_node) # add ingroup
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
600 _connect_subtree(root.id,outgroup_node) # add outgroup
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
601 # if theres still a lonely node in self.chain, then it's the old root, and we delete it
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
602 oldroot=[i for i in self.all_ids() if self.node(i).prev is None and i!=self.root]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
603 if len(oldroot)>1:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
604 raise TreeError, 'Isolated nodes in tree description: %s' % ','.join(oldroot)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
605 elif len(oldroot)==1:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
606 self.kill(oldroot[0])
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
607 return self.root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
608
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
609
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
610 def consensus(trees, threshold=0.5,outgroup=None):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
611 """Compute a majority rule consensus tree of all clades with relative frequency>=threshold from a list of trees."""
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
612
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
613 total=len(trees)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
614 if total==0:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
615 return None
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
616 # shouldn't we make sure that it's NodeData or subclass??
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
617 dataclass=trees[0].dataclass
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
618 max_support=trees[0].max_support
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
619 clades={}
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
620 #countclades={}
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
621 alltaxa=sets.Set(trees[0].get_taxa())
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
622 # calculate calde frequencies
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
623 c=0
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
624 for t in trees:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
625 c+=1
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
626 #if c%50==0:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
627 # print c
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
628 if alltaxa!=sets.Set(t.get_taxa()):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
629 raise TreeError, 'Trees for consensus must contain the same taxa'
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
630 t.root_with_outgroup(outgroup=outgroup)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
631 for st_node in t._walk(t.root):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
632 subclade_taxa=t.get_taxa(st_node)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
633 subclade_taxa.sort()
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
634 subclade_taxa=str(subclade_taxa) # lists are not hashable
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
635 if subclade_taxa in clades:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
636 clades[subclade_taxa]+=float(t.weight)/total
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
637 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
638 clades[subclade_taxa]=float(t.weight)/total
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
639 #if subclade_taxa in countclades:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
640 # countclades[subclade_taxa]+=t.weight
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
641 #else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
642 # countclades[subclade_taxa]=t.weight
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
643 # weed out clades below threshold
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
644 for (c,p) in clades.items():
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
645 if p<threshold:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
646 del clades[c]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
647 # create a tree with a root node
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
648 consensus=Tree(name='consensus_%2.1f' % float(threshold),data=dataclass)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
649 # each clade needs a node in the new tree, add them as isolated nodes
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
650 for (c,s) in clades.items():
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
651 node=Nodes.Node(data=dataclass())
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
652 node.data.support=s
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
653 node.data.taxon=sets.Set(eval(c))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
654 consensus.add(node)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
655 # set root node data
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
656 consensus.node(consensus.root).data.support=None
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
657 consensus.node(consensus.root).data.taxon=alltaxa
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
658 # we sort the nodes by no. of taxa in the clade, so root will be the last
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
659 consensus_ids=consensus.all_ids()
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
660 consensus_ids.sort(lambda x,y:len(consensus.node(x).data.taxon)-len(consensus.node(y).data.taxon))
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
661 # now we just have to hook each node to the next smallest node that includes all taxa of the current
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
662 for i,current in enumerate(consensus_ids[:-1]): # skip the last one which is the root
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
663 #print '----'
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
664 #print 'current: ',consensus.node(current).data.taxon
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
665 # search remaining nodes
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
666 for parent in consensus_ids[i+1:]:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
667 #print 'parent: ',consensus.node(parent).data.taxon
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
668 if consensus.node(parent).data.taxon.issuperset(consensus.node(current).data.taxon):
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
669 break
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
670 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
671 sys.exit('corrupt tree structure?')
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
672 # internal nodes don't have taxa
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
673 if len(consensus.node(current).data.taxon)==1:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
674 consensus.node(current).data.taxon=consensus.node(current).data.taxon.pop()
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
675 # reset the support for terminal nodes to maximum
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
676 #consensus.node(current).data.support=max_support
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
677 else:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
678 consensus.node(current).data.taxon=None
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
679 consensus.link(parent,current)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
680 # eliminate root taxon name
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
681 consensus.node(consensus_ids[-1]).data.taxon=None
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
682 return consensus
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
683
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
684
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
685
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
686