6
|
1 import os, re, sys
|
|
2
|
|
3 class Tree:
|
|
4
|
|
5 def __init__( self, inFileName="" ):
|
|
6 self.tree = None
|
|
7 self.inFileName = inFileName
|
|
8 if self.inFileName != "":
|
|
9 self.loadTree()
|
|
10
|
|
11 def loadTree( self, verbose=0 ):
|
|
12 inF = open( self.inFileName, "r" )
|
|
13 lines = inF.readlines()
|
|
14 inF.close()
|
|
15 line = "".join(lines).replace("\n","")
|
|
16 self.tree = self.parseTree( line )
|
|
17 if verbose > 0:
|
|
18 print "nb of leaves: %i" % ( self.getNbOfLeaves( self.tree ) )
|
|
19
|
|
20 def parseTree( self, sTree ):
|
|
21 if "," not in sTree:
|
|
22 name, length = sTree.split(":")
|
|
23 return self.makeLeaf( name, float(length) )
|
|
24
|
|
25 distPattern = re.compile(r'(?P<tree>\(.+\))\:(?P<length>[e\-\d\.]+)$')
|
|
26 m = distPattern.search( sTree )
|
|
27 length = 0
|
|
28 if m:
|
|
29 if m.group('length'): length = float( m.group('length') )
|
|
30 sTree = m.group('tree')
|
|
31 if length == "": length = 0
|
|
32
|
|
33 lhs, rhs = self.parseSubTree( sTree )
|
|
34
|
|
35 return { "name": "internal",
|
|
36 "left": self.parseTree( lhs ),
|
|
37 "right": self.parseTree( rhs ),
|
|
38 "length": length }
|
|
39
|
|
40 def makeLeaf( self, name, length ):
|
|
41 return { "left":None, "right":None, "name":name, "length":length }
|
|
42
|
|
43 def parseSubTree( self, sTree ):
|
|
44 """
|
|
45 Parse a newick-formatted string of type 'a,b' into [a,b]
|
|
46 """
|
|
47 chars = list( sTree[1:-1] )
|
|
48 count = 0
|
|
49 isLhs = True
|
|
50 leftS = ""
|
|
51 rightS = ""
|
|
52 for c in chars:
|
|
53 if c == "(":
|
|
54 count += 1
|
|
55 elif c == ")":
|
|
56 count -= 1
|
|
57 elif (c == ",") and (count == 0) and (isLhs) :
|
|
58 isLhs = False
|
|
59 continue
|
|
60 if isLhs: leftS += c
|
|
61 else: rightS += c
|
|
62 return [ leftS, rightS ]
|
|
63
|
|
64 def toNewick( self, tree ):
|
|
65 newString = ""
|
|
66 if tree["name"] is not "internal":
|
|
67 newString += tree["name"]
|
|
68 else:
|
|
69 newString += "("
|
|
70 newString += self.toNewick( tree["left"] )
|
|
71 newString += ","
|
|
72 newString += self.toNewick( tree["right"] )
|
|
73 newString += ")"
|
|
74 if tree["length"]:
|
|
75 newString += ":"
|
|
76 newString += "%f" % ( tree["length"] )
|
|
77 return newString
|
|
78
|
|
79 def saveTree( self, outFileName ):
|
|
80 outF = open( outFileName, "w" )
|
|
81 outF.write( self.toNewick( self.tree ) )
|
|
82 outF.close()
|
|
83
|
|
84 def replaceHeaderViaPrefixSearch( self, tree, dNew2Init ):
|
|
85 if dNew2Init.has_key( tree["name"] ):
|
|
86 tree["name"] = dNew2Init[ tree["name"] ].replace(" ","_").replace("::","-").replace(",","-")
|
|
87 if tree["left"] != None:
|
|
88 self.replaceHeaderViaPrefixSearch( tree["left"], dNew2Init )
|
|
89 if tree["right"] != None:
|
|
90 self.replaceHeaderViaPrefixSearch( tree["right"], dNew2Init )
|
|
91
|
|
92 def retrieveInitialSequenceHeaders( self, dNew2Init, outFileName ):
|
|
93 tree = self.tree
|
|
94 self.replaceHeaderViaPrefixSearch( tree, dNew2Init )
|
|
95 self.tree = tree
|
|
96 self.saveTree( outFileName )
|
|
97
|
|
98 def getNbOfChildNodes( self, tree, nbNodes ):
|
|
99 if tree["left"] is not None:
|
|
100 nbNodes += 1
|
|
101 nbNodes = self.getNbOfChildNodes( tree["left"], nbNodes )
|
|
102 if tree["right"] is not None:
|
|
103 nbNodes += 1
|
|
104 nbNodes = self.getNbOfChildNodes( tree["right"], nbNodes )
|
|
105 return nbNodes
|
|
106
|
|
107 def getNbOfNodes( self ):
|
|
108 nbNodes = 0
|
|
109 return self.getNbOfChildNodes( self.tree, nbNodes )
|
|
110
|
|
111 def getNbOfChildLeaves( self, tree, nbLeaves ):
|
|
112 if tree["name"] != "internal":
|
|
113 nbLeaves += 1
|
|
114 if tree["left"] is not None:
|
|
115 nbLeaves = self.getNbOfChildLeaves( tree["left"], nbLeaves )
|
|
116 if tree["right"] is not None:
|
|
117 nbLeaves = self.getNbOfChildLeaves( tree["right"], nbLeaves )
|
|
118 return nbLeaves
|
|
119
|
|
120 def getNbOfLeaves( self ):
|
|
121 nbLeaves = 0
|
|
122 return self.getNbOfChildLeaves( self.tree, nbLeaves )
|