Mercurial > repos > yufei-luo > s_mart
diff commons/core/tree/Tree.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
line wrap: on
line diff
# commons/core/tree/Tree.py (file added by changeset 6:769e306b7933)
import os
import re
import sys


class Tree(object):
    """Binary tree parsed from, and serialised to, a Newick-like string.

    Every node is a plain dict with the keys ``"name"``, ``"left"``,
    ``"right"`` and ``"length"``.  Leaves carry their label in ``"name"`` and
    have ``None`` children; internal nodes are named ``"internal"`` and always
    have exactly two children.
    """

    # Name given to every non-leaf node.
    _INTERNAL = "internal"

    # "(subtree):length" anchored at the end of the string.  The length class
    # accepts digits, '.', sign characters and both exponent markers so that
    # values such as "1e+05" or "2.5E-3" are recognised.
    _DIST_PATTERN = re.compile(
        r'(?P<tree>\(.+\))\:(?P<length>[eE+\-\d\.]+)$')

    def __init__(self, inFileName=""):
        """Create a tree, loading it from ``inFileName`` when one is given."""
        self.tree = None
        self.inFileName = inFileName
        if self.inFileName != "":
            self.loadTree()

    def loadTree(self, verbose=0):
        """Read ``self.inFileName`` and store the parsed tree in ``self.tree``.

        Newlines are removed, surrounding whitespace is stripped and one
        terminal ';' (the usual Newick terminator) is dropped before parsing.
        When ``verbose`` is positive the number of leaves is printed.
        """
        with open(self.inFileName, "r") as inF:
            line = inF.read().replace("\n", "")
        line = line.strip()
        if line.endswith(";"):
            line = line[:-1].rstrip()
        self.tree = self.parseTree(line)
        if verbose > 0:
            # Single parenthesised argument: valid on Python 2 and Python 3.
            print("nb of leaves: %i" % self.getNbOfLeaves())

    def parseTree(self, sTree):
        """Recursively convert a Newick-like string into nested node dicts.

        A string without a comma is a leaf ``name[:length]``; a missing
        length is read as 0.0 (``toNewick`` omits zero lengths, so this keeps
        saved trees loadable).  Anything else is ``(left,right)[:length]``.

        Raises ValueError for an empty node or a non-numeric length.
        """
        if "," not in sTree:
            if not sTree:
                raise ValueError("empty node description in Newick string")
            name, sep, length = sTree.partition(":")
            return self.makeLeaf(name, float(length) if sep else 0.0)

        length = 0
        m = self._DIST_PATTERN.search(sTree)
        if m:
            length = float(m.group("length"))
            sTree = m.group("tree")

        lhs, rhs = self.parseSubTree(sTree)

        return {"name": self._INTERNAL,
                "left": self.parseTree(lhs),
                "right": self.parseTree(rhs),
                "length": length}

    def makeLeaf(self, name, length):
        """Return a leaf node dict with the given name and branch length."""
        return {"left": None, "right": None, "name": name, "length": length}

    def parseSubTree(self, sTree):
        """Split a string of the form '(a,b)' into the list [a, b].

        The first and last characters are discarded and the remainder is cut
        at the first comma that is not nested inside parentheses.  When there
        is no such comma the whole remainder is returned as the left part and
        the right part is the empty string.
        """
        inner = sTree[1:-1]
        depth = 0
        for pos, c in enumerate(inner):
            if c == "(":
                depth += 1
            elif c == ")":
                depth -= 1
            elif c == "," and depth == 0:
                return [inner[:pos], inner[pos + 1:]]
        return [inner, ""]

    def toNewick(self, tree):
        """Serialise a node dict (and everything below it) to a string.

        Branch lengths are written with ``%f``; a length of zero is omitted.
        No terminal ';' is appended.
        """
        # Compare by value: identity ('is') only worked through interning.
        if tree["name"] != self._INTERNAL:
            parts = [tree["name"]]
        else:
            parts = ["(",
                     self.toNewick(tree["left"]),
                     ",",
                     self.toNewick(tree["right"]),
                     ")"]
        if tree["length"]:
            parts.append(":%f" % tree["length"])
        return "".join(parts)

    def saveTree(self, outFileName):
        """Write the string form of ``self.tree`` to ``outFileName``."""
        with open(outFileName, "w") as outF:
            outF.write(self.toNewick(self.tree))

    def replaceHeaderViaPrefixSearch(self, tree, dNew2Init):
        """Rename, in place, every node whose name is a key of ``dNew2Init``.

        Despite the method name the lookup is an exact key match.  In the
        replacement value spaces become '_', and '::' and ',' become '-'.
        """
        if tree["name"] in dNew2Init:
            tree["name"] = (dNew2Init[tree["name"]]
                            .replace(" ", "_")
                            .replace("::", "-")
                            .replace(",", "-"))
        if tree["left"] is not None:
            self.replaceHeaderViaPrefixSearch(tree["left"], dNew2Init)
        if tree["right"] is not None:
            self.replaceHeaderViaPrefixSearch(tree["right"], dNew2Init)

    def retrieveInitialSequenceHeaders(self, dNew2Init, outFileName):
        """Rename the nodes of ``self.tree`` via ``dNew2Init`` and save it."""
        self.replaceHeaderViaPrefixSearch(self.tree, dNew2Init)
        self.saveTree(outFileName)

    def getNbOfChildNodes(self, tree, nbNodes):
        """Return ``nbNodes`` plus the number of nodes below ``tree``."""
        for side in ("left", "right"):
            child = tree[side]
            if child is not None:
                nbNodes = self.getNbOfChildNodes(child, nbNodes + 1)
        return nbNodes

    def getNbOfNodes(self):
        """Return the number of nodes below the root (root not counted).

        Returns 0 when no tree has been loaded.
        """
        if self.tree is None:
            return 0
        return self.getNbOfChildNodes(self.tree, 0)

    def getNbOfChildLeaves(self, tree, nbLeaves):
        """Return ``nbLeaves`` plus the count of non-internal nodes in ``tree``.

        ``tree`` itself is included in the count.
        """
        if tree["name"] != self._INTERNAL:
            nbLeaves += 1
        for side in ("left", "right"):
            child = tree[side]
            if child is not None:
                nbLeaves = self.getNbOfChildLeaves(child, nbLeaves)
        return nbLeaves

    def getNbOfLeaves(self):
        """Return the number of leaves of ``self.tree`` (0 if none loaded)."""
        if self.tree is None:
            return 0
        return self.getNbOfChildLeaves(self.tree, 0)