diff smart_toolShed/commons/core/tree/Tree.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/smart_toolShed/commons/core/tree/Tree.py	Thu Jan 17 10:52:14 2013 -0500
@@ -0,0 +1,122 @@
import os
import re
import sys


class Tree:
    """Binary tree read from, and written back to, Newick-formatted text.

    Every node is a plain dict with four keys:

    * ``"name"``   -- the leaf label; ``parseTree`` gives internal nodes the
      literal name ``"internal"``.
    * ``"left"``   -- left child node, ``None`` on a leaf.
    * ``"right"``  -- right child node, ``None`` on a leaf.
    * ``"length"`` -- branch length leading to the node (0 when absent).

    Limitations of the parser: only strictly bifurcating trees are accepted,
    and quoted labels, internal-node labels and bracketed comments are not
    understood.  Anything else raises ``ValueError``.
    """

    # "(<subtree>):<branch length>" anchored at both ends.  The length class
    # accepts both "e"/"E" exponents and an explicit "+" sign so values such
    # as 1E-05 or 1e+02 are recognised.
    _DIST_PATTERN = re.compile(r'(?P<tree>\(.+\))\:(?P<length>[eE+\-\d\.]+)$')

    def __init__(self, inFileName=""):
        """Create a tree, loading it from ``inFileName`` when one is given."""
        self.tree = None
        self.inFileName = inFileName
        if self.inFileName != "":
            self.loadTree()

    def loadTree(self, verbose=0):
        """Read ``self.inFileName`` and store the parsed tree in ``self.tree``.

        Line breaks are removed before parsing, so the Newick string may be
        wrapped over several lines.  Surrounding whitespace and one trailing
        ``;`` terminator are dropped.

        :param verbose: when > 0, print the number of leaves after loading.
        :raises ValueError: if the content cannot be parsed.
        """
        with open(self.inFileName, "r") as inF:
            newick = "".join(line.rstrip("\r\n") for line in inF)
        newick = newick.strip()
        if newick.endswith(";"):
            newick = newick[:-1].rstrip()
        self.tree = self.parseTree(newick)
        if verbose > 0:
            # Single parenthesised argument: same output under Python 2 and 3.
            print("nb of leaves: %i" % self.getNbOfLeaves())

    def parseTree(self, sTree):
        """Recursively turn a Newick string (no trailing ``;``) into node dicts.

        A string without a comma is treated as a single leaf ``name[:length]``.
        Otherwise it must look like ``(left,right)`` optionally followed by
        ``:length``.

        :param sTree: Newick text of the (sub)tree.
        :return: the node dict for the (sub)tree root.
        :raises ValueError: on empty nodes, malformed branch lengths, or
            subtrees that are not a parenthesised pair.
        """
        if "," not in sTree:
            return self._parseLeaf(sTree)

        sTree = sTree.strip()
        length = 0.0
        m = self._DIST_PATTERN.match(sTree)
        if m:
            length = float(m.group("length"))
            sTree = m.group("tree")

        # parseSubTree blindly drops the first and last character, so make
        # sure they really are the enclosing parentheses.  This rejects e.g.
        # the "b:2,c:3" remainder of a three-way split instead of silently
        # producing garbage nodes.
        if not (sTree.startswith("(") and sTree.endswith(")")):
            raise ValueError(
                "cannot parse %r: expected '(left,right)[:length]'" % sTree)

        lhs, rhs = self.parseSubTree(sTree)

        return {"name": "internal",
                "left": self.parseTree(lhs),
                "right": self.parseTree(rhs),
                "length": length}

    def _parseLeaf(self, sLeaf):
        """Build a leaf from ``name`` or ``name:length`` (length defaults to 0.0)."""
        if sLeaf == "":
            raise ValueError("empty node in Newick string")
        name, sep, length = sLeaf.partition(":")
        if not sep:
            return self.makeLeaf(sLeaf, 0.0)
        return self.makeLeaf(name, float(length))

    def makeLeaf(self, name, length):
        """Return a childless node dict with the given name and branch length."""
        return {"left": None, "right": None, "name": name, "length": length}

    def parseSubTree(self, sTree):
        """
        Parse a newick-formatted string of type '(a,b)' into [a,b].

        The first and last characters of ``sTree`` are discarded (they are
        expected to be the enclosing parentheses) and the remainder is split
        at the first comma that is not nested inside parentheses.  When no
        such comma exists the result is ``[remainder, ""]``.
        """
        inner = sTree[1:-1]
        depth = 0
        for pos, char in enumerate(inner):
            if char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
            elif char == "," and depth == 0:
                return [inner[:pos], inner[pos + 1:]]
        return [inner, ""]

    def toNewick(self, tree):
        """Serialise a node dict (and everything below it) to Newick text.

        Leaves are written as their name, nodes with children as
        ``(left,right)``.  A branch length is appended as ``:%f`` (six
        decimals) only when it is non-zero.  No trailing ``;`` is added.
        """
        # A node is a leaf when it has no children; relying on the node name
        # would misfire for a leaf that happens to be called "internal" or for
        # an internal node that has been renamed.
        if tree["left"] is None and tree["right"] is None:
            newString = tree["name"]
        else:
            newString = "(%s,%s)" % (self.toNewick(tree["left"]),
                                     self.toNewick(tree["right"]))
        if tree["length"]:
            newString += ":%f" % tree["length"]
        return newString

    def saveTree(self, outFileName):
        """Write the Newick form of ``self.tree`` to ``outFileName``."""
        with open(outFileName, "w") as outF:
            outF.write(self.toNewick(self.tree))

    def replaceHeaderViaPrefixSearch(self, tree, dNew2Init):
        """Rename, in place, every node whose name is a key of ``dNew2Init``.

        The lookup is an exact key match.  In the replacement value spaces
        become ``_`` while ``::`` and ``,`` become ``-``.

        :param tree: root node dict of the (sub)tree to process.
        :param dNew2Init: mapping from current node name to replacement name.
        """
        if tree["name"] in dNew2Init:
            newName = dNew2Init[tree["name"]]
            tree["name"] = newName.replace(" ", "_").replace("::", "-").replace(",", "-")
        for side in ("left", "right"):
            if tree[side] is not None:
                self.replaceHeaderViaPrefixSearch(tree[side], dNew2Init)

    def retrieveInitialSequenceHeaders(self, dNew2Init, outFileName):
        """Rename the nodes of ``self.tree`` via ``dNew2Init`` and save the result."""
        self.replaceHeaderViaPrefixSearch(self.tree, dNew2Init)
        self.saveTree(outFileName)

    def getNbOfChildNodes(self, tree, nbNodes):
        """Return ``nbNodes`` plus the number of nodes strictly below ``tree``."""
        for side in ("left", "right"):
            if tree[side] is not None:
                nbNodes = self.getNbOfChildNodes(tree[side], nbNodes + 1)
        return nbNodes

    def getNbOfNodes(self):
        """Return the number of nodes below the root (the root is not counted).

        An empty tree (``self.tree is None``) yields 0.
        """
        if self.tree is None:
            return 0
        return self.getNbOfChildNodes(self.tree, 0)

    def getNbOfChildLeaves(self, tree, nbLeaves):
        """Return ``nbLeaves`` plus the number of leaves in ``tree`` (itself included)."""
        if tree["left"] is None and tree["right"] is None:
            return nbLeaves + 1
        for side in ("left", "right"):
            if tree[side] is not None:
                nbLeaves = self.getNbOfChildLeaves(tree[side], nbLeaves)
        return nbLeaves

    def getNbOfLeaves(self):
        """Return the number of leaves of ``self.tree`` (0 for an empty tree)."""
        if self.tree is None:
            return 0
        return self.getNbOfChildLeaves(self.tree, 0)