view smart_toolShed/commons/core/tree/Tree.py @ 0:e0f8dcca02ed

Uploaded the S-MART tool, a toolbox that manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line source

import os, re, sys

class Tree:
    """
    Binary tree read from, and written back to, a Newick-formatted string.

    Every node is a plain dict with the keys "name", "left", "right" and
    "length". Internal nodes carry the name "internal"; leaves have both
    "left" and "right" set to None. The parser splits each parenthesised
    group at its first top-level comma, so only bifurcating trees are
    supported.
    """

    # '(<subtree>):<branch length>' anchored at the end of the string.
    # Accepts plain and scientific notation (e/E exponent, optional sign).
    _DIST_PATTERN = re.compile(r'(?P<tree>\(.+\))\:(?P<length>[eE+\-\d\.]+)$')

    def __init__( self, inFileName="" ):
        """
        Create a tree, loading it from 'inFileName' when one is given.

        @param inFileName: path to a Newick file ("" leaves the tree unset)
        """
        self.tree = None
        self.inFileName = inFileName
        if self.inFileName != "":
            self.loadTree()

    def loadTree( self, verbose=0 ):
        """
        Read self.inFileName, parse its content and store it in self.tree.

        Line breaks are removed, surrounding whitespace is stripped and a
        terminating ';' (standard Newick terminator) is dropped before parsing.

        @param verbose: when > 0, print the number of leaves once loaded
        """
        with open( self.inFileName, "r" ) as inF:
            newick = "".join( line.rstrip("\r\n") for line in inF ).strip()
        if newick.endswith(";"):
            newick = newick[:-1].rstrip()
        self.tree = self.parseTree( newick )
        if verbose > 0:
            # single-argument form prints identically on Python 2 and 3
            print( "nb of leaves: %i" % self.getNbOfLeaves() )

    def parseTree( self, sTree ):
        """
        Recursively convert a Newick (sub)tree string into nested node dicts.

        @param sTree: either a leaf 'name[:length]' or '(lhs,rhs)[:length]'
        @return: node dict (see class docstring)
        @raise ValueError: if the string is empty or a branch length is not
                           a valid float
        """
        if not sTree:
            raise ValueError( "empty Newick (sub)tree string" )

        if "," not in sTree:
            # Leaf. The length is optional because toNewick() omits zero
            # lengths, and its own output must remain parseable.
            name, _, length = sTree.partition(":")
            return self.makeLeaf( name, float(length) if length else 0.0 )

        length = 0
        m = self._DIST_PATTERN.search( sTree )
        if m:
            length = float( m.group('length') )
            sTree = m.group('tree')

        lhs, rhs = self.parseSubTree( sTree )

        return { "name": "internal",
                 "left": self.parseTree( lhs ),
                 "right": self.parseTree( rhs ),
                 "length": length }

    def makeLeaf( self, name, length ):
        """
        Build a leaf node (no children) with the given name and branch length.
        """
        return { "left":None, "right":None, "name":name, "length":length }

    def parseSubTree( self, sTree ):
        """
        Parse a newick-formatted string of type '(a,b)' into [a,b]

        The first and last characters (the enclosing parentheses) are dropped
        and the remainder is split at the first comma found outside any
        nested parentheses. Without such a comma, b is the empty string.
        """
        inner = sTree[1:-1]
        depth = 0
        for pos, c in enumerate( inner ):
            if c == "(":
                depth += 1
            elif c == ")":
                depth -= 1
            elif c == "," and depth == 0:
                return [ inner[:pos], inner[pos+1:] ]
        return [ inner, "" ]

    def toNewick( self, tree ):
        """
        Serialise a node dict (and its descendants) to a Newick string.

        Branch lengths are written with '%f'; a zero (falsy) length is
        omitted altogether. No terminating ';' is appended.

        @param tree: node dict as produced by parseTree()
        @return: Newick string
        """
        # compare by value: identity of string objects is not guaranteed
        if tree["name"] != "internal":
            newString = tree["name"]
        else:
            newString = "(%s,%s)" % ( self.toNewick( tree["left"] ),
                                      self.toNewick( tree["right"] ) )
        if tree["length"]:
            newString += ":%f" % ( tree["length"] )
        return newString

    def saveTree( self, outFileName ):
        """
        Write self.tree to 'outFileName' in Newick format.
        """
        with open( outFileName, "w" ) as outF:
            outF.write( self.toNewick( self.tree ) )

    def replaceHeaderViaPrefixSearch( self, tree, dNew2Init ):
        """
        Rename, in place, every node whose name is a key of 'dNew2Init'.

        The replacement name has ' ' turned into '_', and '::' and ','
        turned into '-'.

        @param tree: node dict to process (children are visited recursively)
        @param dNew2Init: dict mapping current names to initial headers
        """
        if tree["name"] in dNew2Init:
            tree["name"] = dNew2Init[ tree["name"] ].replace(" ","_").replace("::","-").replace(",","-")
        for side in ( "left", "right" ):
            if tree[side] is not None:
                self.replaceHeaderViaPrefixSearch( tree[side], dNew2Init )

    def retrieveInitialSequenceHeaders( self, dNew2Init, outFileName  ):
        """
        Rename the nodes of self.tree via 'dNew2Init', then save the tree
        to 'outFileName'.
        """
        self.replaceHeaderViaPrefixSearch( self.tree, dNew2Init )
        self.saveTree( outFileName )

    def getNbOfChildNodes( self, tree, nbNodes ):
        """
        Add the number of descendants of 'tree' (the node itself is not
        counted) to 'nbNodes' and return the total.
        """
        for side in ( "left", "right" ):
            if tree[side] is not None:
                nbNodes = self.getNbOfChildNodes( tree[side], nbNodes + 1 )
        return nbNodes

    def getNbOfNodes( self ):
        """
        Return the number of nodes below the root of self.tree.
        """
        return self.getNbOfChildNodes( self.tree, 0 )

    def getNbOfChildLeaves( self, tree, nbLeaves ):
        """
        Add the number of nodes in 'tree' (itself included) whose name is
        not "internal" to 'nbLeaves' and return the total.
        """
        if tree["name"] != "internal":
            nbLeaves += 1
        for side in ( "left", "right" ):
            if tree[side] is not None:
                nbLeaves = self.getNbOfChildLeaves( tree[side], nbLeaves )
        return nbLeaves

    def getNbOfLeaves( self ):
        """
        Return the number of leaves of self.tree.
        """
        return self.getNbOfChildLeaves( self.tree, 0 )