view commons/core/parsing/PilerTAToGrouperMap.py @ 9:1eb55963fe39

Updated CompareOverlappingSmall*.py
author m-zytnicki
date Thu, 14 Mar 2013 05:23:05 -0400
parents 769e306b7933
children
line wrap: on
line source

import time
import os

class PilerTAToGrouperMap(object):
    """
    Convert the output files from Piler into grouper format.

    Three input files are read (formats as far as this class relies on them):
      - a GFF file whose lines contain ';'-separated fields; the second
        field is taken as the pile name (e.g. "Pile 3") and the line is
        expected to contain the pyramid name (e.g. "Pyramid 1");
      - a pyramid file, tab-separated, whose 9th column holds the pyramid
        name written as "PyramidIndex <id>";
      - a motif file whose lines are "<record>;<pyramid name>[;...]", the
        record being tab-separated with the sequence name in column 1 and
        the start/end coordinates in columns 4 and 5.

    Three output files are written by run():
      - outputFileName: one "<pile name>\\t<pyramid name>" line per pyramid;
      - inputMOTIFFileName + ".grp": the motifs prefixed with a grouper ID;
      - inputMOTIFFileName + ".grp.map": the motifs in map format.
    """

    def __init__(self, inputGffFileName, inputPYRFileName, inputMOTIFFileName, outputFileName):
        self._inputGffFileName = inputGffFileName
        self._inputPYRFileName = inputPYRFileName
        self._inputMOTIFFileName = inputMOTIFFileName
        self._outFileName = outputFileName

    def run(self):
        """
        Run the whole conversion: write the pile/pyramid info file, then the
        ".grp" and ".grp.map" files derived from the motif file.

        Raises IOError/OSError if a file cannot be opened, and IndexError or
        ValueError if a line does not have the expected fields or if no pile
        can be associated with a motif.
        """
        self._writePileInfo()
        self._writeGrouperFiles()

    @staticmethod
    def _readLines(fileName):
        """Return all lines of a text file; the file is closed even on error."""
        with open(fileName, "r") as handle:
            return handle.readlines()

    @staticmethod
    def _findLineContaining(lines, needle, start):
        """
        Return the index of the first line containing needle, or -1.

        The search begins at index start and wraps around to the beginning,
        so it is cheap when the inputs are sorted consistently (the match is
        at or just after the previous one) and still correct when they are
        not.
        """
        count = len(lines)
        index = start
        checked = 0
        while checked < count:
            if index >= count:
                index = 0
            if needle in lines[index]:
                return index
            index += 1
            checked += 1
        return -1

    @staticmethod
    def _extractId(name):
        """Return the <id> part of a "<label> <id>" name (e.g. "Pile 3" -> "3")."""
        parts = name.split(' ')
        if len(parts) < 2:
            # IndexError is what the bare "split(' ')[1]" used to raise;
            # keep the type, add a usable message.
            raise IndexError("cannot extract an ID from %r (expected '<label> <id>')" % name)
        return parts[1]

    def _writePileInfo(self):
        """Step 0: get pile info and write out the info file (one line per pyramid)."""
        # Read both inputs once. Calling readlines() on the GFF handle inside
        # the pyramid loop exhausted it during the first iteration, so every
        # pyramid after the first one got an empty pile name.
        gffLines = self._readLines(self._inputGffFileName)   # -tan.gff
        pyrLines = self._readLines(self._inputPYRFileName)   # -tan_pyr.gff

        gffCursor = 0
        with open(self._outFileName, "w") as outFile:
            for pyrLine in pyrLines:
                if not pyrLine.strip():
                    continue  # tolerate blank lines
                # The trailing newline is kept on purpose: it anchors the
                # match at the end of the GFF line ("Pyramid 1\n" does not
                # match "Pyramid 10\n") and terminates the output line.
                pyrIndex = pyrLine.split('\t')[8].replace('PyramidIndex', 'Pyramid')
                pileIndex = ""
                found = self._findLineContaining(gffLines, pyrIndex, gffCursor)
                if found >= 0:
                    gffCursor = found
                    pileIndex = gffLines[found].split(';')[1].strip()
                outFile.write("%s\t%s" % (pileIndex, pyrIndex))

    def _writeGrouperFiles(self):
        """
        Step 1: add pile info to the motif file and write out two files, one
        with the grouper ID (".grp") and one in map format (".grp.map").
        """
        outFileMotifGrpFileName = self._inputMOTIFFileName + ".grp"
        outFileMotifGrpMapFileName = self._inputMOTIFFileName + ".grp.map"

        infoLines = self._readLines(self._outFileName)
        motifLines = self._readLines(self._inputMOTIFFileName)

        infoCursor = 0
        countMotif = 0
        with open(outFileMotifGrpFileName, "w") as outFileMotifGrp:
            with open(outFileMotifGrpMapFileName, "w") as outFileMotifGrpMap:
                for lineMotif in motifLines:
                    if not lineMotif.strip():
                        continue  # tolerate blank lines
                    countMotif += 1  # member numbers are 1-based

                    motif, pyrNameMotif = lineMotif.split(';')[:2]
                    pyrNameMotif = pyrNameMotif.strip()

                    # NOTE(review): this is a substring match, as before, so
                    # "Pyramid 1" can also match a "Pyramid 10" info line if
                    # that line is met first; fine when both files list the
                    # pyramids in the same increasing order.
                    pileNameMotif = ""
                    found = self._findLineContaining(infoLines, pyrNameMotif, infoCursor)
                    if found >= 0:
                        infoCursor = found
                        pileNameMotif = infoLines[found].split('\t')[0]

                    # translate to Grouper ID format
                    pyrID = self._extractId(pyrNameMotif)
                    pileID = self._extractId(pileNameMotif)
                    dataMotif = motif.split('\t')
                    chrm = dataMotif[0]
                    start, end = dataMotif[3:5]
                    memberID = "MbS%sGr" % (countMotif) + pyrID + "Cl" + pileID

                    stringMotif = "%s\t%s\t%s\t%s\n" % (memberID, motif, pileNameMotif, pyrNameMotif)
                    outFileMotifGrp.write(stringMotif)

                    stringGrpMap = "%s\t%s\t%s\t%s\n" % (memberID, chrm, start, end)
                    outFileMotifGrpMap.write(stringGrpMap)