diff commons/core/parsing/PilerTAToGrouperMap.py @ 36:44d5973c188c

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 15:02:29 -0400
parents 769e306b7933
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/PilerTAToGrouperMap.py	Tue Apr 30 15:02:29 2013 -0400
@@ -0,0 +1,85 @@
+import time
+import os
+
class PilerTAToGrouperMap(object):
    """
    Convert the output files from Piler into grouper format.

    Three input files are read:
      - a GFF file whose lines carry ';'-separated attributes, the second
        one being the pile name (e.g. "Pile 3") and another one being the
        pyramid name (e.g. "Pyramid 7");
      - a pyramid file whose 9th tab-separated column is "PyramidIndex <n>";
      - a motif file whose lines look like "<gff columns> ; Pyramid <n>".

    Three output files are written by run():
      - outputFileName: one "<pile name>\\t<pyramid name>" line per pyramid;
      - <motif file>.grp: member ID, motif record, pile name, pyramid name;
      - <motif file>.grp.map: member ID, chromosome, start, end.
    """
    def __init__(self, inputGffFileName, inputPYRFileName, inputMOTIFFileName, outputFileName):
        self._inputGffFileName = inputGffFileName
        self._inputPYRFileName = inputPYRFileName
        self._inputMOTIFFileName = inputMOTIFFileName
        self._outFileName = outputFileName

    def run(self):
        """
        Run the whole conversion and write the three output files.

        Raises IndexError if an input line has fewer columns than expected
        or if a motif refers to a pyramid for which no pile is known, and
        ValueError if a motif line contains no ';' separator.
        """
        # Step 0: get pile info and write out an info file
        pileByPyramid = self._writePileInfo()
        # Step 1: add pile info to the motif file and write out two files,
        # one with the grouper ID and one in map format
        self._writeGrouperFiles(pileByPyramid)

    @staticmethod
    def _indexPilesByPyramid(gffLines):
        """Return a dict mapping each pyramid name found in the GFF lines to its pile name."""
        pileByPyramid = {}
        for gffLine in gffLines:
            attributes = [attribute.strip() for attribute in gffLine.split(';')]
            if len(attributes) < 2:
                continue
            # The pile name is the second ';'-separated attribute of the line.
            pileName = attributes[1]
            for attribute in attributes:
                # Whole-attribute comparison: "Pyramid 1" must not match "Pyramid 10".
                if attribute.startswith('Pyramid'):
                    # Keep the first pile seen for a pyramid, like a top-down scan would.
                    pileByPyramid.setdefault(attribute, pileName)
        return pileByPyramid

    def _writePileInfo(self):
        """Write the "<pile>\\t<pyramid>" info file and return the pyramid -> pile dict."""
        # The GFF file is read once up front: re-reading an already consumed
        # file handle for every pyramid would only work for the first one.
        with open(self._inputGffFileName, "r") as inFileGff:
            pileByGffPyramid = self._indexPilesByPyramid(inFileGff)

        pileByPyramid = {}
        with open(self._inputPYRFileName, "r") as inFilePyr:
            with open(self._outFileName, "w") as outFile:
                for pyrLine in inFilePyr:
                    if not pyrLine.strip():
                        continue
                    pyrName = pyrLine.split('\t')[8].replace('PyramidIndex', 'Pyramid').strip()
                    # A pyramid absent from the GFF file is still reported,
                    # with an empty pile name.
                    pileName = pileByGffPyramid.get(pyrName, "")
                    outFile.write("%s\t%s\n" % (pileName, pyrName))
                    pileByPyramid.setdefault(pyrName, pileName)
        return pileByPyramid

    def _writeGrouperFiles(self, pileByPyramid):
        """Write the .grp and .grp.map files from the motif file and the pyramid -> pile dict."""
        outFileMotifGrpFileName = self._inputMOTIFFileName + ".grp"
        outFileMotifGrpMapFileName = self._inputMOTIFFileName + ".grp.map"

        with open(self._inputMOTIFFileName, "r") as inFileMotif:
            motifLines = [lineMotif for lineMotif in inFileMotif if lineMotif.strip()]

        with open(outFileMotifGrpFileName, "w") as outFileMotifGrp:
            with open(outFileMotifGrpMapFileName, "w") as outFileMotifGrpMap:
                for countMotif, lineMotif in enumerate(motifLines, 1):
                    motif, pyrNameMotif = lineMotif.split(';')[:2]
                    pyrNameMotif = pyrNameMotif.strip()
                    # Dictionary lookup: motifs may come in any order.
                    pileNameMotif = pileByPyramid.get(pyrNameMotif, "")
                    if not pileNameMotif:
                        # IndexError is kept as the failure type for this
                        # case; only the message is new.
                        raise IndexError("no pile found for '%s' (motif #%d of %s)"
                                         % (pyrNameMotif, countMotif, self._inputMOTIFFileName))

                    # translate to Grouper ID format
                    pyrID = pyrNameMotif.split(' ')[1]
                    pileID = pileNameMotif.split(' ')[1]
                    columns = motif.split('\t')
                    chrm = columns[0]
                    start, end = columns[3:5]
                    memberID = "MbS%sGr%sCl%s" % (countMotif, pyrID, pileID)

                    outFileMotifGrp.write("%s\t%s\t%s\t%s\n" % (memberID, motif, pileNameMotif, pyrNameMotif))
                    outFileMotifGrpMap.write("%s\t%s\t%s\t%s\n" % (memberID, chrm, start, end))