comparison commons/core/parsing/PilerTAToGrouperMap.py @ 36:44d5973c188c

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 15:02:29 -0400
parents 769e306b7933
children
comparison
equal deleted inserted replaced
35:d94018ca4ada 36:44d5973c188c
1 import time
2 import os
3
class PilerTAToGrouperMap(object):
    """
    Convert the output files from Piler into grouper format.

    Three files are written by run():
      * outputFileName: one "<pile>\\t<pyramid>" record per pyramid line,
      * inputMOTIFFileName + ".grp": one record per motif with its member ID,
        the motif text, the pile name and the pyramid name,
      * inputMOTIFFileName + ".grp.map": member ID, sequence name, start, end.
    """

    def __init__(self, inputGffFileName, inputPYRFileName, inputMOTIFFileName, outputFileName):
        """
        Store the file names; no file is opened until run() is called.

        @param inputGffFileName: GFF file searched for the pile of each pyramid
        @param inputPYRFileName: pyramid file, tab-separated with at least 9 columns
        @param inputMOTIFFileName: motif file, ';'-separated, whose first field is
                                   tab-separated with at least 5 columns
        @param outputFileName: name of the pile/pyramid info file to write
        """
        self._inputGffFileName = inputGffFileName
        self._inputPYRFileName = inputPYRFileName
        self._inputMOTIFFileName = inputMOTIFFileName
        self._outFileName = outputFileName

    def run(self):
        """
        Write the pile/pyramid info file, then the ".grp" and ".grp.map" files.

        Raises IndexError when a pyramid line has fewer than 9 columns, when a
        matching GFF line has no ';', or when no pile can be found for a motif.
        Raises ValueError when a motif line cannot be unpacked (no ';' or
        fewer than 5 tab-separated columns).
        """
        self._writePileInfo()
        self._writeGrouperFiles()

    def _writePileInfo(self):
        """Step 0: write one "<pile>\\t<pyramid>" record per pyramid line."""
        # Read the GFF lines once: calling readlines() again on the same handle
        # for every pyramid returns an empty list after the first call, which
        # left every pyramid but the first without a pile.
        with open(self._inputGffFileName, "r") as inFileGff:
            gffLines = inFileGff.readlines()

        with open(self._inputPYRFileName, "r") as inFilePyr, \
                open(self._outFileName, "w") as outFile:
            for pyrLine in inFilePyr:
                if not pyrLine.strip():
                    continue
                # The 9th column keeps its line ending when it is the last
                # column; the ending is deliberately kept so that "Pyramid 1"
                # does not match a GFF line ending in "Pyramid 10", and so
                # that the record written below is newline-terminated.
                pyrIndex = pyrLine.split('\t')[8].replace('PyramidIndex', 'Pyramid')
                pileIndex = ""
                for gffLine in gffLines:
                    if pyrIndex in gffLine:
                        pileIndex = gffLine.split(';')[1].strip()
                        break
                outFile.write("%s\t%s" % (pileIndex, pyrIndex))

    def _writeGrouperFiles(self):
        """Step 1: add pile info to each motif and write the ".grp" and ".grp.map" files."""
        outFileMotifGrpFileName = self._inputMOTIFFileName + ".grp"
        outFileMotifGrpMapFileName = self._inputMOTIFFileName + ".grp.map"

        with open(self._outFileName, "r") as inFileInfo:
            infoLines = inFileInfo.readlines()

        # The position in the info records is kept from one motif to the next
        # (forward-only scan), so motifs are expected to follow the same
        # pyramid order as the info file.
        infoIndex = 0
        motifCount = 0

        with open(self._inputMOTIFFileName, "r") as inFileMotif, \
                open(outFileMotifGrpFileName, "w") as outFileMotifGrp, \
                open(outFileMotifGrpMapFileName, "w") as outFileMotifGrpMap:
            for lineMotif in inFileMotif:
                if not lineMotif.strip():
                    continue
                motifCount += 1

                motif, pyrNameMotif = lineMotif.split(';')[:2]
                pyrNameMotif = pyrNameMotif.strip()

                pileNameMotif = ""
                while infoIndex < len(infoLines):
                    lineInfo = infoLines[infoIndex]
                    if pyrNameMotif in lineInfo:
                        pileNameMotif = lineInfo.split('\t')[0]
                        break
                    infoIndex += 1

                # Translate to the grouper ID format.
                pyrID = pyrNameMotif.split(' ')[1]
                pileFields = pileNameMotif.split(' ')
                if len(pileFields) < 2:
                    raise IndexError("no pile found for '%s' (motif #%d in %s)"
                                     % (pyrNameMotif, motifCount, self._inputMOTIFFileName))
                pileID = pileFields[1]

                motifColumns = motif.split('\t')
                chrm = motifColumns[0]
                start, end = motifColumns[3:5]
                memberID = "MbS%sGr%sCl%s" % (motifCount, pyrID, pileID)

                outFileMotifGrp.write("%s\t%s\t%s\t%s\n" % (memberID, motif, pileNameMotif, pyrNameMotif))
                outFileMotifGrpMap.write("%s\t%s\t%s\t%s\n" % (memberID, chrm, start, end))