6
|
1 import time
|
|
2 import os
|
|
3
|
|
class PilerTAToGrouperMap(object):
    """
    Convert the output file from Piler into grouper format.

    Three input files are read (the hit GFF, the pyramid GFF and the motif
    GFF) and three files are written by run():

    * outputFileName: one "<pile name>\\t<pyramid name>" line per pyramid,
    * <inputMOTIFFileName>.grp: one line per motif with its grouper member ID,
    * <inputMOTIFFileName>.grp.map: the same members in map format
      (member ID, sequence name, start, end).
    """

    def __init__(self, inputGffFileName, inputPYRFileName, inputMOTIFFileName, outputFileName):
        """
        Store the file names; no file is opened until run() is called.

        @param inputGffFileName: GFF file searched for the pile of each pyramid
        @param inputPYRFileName: GFF file listing one pyramid per line
        @param inputMOTIFFileName: GFF file listing one motif per line; also the
            prefix of the ".grp" and ".grp.map" output files
        @param outputFileName: name of the pile/pyramid info file to write
        """
        self._inputGffFileName = inputGffFileName
        self._inputPYRFileName = inputPYRFileName
        self._inputMOTIFFileName = inputMOTIFFileName
        self._outFileName = outputFileName

    def run(self):
        """
        Write the info file, then the ".grp" and ".grp.map" files.

        Raises IndexError when a motif refers to a pyramid for which no pile
        could be determined, or when a name lacks the "<label> <id>" shape.
        """
        # step 0 : get pile info and write out an info file
        pileByPyramid = self._writePileInfo()
        # step 1 : add pile info to the motifs and write the two grouper files
        self._writeGrouperFiles(pileByPyramid)

    @staticmethod
    def _mentionsName(text, name):
        """
        Tell whether text contains name not directly followed by a digit.

        The digit check keeps a name such as "Pyramid 1" from matching inside
        "Pyramid 10".
        """
        start = text.find(name)
        while start != -1:
            following = text[start + len(name):start + len(name) + 1]
            if not following.isdigit():
                return True
            start = text.find(name, start + 1)
        return False

    def _writePileInfo(self):
        """
        Write one "<pile name>\\t<pyramid name>" line per pyramid.

        The pyramid name is the 9th tab-separated column of the pyramid line
        with 'PyramidIndex' replaced by 'Pyramid' and surrounding whitespace
        removed. The pile name is the second ';'-separated field of the first
        GFF line mentioning that pyramid, or "" when no line mentions it.

        @return: dict mapping each pyramid name to its pile name (the first
            occurrence wins if a pyramid is listed more than once)
        """
        with open(self._inputGffFileName, "r") as inFileGff:  # -tan.gff
            # Read once and keep the lines: they are searched again for every
            # pyramid, and a file handle can only be consumed a single time.
            gffLines = inFileGff.readlines()

        pileByPyramid = {}
        with open(self._inputPYRFileName, "r") as inFilePyr, \
                open(self._outFileName, "w") as outFile:  # -tan_pyr.gff
            for pyrLine in inFilePyr:
                if not pyrLine.strip():
                    continue  # tolerate blank lines
                pyrName = pyrLine.split('\t')[8].replace('PyramidIndex', 'Pyramid').strip()
                pileName = ""
                for gffLine in gffLines:
                    if self._mentionsName(gffLine, pyrName):
                        pileName = gffLine.split(';')[1].strip()
                        break
                # The newline is written explicitly so that every pyramid gets
                # its own line whatever the layout of the pyramid file.
                outFile.write("%s\t%s\n" % (pileName, pyrName))
                pileByPyramid.setdefault(pyrName, pileName)
        return pileByPyramid

    def _writeGrouperFiles(self, pileByPyramid):
        """
        Write the ".grp" and ".grp.map" files next to the motif file.

        Each motif line is split on ';': the first field is the motif record
        (tab-separated; column 1 is the sequence name, columns 4 and 5 the
        start and end) and the second field is the pyramid name. Motifs are
        numbered from 1 in file order, blank lines excluded.

        @param pileByPyramid: dict mapping pyramid names to pile names
        """
        outFileMotifGrpFileName = self._inputMOTIFFileName + ".grp"
        outFileMotifGrpMapFileName = self._inputMOTIFFileName + ".grp.map"

        with open(self._inputMOTIFFileName, "r") as inFileMotif, \
                open(outFileMotifGrpFileName, "w") as outFileMotifGrp, \
                open(outFileMotifGrpMapFileName, "w") as outFileMotifGrpMap:
            countMotif = 0
            for lineMotif in inFileMotif:
                if not lineMotif.strip():
                    continue  # tolerate blank lines
                motif, pyrNameMotif = lineMotif.split(';')[:2]
                pyrNameMotif = pyrNameMotif.strip()

                # Exact lookup: independent of the order of the motif file.
                pileNameMotif = pileByPyramid.get(pyrNameMotif, "")
                if not pileNameMotif:
                    # IndexError is what an unresolved pile has always raised
                    # here; it now says which pyramid is at fault.
                    raise IndexError("no pile found for '%s' in '%s'"
                                     % (pyrNameMotif, self._inputGffFileName))

                # translate to Grouper IdFormat
                pyrID = pyrNameMotif.split(' ')[1]
                pileID = pileNameMotif.split(' ')[1]
                motifFields = motif.split('\t')
                chrm = motifFields[0]
                start, end = motifFields[3:5]
                countMotif += 1
                memberID = "MbS%sGr%sCl%s" % (countMotif, pyrID, pileID)

                outFileMotifGrp.write("%s\t%s\t%s\t%s\n"
                                      % (memberID, motif, pileNameMotif, pyrNameMotif))
                outFileMotifGrpMap.write("%s\t%s\t%s\t%s\n"
                                         % (memberID, chrm, start, end))