6
|
1 import time
|
|
2 import os
|
|
3
|
|
class PalsToAlign(object):
    """
    Convert the output from PALS (GFF2 format) into the 'align' format.

    Each input record is a tab-separated line with at least nine columns.
    The columns used are: query name (1), query start (4), query end (5),
    score (6), strand (7) and the attribute column (9).  The attribute
    column is split on whitespace; its 2nd, 3rd and 4th tokens are read as
    the subject name, start and end, and its last token as a fractional
    error rate (presumably of the form
    ``Target <name> <start> <end>; maxe <error>`` -- confirm against the
    PALS version in use).

    Each output record is a tab-separated line:
    query name, query start, query end, subject name, subject start,
    subject end, the literal ``0.0``, score, percentage identity.
    The output file is sorted with the external ``sort`` command.
    """

    def __init__(self, inputPalsFileName="", outputAlignFileName="", removeSameSequences=False):
        """
        Store the conversion settings.

        @param inputPalsFileName: path of the PALS GFF2 file to read
        @param outputAlignFileName: path of the 'align' file to write
        @param removeSameSequences: if True, drop records where both names
            contain "chunk", both ranges start at 1 and identity is 100%
        """
        self._removeSameSequences = removeSameSequences
        self._inputPalsFileName = inputPalsFileName
        self._outputAlignFileName = outputAlignFileName

    def _convertLine(self, line):
        """
        Convert one PALS GFF2 line into one 'align' line.

        @param line: a raw line from the input file (with or without its
            end-of-line characters)
        @return: the formatted 'align' line (newline-terminated), or None
            when the line is blank or filtered out by removeSameSequences
        @raise IndexError: if the line has fewer columns/tokens than expected
        @raise ValueError: if a coordinate or the error rate is not numeric
        """
        if not line.strip():
            return None

        # Strip only the end-of-line characters so that a final line without
        # a trailing newline does not lose the last digit of its last field.
        data = line.rstrip("\r\n").split("\t")

        qryName = data[0]
        qryStart = data[3]
        qryEnd = data[4]
        score = data[5]
        strand = data[6]
        attributes = data[8].split()

        sbjName = attributes[1]
        sbjStart = attributes[2]
        # The subject end token carries the ';' attribute separator.
        sbjEnd = attributes[3].rstrip(";")
        percId = (1 - float(attributes[-1])) * 100.0

        # On any strand other than '+', the subject coordinates are reversed.
        if strand != "+":
            sbjStart, sbjEnd = sbjEnd, sbjStart

        if self._removeSameSequences \
                and "chunk" in qryName and "chunk" in sbjName \
                and min(int(qryStart), int(qryEnd)) == 1 \
                and min(int(sbjStart), int(sbjEnd)) == 1 \
                and percId == 100.0:
            return None

        # Compare coordinates numerically: as strings, "100" < "20".
        if int(qryStart) >= int(qryEnd):
            # Put the query in ascending order and flip the subject with it.
            qryStart, qryEnd = qryEnd, qryStart
            sbjStart, sbjEnd = sbjEnd, sbjStart

        return "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (
            qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, "0.0", score, percId)

    def run(self):
        """
        Read the PALS file, convert every record and write the sorted
        'align' file.

        The converted records are first written to a temporary file named
        ``PalsToAlign<pid>`` in the current working directory, which is then
        sorted by the external ``sort`` command into the output file.  The
        temporary file is removed even if the conversion fails.
        """
        # Local import so this class does not depend on the module's
        # top-level import list.
        import subprocess

        tmpFileName = "PalsToAlign%s" % str(os.getpid())
        try:
            with open(self._inputPalsFileName, "r") as palsFile:
                with open(tmpFileName, "w") as tmpFile:
                    for line in palsFile:
                        alignLine = self._convertLine(line)
                        if alignLine is not None:
                            tmpFile.write(alignLine)

            # Pass an argument list (no shell) so that file names containing
            # spaces or shell metacharacters are handled safely.
            sortCmd = ["sort",
                       "-k", "1,1", "-k", "4,4",
                       "-k", "2,2n", "-k", "3,3n",
                       "-k", "5,5n", "-k", "6,6n",
                       "-k", "8,8n",
                       tmpFileName]
            with open(self._outputAlignFileName, "w") as sortedFile:
                subprocess.call(sortCmd, stdout=sortedFile)
        finally:
            if os.path.exists(tmpFileName):
                os.remove(tmpFileName)
|
|
65
|
|
66
|