comparison commons/core/parsing/PalsToAlign.py @ 38:2c0c0a89fad7

Uploaded
author m-zytnicki
date Thu, 02 May 2013 09:56:47 -0400
parents 769e306b7933
children
comparison
equal deleted inserted replaced
37:d22fadc825e3 38:2c0c0a89fad7
1 import time
2 import os
3
class PalsToAlign(object):
    """
    Convert the output from PALS (GFF2 format) into the 'align' format.

    Every non-blank input line must hold at least nine tab-separated columns.
    The ninth column is split on whitespace and read as
    ``<tag> <subjectName> <subjectStart> <subjectEnd>; ... <value>``: the
    character ';' closing the subject end is dropped and the last token is
    turned into a percentage identity as ``(1 - value) * 100``.

    Each output line holds nine tab-separated columns: query name, query
    start, query end, subject name, subject start, subject end, the constant
    "0.0", the score and the percentage identity.
    """

    def __init__(self, inputPalsFileName="", outputAlignFileName="", removeSameSequences=False):
        """
        @param inputPalsFileName: path of the PALS (GFF2) file to read
        @param outputAlignFileName: path of the 'align' file to write
        @param removeSameSequences: if True, skip the matches where both names
            contain "chunk", both ranges start at position 1 and the identity
            is exactly 100%
        """
        self._removeSameSequences = removeSameSequences
        self._inputPalsFileName = inputPalsFileName
        self._outputAlignFileName = outputAlignFileName

    def run(self):
        """
        Convert the input file and write the sorted result to the output file.

        The converted lines are first written to a temporary file (named after
        the process id, in the current directory), which is then sorted with
        the external 'sort' program and finally removed, even upon failure.

        @raise IOError: if a file cannot be opened
        @raise IndexError: if a non-blank line has too few columns
        @raise ValueError: if a coordinate or the last attribute is not a number
        @raise OSError: if the 'sort' program cannot be started
        @raise subprocess.CalledProcessError: if 'sort' exits with an error
        """
        # Imported here so that the module-level imports stay untouched.
        import subprocess

        tmpFileName = "PalsToAlign%s" % str(os.getpid())
        try:
            with open(self._inputPalsFileName, "r") as palsFile:
                with open(tmpFileName, "w") as tmpFile:
                    for line in palsFile:
                        alignLine = self._convertLine(line)
                        if alignLine is not None:
                            tmpFile.write(alignLine)

            # Sort on query name, subject name, then numerically on the
            # coordinates and the score.  Passing an argument list (no shell)
            # keeps file names with spaces or shell metacharacters safe.
            sortCmd = ["sort",
                       "-k", "1,1", "-k", "4,4",
                       "-k", "2,2n", "-k", "3,3n",
                       "-k", "5,5n", "-k", "6,6n",
                       "-k", "8,8n",
                       tmpFileName]
            with open(self._outputAlignFileName, "w") as alignFile:
                subprocess.check_call(sortCmd, stdout=alignFile)
        finally:
            if os.path.exists(tmpFileName):
                os.remove(tmpFileName)

    def _convertLine(self, line):
        """
        Convert one PALS line into one 'align' line.

        @param line: a line of the PALS file, with or without its end of line
        @return: the 'align' line (newline-terminated), or None if the line is
            blank or discarded by the 'removeSameSequences' filter
        """
        if not line.strip():
            return None

        data = line.split("\t")
        qryName = data[0]
        qryStart = data[3]
        qryEnd = data[4]
        score = data[5]
        strand = data[6]
        # split() already discards the end of line: slicing off the last
        # character would corrupt a final line that has no newline.
        attributes = data[8].split()

        sbjName = attributes[1]
        sbjStart = attributes[2]
        sbjEnd = attributes[3].rstrip(";")
        percId = (1 - float(attributes[-1])) * 100.0

        if strand != "+":
            sbjStart, sbjEnd = sbjEnd, sbjStart

        if self._removeSameSequences \
                and "chunk" in qryName and "chunk" in sbjName \
                and min(int(qryStart), int(qryEnd)) == 1 \
                and min(int(sbjStart), int(sbjEnd)) == 1 \
                and percId == 100.0:
            return None

        # Coordinates must be compared as numbers: as strings, "950" < "1200"
        # is False and the match would be wrongly reversed.
        if int(qryStart) >= int(qryEnd):
            qryStart, qryEnd = qryEnd, qryStart
            sbjStart, sbjEnd = sbjEnd, sbjStart

        return "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, "0.0", score, percId)
65
66