Mercurial > repos > yufei-luo > s_mart
diff commons/core/parsing/PalsToAlign.py @ 38:2c0c0a89fad7
Uploaded
author | m-zytnicki |
---|---|
date | Thu, 02 May 2013 09:56:47 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line diff
import os
import subprocess
import time


class PalsToAlign(object):
    """
    Convert the output from PALS (GFF2 format) into the 'align' format.

    Each tab-separated input line is turned into one tab-separated output
    line with nine columns: query name, query start, query end, subject
    name, subject start, subject end, the constant "0.0", the score, and
    the percentage of identity.  The output file is sorted with the
    external ``sort`` program.
    """

    def __init__(self, inputPalsFileName="", outputAlignFileName="", removeSameSequences=False):
        """
        @param inputPalsFileName: path of the PALS file to read
        @param outputAlignFileName: path of the 'align' file to write
        @param removeSameSequences: if True, drop hits between two "chunk"
            sequences that both start at position 1 with 100% identity
        """
        self._removeSameSequences = removeSameSequences
        self._inputPalsFileName = inputPalsFileName
        self._outputAlignFileName = outputAlignFileName

    def _convertLine(self, line):
        """
        Convert one PALS line into one 'align' line.

        @param line: a non-blank line of the PALS file (the trailing end of
            line, if any, is ignored)
        @return: the 'align' line terminated by a newline, or None when the
            hit is filtered out by the removeSameSequences option
        @raise IndexError: if the line has fewer than 9 tab-separated
            columns, or fewer than 4 tokens in its attribute column
        @raise ValueError: if the query coordinates are not integers or the
            last attribute token is not a number
        """
        # Strip only the end-of-line characters: the last line of a file may
        # have none, and blindly dropping one character would truncate data.
        fields = line.rstrip("\r\n").split("\t")

        qryName = fields[0]
        qryStart = fields[3]
        qryEnd = fields[4]
        score = fields[5]
        strand = fields[6]
        attributes = fields[8].split()

        sbjName = attributes[1]
        sbjStart = attributes[2]
        # The subject end is followed by the ';' separating attributes.
        sbjEnd = attributes[3].rstrip(";")
        # The last attribute token is read as a fraction f; the identity is
        # reported as (1 - f) expressed in percent.
        percId = (1 - float(attributes[-1])) * 100.0

        # Hits not on the '+' strand get reversed subject coordinates.
        if strand != "+":
            sbjStart, sbjEnd = sbjEnd, sbjStart

        if self._removeSameSequences \
        and "chunk" in qryName and "chunk" in sbjName \
        and min(int(qryStart), int(qryEnd)) == 1 \
        and min(int(sbjStart), int(sbjEnd)) == 1 \
        and percId == 100.0:
            return None

        # Compare coordinates as integers: as strings, "9" < "10" is False.
        # The original text of each coordinate is written out unchanged.
        if int(qryStart) < int(qryEnd):
            columns = (qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, "0.0", score, percId)
        else:
            # Keep the query in increasing order and flip the subject too.
            columns = (qryName, qryEnd, qryStart, sbjName, sbjEnd, sbjStart, "0.0", score, percId)
        return "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % columns

    def run(self):
        """
        Read the PALS file, write the converted hits to a temporary file in
        the current directory, then sort that file into the output file.

        Blank lines of the input are skipped.  The temporary file is removed
        even when the conversion or the sort fails.

        @raise IOError: if a file cannot be opened
        @raise subprocess.CalledProcessError: if ``sort`` exits with a
            non-zero status
        """
        tmpFileName = "PalsToAlign%s" % str(os.getpid())
        try:
            with open(self._inputPalsFileName, "r") as palsFile:
                with open(tmpFileName, "w") as tmpFile:
                    for line in palsFile:
                        if not line.strip():
                            continue
                        alignLine = self._convertLine(line)
                        if alignLine is not None:
                            tmpFile.write(alignLine)

            # Arguments are passed as a list (no shell), so file names with
            # spaces or shell metacharacters are handled safely.
            sortCmd = ["sort",
                       "-k", "1,1", "-k", "4,4",
                       "-k", "2,2n", "-k", "3,3n",
                       "-k", "5,5n", "-k", "6,6n",
                       "-k", "8,8n",
                       tmpFileName]
            with open(self._outputAlignFileName, "w") as alignFile:
                subprocess.check_call(sortCmd, stdout=alignFile)
        finally:
            if os.path.exists(tmpFileName):
                os.remove(tmpFileName)