diff commons/core/parsing/PalsToAlign.py @ 38:2c0c0a89fad7

Uploaded
author m-zytnicki
date Thu, 02 May 2013 09:56:47 -0400
parents 769e306b7933
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/PalsToAlign.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,66 @@
+import time
+import os
+
+class PalsToAlign(object):
+    """
+    Convert the output from PALS (GFF2 format) into the 'align' format.
+    """
+    def __init__(self,inputPalsFileName="" , outputAlignFileName="", removeSameSequences=False):
+        self._removeSameSequences = removeSameSequences
+        self._inputPalsFileName = inputPalsFileName
+        self._outputAlignFileName = outputAlignFileName
+
+    def run (self):
+        file = open(self._inputPalsFileName, "r")
+        tmpFileName = "PalsToAlign%s"%str(os.getpid() ) 
+        tmpFile = open(tmpFileName, "w")
+        
+        for line in file.readlines():
+    
+            if line == "":
+                break
+    
+            data = line.split("\t")
+    
+            qryName = data[0]
+            source = data[1]
+            feature = data[2]
+            qryStart = data[3]
+            qryEnd = data[4]
+            score = data[5]
+            strand = data[6]
+            frame = data[7]
+            attributes = data[8][:-1].split()
+    
+            sbjName = attributes[1]
+            sbjStart = attributes[2]
+            sbjEnd = attributes[3][:-1]
+            percId = (1 - float(attributes[-1])) * 100.0
+    
+            if strand != "+":
+                tmp = sbjStart
+                sbjStart = sbjEnd
+                sbjEnd = tmp
+    
+            if self._removeSameSequences \
+            and "chunk" in qryName and "chunk" in sbjName \
+            and min(int(qryStart), int(qryEnd)) == 1 \
+            and min(int(sbjStart), int(sbjEnd)) == 1 \
+            and percId == 100.0:
+                line = self.inFile.readline()
+                continue
+    
+            if qryStart < qryEnd:
+                alignLine = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, "0.0", score, percId)
+            else:
+                alignLine = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (qryName, qryEnd, qryStart, sbjName, sbjEnd, sbjStart, "0.0", score, percId)
+    
+            tmpFile.write(alignLine)
+
+        file.close()
+        tmpFile.close()
+    
+        os.system("sort -k 1,1 -k 4,4 -k 2,2n -k 3,3n -k 5,5n -k 6,6n -k 8,8n %s > %s" % (tmpFileName, self._outputAlignFileName))
+        os.remove(tmpFileName)
+
+