diff spring_package/Alignment.py @ 17:c790d25086dc draft

"planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
author guerler
date Wed, 28 Oct 2020 05:11:56 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spring_package/Alignment.py	Wed Oct 28 05:11:56 2020 +0000
@@ -0,0 +1,110 @@
+from Bio import pairwise2
+
+class Alignment:
+	def __init__(self, fileName):
+		self.queryName = None
+		self.queryStart = list()
+		self.queryAlignment = list()
+		self.templateName = None
+		self.templateStart = list()
+		self.templateAlignment = list()
+		self.readFile(fileName)
+
+	def readFile(self, fileName):
+		with open(fileName) as file:
+			for index, line in enumerate(file):
+				cols = line.split()
+				if len(cols) > 1 and cols[0] == "Query":
+					self.queryName = cols[1].split()[0][0:14]
+				if len(cols) > 1 and cols[0].startswith(">"):
+					self.templateName = cols[0][1:]
+				if self.queryName and self.templateName:
+					if len(cols) > 2:
+						if cols[0] == "Q" and cols[1] == self.queryName:
+							self.queryStart.append(self.getStart(cols[2]))
+							self.queryAlignment.append(cols[3])
+						if cols[0] == "T" and cols[1] == self.templateName:
+							self.templateStart.append(self.getStart(cols[2]))
+							self.templateAlignment.append(cols[3])
+				if len(cols) > 1 and cols[0] == "No" and cols[1] == "2":
+					break
+
+	def createModel(self, templateChain):
+		hhrMapping = self.mapSequence(templateChain)
+		previousResidue = dict()
+		for residueNumber in templateChain:
+			templateResidue = templateChain[residueNumber]["residue"]
+			previousResidue[residueNumber] = templateResidue
+			templateChain[residueNumber]["residue"] = None
+		tCount = 0
+		for i in range(len(self.templateAlignment)):
+			templateSequence = self.templateAlignment[i]
+			querySequence = self.queryAlignment[i]
+			queryStart = self.queryStart[i]
+			n = len(querySequence)
+			qCount = 0
+			for j in range(n):
+				qs = querySequence[j]
+				ts = templateSequence[j]
+				rs = hhrMapping[tCount]
+				if rs != "-" and qs != "-" and ts != "-":
+					residueNumber = rs
+					if residueNumber in templateChain:
+						pr = previousResidue[residueNumber]
+						if pr != self.toThreeAmino(ts):
+							print ("Warning: Ignoring mismatching residue [%s != %s]." % (pr, self.toThreeAmino(ts)))
+						templateChain[residueNumber]["residue"] = self.toThreeAmino(qs)
+						templateChain[residueNumber]["residueNumber"] = queryStart + qCount
+					else:
+						print ("Warning: Skipping missing residue [%s]." % residueNumber)
+				if qs != "-":
+					qCount = qCount + 1
+				if ts != "-":
+					tCount = tCount + 1
+
+	def mapSequence(self, templateChain):
+		pdbSequence = ""
+		for residueNumber in templateChain:
+			templateResidue = templateChain[residueNumber]["residue"]
+			pdbSequence = pdbSequence + self.toSingleAmino(templateResidue)
+		hhrSequence = ""
+		for i in range(len(self.templateAlignment)):
+			templateSequence = self.templateAlignment[i]
+			for s in templateSequence:
+				if s != "-":
+					hhrSequence = hhrSequence + s
+		alignments = pairwise2.align.globalxx(pdbSequence, hhrSequence)
+		pdbAlignment = alignments[0].seqA
+		hhrAlignment = alignments[0].seqB
+		pCount = 0
+		hCount = 0
+		hhrMapping = []
+		for i in range(len(pdbAlignment)):
+			p = pdbAlignment[i]
+			h = hhrAlignment[i]
+			if h != "-":
+				residueIndex = pCount if p != "-" else p 
+				hhrMapping.append(residueIndex)
+			if p != "-":
+				pCount = pCount + 1
+		sortedResidueIndex = sorted(templateChain.keys())
+		for i in range(len(hhrMapping)):
+			if hhrMapping[i] != "-":
+				hhrMapping[i] = sortedResidueIndex[hhrMapping[i]]
+		return hhrMapping
+
+	def getStart(self, x):
+		try:
+			return int(x)
+		except:
+			raise Exception("Invalid start index in alignment [%s]." % x)
+
+	def toThreeAmino(self, seq):
+		code = dict(G="GLY", A="ALA", V="VAL", L="LEU", I="ILE", M="MET", F="PHE", P="PRO", Y="TYR", W="TRP",
+					K="LYS", S="SER", C="CYS", N="ASN", Q="GLN", H="HIS", T="THR", E="GLU", D="ASP", R="ARG")
+		return code[seq] if seq in code else "XXX"
+
+	def toSingleAmino(self, seq):
+		code = dict(GLY="G", ALA="A", VAL="V", LEU="L", ILE="I", MET="M", PHE="F", PRO="P", TYR="Y", TRP="W",
+					LYS="K", SER="S", CYS="C", ASN="N", GLN="Q", HIS="H", THR="T", GLU="E", ASP="D", ARG="R")
+		return code[seq] if seq in code else "X"