Mercurial > repos > guerler > springsuite
diff spring_package/Alignment.py @ 37:0be0af9e695d draft
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
author | guerler |
---|---|
date | Wed, 25 Nov 2020 14:35:35 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spring_package/Alignment.py Wed Nov 25 14:35:35 2020 +0000 @@ -0,0 +1,110 @@ +from Bio import pairwise2 + + +class Alignment: + def __init__(self, fileName): + self.queryName = None + self.queryStart = list() + self.queryAlignment = list() + self.templateName = None + self.templateStart = list() + self.templateAlignment = list() + self.readFile(fileName) + + def readFile(self, fileName): + with open(fileName) as file: + for line in file: + cols = line.split() + if len(cols) > 1 and cols[0] == "Query": + self.queryName = cols[1].split()[0][0:14] + if len(cols) > 1 and cols[0].startswith(">"): + self.templateName = cols[0][1:] + if self.queryName and self.templateName: + if len(cols) > 2: + if cols[0] == "Q" and cols[1] == self.queryName: + self.queryStart.append(self.getStart(cols[2])) + self.queryAlignment.append(cols[3]) + if cols[0] == "T" and cols[1] == self.templateName: + self.templateStart.append(self.getStart(cols[2])) + self.templateAlignment.append(cols[3]) + if len(cols) > 1 and cols[0] == "No" and cols[1] == "2": + break + + def createModel(self, templateChain): + hhrMapping = self.mapSequence(templateChain) + previousResidue = dict() + for residueNumber in templateChain: + templateResidue = templateChain[residueNumber]["residue"] + previousResidue[residueNumber] = templateResidue + templateChain[residueNumber]["residue"] = None + tCount = 0 + for i in range(len(self.templateAlignment)): + templateSequence = self.templateAlignment[i] + querySequence = self.queryAlignment[i] + queryStart = self.queryStart[i] + n = len(querySequence) + qCount = 0 + for j in range(n): + qs = querySequence[j] + ts = templateSequence[j] + rs = hhrMapping[tCount] + if rs != "-" and qs != "-" and ts != "-": + residueNumber = rs + if residueNumber in templateChain: + pr = previousResidue[residueNumber] + if pr != self.toThreeAmino(ts): + print("Warning: Ignoring mismatching residue [%s != %s]." % (pr, self.toThreeAmino(ts))) + templateChain[residueNumber]["residue"] = self.toThreeAmino(qs) + templateChain[residueNumber]["residueNumber"] = queryStart + qCount + else: + print("Warning: Skipping missing residue [%s]." % residueNumber) + if qs != "-": + qCount = qCount + 1 + if ts != "-": + tCount = tCount + 1 + + def mapSequence(self, templateChain): + pdbSequence = "" + for residueNumber in templateChain: + templateResidue = templateChain[residueNumber]["residue"] + pdbSequence = pdbSequence + self.toSingleAmino(templateResidue) + hhrSequence = "" + for i in range(len(self.templateAlignment)): + templateSequence = self.templateAlignment[i] + for s in templateSequence: + if s != "-": + hhrSequence = hhrSequence + s + alignments = pairwise2.align.globalxx(pdbSequence, hhrSequence) + pdbAlignment = alignments[0].seqA + hhrAlignment = alignments[0].seqB + pCount = 0 + hhrMapping = [] + for i in range(len(pdbAlignment)): + p = pdbAlignment[i] + h = hhrAlignment[i] + if h != "-": + residueIndex = pCount if p != "-" else p + hhrMapping.append(residueIndex) + if p != "-": + pCount = pCount + 1 + sortedResidueIndex = sorted(templateChain.keys()) + for i in range(len(hhrMapping)): + if hhrMapping[i] != "-": + hhrMapping[i] = sortedResidueIndex[hhrMapping[i]] + return hhrMapping + + def getStart(self, x): + try: + return int(x) + except Exception: + raise Exception("Invalid start index in alignment [%s]." % x) + + def toThreeAmino(self, seq): + code = dict(G="GLY", A="ALA", V="VAL", L="LEU", I="ILE", M="MET", F="PHE", P="PRO", Y="TYR", W="TRP", + K="LYS", S="SER", C="CYS", N="ASN", Q="GLN", H="HIS", T="THR", E="GLU", D="ASP", R="ARG") + return code[seq] if seq in code else "XXX" + + def toSingleAmino(self, seq): + code = dict(GLY="G", ALA="A", VAL="V", LEU="L", ILE="I", MET="M", PHE="F", PRO="P", TYR="Y", TRP="W", + LYS="K", SER="S", CYS="C", ASN="N", GLN="Q", HIS="H", THR="T", GLU="E", ASP="D", ARG="R") + return code[seq] if seq in code else "X"