comparison spring_package/Alignment.py @ 17:c790d25086dc draft

"planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
author guerler
date Wed, 28 Oct 2020 05:11:56 +0000
parents
children
comparison
equal deleted inserted replaced
16:16eb2acaaa20 17:c790d25086dc
1 from Bio import pairwise2
2
3 class Alignment:
4 def __init__(self, fileName):
5 self.queryName = None
6 self.queryStart = list()
7 self.queryAlignment = list()
8 self.templateName = None
9 self.templateStart = list()
10 self.templateAlignment = list()
11 self.readFile(fileName)
12
13 def readFile(self, fileName):
14 with open(fileName) as file:
15 for index, line in enumerate(file):
16 cols = line.split()
17 if len(cols) > 1 and cols[0] == "Query":
18 self.queryName = cols[1].split()[0][0:14]
19 if len(cols) > 1 and cols[0].startswith(">"):
20 self.templateName = cols[0][1:]
21 if self.queryName and self.templateName:
22 if len(cols) > 2:
23 if cols[0] == "Q" and cols[1] == self.queryName:
24 self.queryStart.append(self.getStart(cols[2]))
25 self.queryAlignment.append(cols[3])
26 if cols[0] == "T" and cols[1] == self.templateName:
27 self.templateStart.append(self.getStart(cols[2]))
28 self.templateAlignment.append(cols[3])
29 if len(cols) > 1 and cols[0] == "No" and cols[1] == "2":
30 break
31
32 def createModel(self, templateChain):
33 hhrMapping = self.mapSequence(templateChain)
34 previousResidue = dict()
35 for residueNumber in templateChain:
36 templateResidue = templateChain[residueNumber]["residue"]
37 previousResidue[residueNumber] = templateResidue
38 templateChain[residueNumber]["residue"] = None
39 tCount = 0
40 for i in range(len(self.templateAlignment)):
41 templateSequence = self.templateAlignment[i]
42 querySequence = self.queryAlignment[i]
43 queryStart = self.queryStart[i]
44 n = len(querySequence)
45 qCount = 0
46 for j in range(n):
47 qs = querySequence[j]
48 ts = templateSequence[j]
49 rs = hhrMapping[tCount]
50 if rs != "-" and qs != "-" and ts != "-":
51 residueNumber = rs
52 if residueNumber in templateChain:
53 pr = previousResidue[residueNumber]
54 if pr != self.toThreeAmino(ts):
55 print ("Warning: Ignoring mismatching residue [%s != %s]." % (pr, self.toThreeAmino(ts)))
56 templateChain[residueNumber]["residue"] = self.toThreeAmino(qs)
57 templateChain[residueNumber]["residueNumber"] = queryStart + qCount
58 else:
59 print ("Warning: Skipping missing residue [%s]." % residueNumber)
60 if qs != "-":
61 qCount = qCount + 1
62 if ts != "-":
63 tCount = tCount + 1
64
65 def mapSequence(self, templateChain):
66 pdbSequence = ""
67 for residueNumber in templateChain:
68 templateResidue = templateChain[residueNumber]["residue"]
69 pdbSequence = pdbSequence + self.toSingleAmino(templateResidue)
70 hhrSequence = ""
71 for i in range(len(self.templateAlignment)):
72 templateSequence = self.templateAlignment[i]
73 for s in templateSequence:
74 if s != "-":
75 hhrSequence = hhrSequence + s
76 alignments = pairwise2.align.globalxx(pdbSequence, hhrSequence)
77 pdbAlignment = alignments[0].seqA
78 hhrAlignment = alignments[0].seqB
79 pCount = 0
80 hCount = 0
81 hhrMapping = []
82 for i in range(len(pdbAlignment)):
83 p = pdbAlignment[i]
84 h = hhrAlignment[i]
85 if h != "-":
86 residueIndex = pCount if p != "-" else p
87 hhrMapping.append(residueIndex)
88 if p != "-":
89 pCount = pCount + 1
90 sortedResidueIndex = sorted(templateChain.keys())
91 for i in range(len(hhrMapping)):
92 if hhrMapping[i] != "-":
93 hhrMapping[i] = sortedResidueIndex[hhrMapping[i]]
94 return hhrMapping
95
96 def getStart(self, x):
97 try:
98 return int(x)
99 except:
100 raise Exception("Invalid start index in alignment [%s]." % x)
101
102 def toThreeAmino(self, seq):
103 code = dict(G="GLY", A="ALA", V="VAL", L="LEU", I="ILE", M="MET", F="PHE", P="PRO", Y="TYR", W="TRP",
104 K="LYS", S="SER", C="CYS", N="ASN", Q="GLN", H="HIS", T="THR", E="GLU", D="ASP", R="ARG")
105 return code[seq] if seq in code else "XXX"
106
107 def toSingleAmino(self, seq):
108 code = dict(GLY="G", ALA="A", VAL="V", LEU="L", ILE="I", MET="M", PHE="F", PRO="P", TYR="Y", TRP="W",
109 LYS="K", SER="S", CYS="C", ASN="N", GLN="Q", HIS="H", THR="T", GLU="E", ASP="D", ARG="R")
110 return code[seq] if seq in code else "X"