annotate spring_package/Utilities.py @ 39:172398348efd draft

"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
author guerler
date Fri, 22 Jan 2021 15:50:27 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
39
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
1 from os.path import isfile
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
2
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
3
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
4 def validateIdentifier(identifier):
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
5 if len(identifier) < 6 or identifier[4:5] != "_":
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
6 raise Exception("Invalid list entry (`PDB_CHAIN`): %s." % identifier)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
7
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
8
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
9 def getId(identifier):
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
10 identifier = identifier.strip()
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
11 validateIdentifier(identifier)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
12 return identifier[:4].upper() + identifier[4:6]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
13
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
14
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
15 def getChain(identifier):
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
16 validateIdentifier(identifier)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
17 pdbChain = identifier[5:6]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
18 return pdbChain
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
19
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
20
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
21 def getName(identifier):
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
22 pdb = identifier[:4].lower()
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
23 return pdb
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
24
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
25
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
26 def getCrossReference(crossReferenceFile, allPartners=False):
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
27 crossReference = dict()
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
28 crossCount = 0
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
29 with open(crossReferenceFile) as file:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
30 for line in file:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
31 columns = line.split()
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
32 if len(columns) < 2:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
33 raise Exception("Invalid Cross Reference Entry %s." % line)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
34 core = columns[0]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
35 partner = columns[1]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
36 if len(columns) < 4:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
37 templates = [core, partner]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
38 else:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
39 templates = [columns[2], columns[3]]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
40 if core not in crossReference:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
41 crossReference[core] = dict(partners=list(), templates=list())
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
42 if allPartners or partner not in crossReference[core]["partners"]:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
43 crossReference[core]["partners"].append(partner)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
44 crossReference[core]["templates"].append(templates)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
45 crossCount = crossCount + 1
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
46 print("Identified %s reference interactions." % crossCount)
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
47 return crossReference
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
48
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
49
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
50 def getTemplates(hhrFile, minScore=10):
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
51 result = dict()
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
52 topTemplate = None
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
53 if isfile(hhrFile):
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
54 with open(hhrFile) as file:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
55 for index, line in enumerate(file):
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
56 if index > 8:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
57 if not line.strip():
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
58 break
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
59 templateId = line[4:10]
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
60 templateScore = float(line[57:63])
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
61 if templateScore > minScore:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
62 if topTemplate is None:
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
63 topTemplate = templateId
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
64 result[templateId] = templateScore
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
diff changeset
65 return topTemplate, result