annotate spring_cross.py @ 38:80a4b98121b6 draft

"planemo upload commit 22cd6b0fa88ce0ddc4052beab306f5ba10754f12"
author guerler
date Wed, 25 Nov 2020 17:38:24 +0000
parents 0be0af9e695d
children 172398348efd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
37
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
1 #! /usr/bin/env python3
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
2 import argparse
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
3 from os import system
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
4
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
5 from spring_package.DBKit import createFile
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
6 from spring_package.Molecule import Molecule
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
7
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
8
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
def getId(line):
    """Normalize one `PDB_CHAIN` list entry.

    Surrounding whitespace is stripped first. The remaining text must be
    exactly six characters long with an underscore at index 4; otherwise
    an ``Exception`` carrying the stripped entry is raised.

    Returns the entry with its first four characters upper-cased and the
    trailing two characters (underscore plus chain) left unchanged.
    """
    entry = line.strip()
    wellFormed = len(entry) == 6 and entry[4] == "_"
    if not wellFormed:
        raise Exception("Invalid list entry (`PDB_CHAIN`): %s." % entry)
    pdbCode, chainSuffix = entry[:4], entry[4:]
    return pdbCode.upper() + chainSuffix
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
14
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
15
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
def _collectPartners(mol, pdb, pdbChain, entryId, logFile):
    """Return the partner lines found for one template chain.

    Iterates over the keys of ``mol.biomol``. Key ``0`` uses ``mol``
    itself; any other key is expanded with ``mol.createUnit``. For every
    resulting unit that has more than one chain and contains ``pdbChain``,
    each other chain yields a ``"<entryId>\\t<PDB>_<chain>"`` string.
    Progress messages are written to ``logFile``.
    """
    partners = set()
    for biomolNumber in mol.biomol:
        if biomolNumber == 0:
            # presumably key 0 denotes the structure as loaded - TODO confirm
            logFile.write("Processing biomolecule.\n")
            bioMolecule = mol
        else:
            logFile.write("Processing biomolecule %d.\n" % biomolNumber)
            bioMolecule = mol.createUnit(biomolNumber)
        nChains = len(bioMolecule.calpha.keys())
        # NOTE(review): this message goes to stdout, not the log file;
        # kept as-is so the tool's stdout stays unchanged.
        print("Found %d chain(s)." % nChains)
        if nChains > 1 and pdbChain in bioMolecule.calpha:
            for bioChain in bioMolecule.calpha:
                if bioChain == pdbChain:
                    continue
                # Only the first character of the chain key is used.
                partnerPdbChain = "%s_%s" % (pdb.upper(), bioChain[:1])
                partners.add("%s\t%s" % (entryId, partnerPdbChain))
        else:
            logFile.write("Skipping: Chain not found or single chain [%s].\n" % pdbChain)
    return partners


def main(args):
    """List, for every template chain, the chains it shares a biomolecule with.

    Reads ``PDB_CHAIN`` entries from ``args.list``, extracts each PDB file
    from the database (``args.index`` / ``args.database``) into
    ``args.temp``, and writes the sorted, de-duplicated
    ``"<entry>\\t<partner>"`` pairs to ``args.output``. Progress and
    warnings are written to ``args.log``.

    Raises whatever ``getId`` raises for a malformed list entry, and
    ``OSError`` if the temporary directory cannot be created.
    """
    # Function-scope import: the module only imports `system` from os.
    from os import makedirs

    partnerList = set()
    # The context manager guarantees the log is closed, even on error.
    with open(args.log, "w") as logFile:
        # Replaces a shell `mkdir -p`, which mis-handled paths containing
        # spaces or shell metacharacters and silently ignored failures.
        makedirs(args.temp, exist_ok=True)
        with open(args.list) as listFile:
            entries = [getId(line) for line in listFile]
        logFile.write("Found %s template entries.\n" % len(entries))
        # Every entry is extracted to the same scratch path.
        pdbFile = "%s/temp.pdb" % args.temp
        for entryId in entries:
            pdb = entryId[:4].lower()
            pdbChain = entryId[5:6]
            pdbDatabaseId = "%s.pdb" % pdb
            createFile(pdbDatabaseId, args.index, args.database, pdbFile)
            try:
                mol = Molecule(pdbFile)
            except Exception:
                # Any load failure is reported as a missing file and skipped.
                logFile.write("Warning: File '%s' not found.\n" % pdbDatabaseId)
                continue
            logFile.write("Processing %s, chain %s.\n" % (pdb, pdbChain))
            logFile.write("Found %d biomolecule(s).\n" % len(mol.biomol.keys()))
            partnerList.update(_collectPartners(mol, pdb, pdbChain, entryId, logFile))
            # Flush per entry so progress is visible while the run is ongoing.
            logFile.flush()
    with open(args.output, 'w') as output_file:
        for entry in sorted(partnerList):
            output_file.write("%s\n" % entry)
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
61
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
62
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point. All six options are mandatory, so they are
    # declared from a single table of (short flag, long flag, help text).
    parser = argparse.ArgumentParser(description='List filtering.')
    optionTable = (
        ('-l', '--list', 'List of PDB chains [PDB_CHAIN]'),
        ('-i', '--index', 'PDB Database Index file (ffindex)'),
        ('-d', '--database', 'PDB Database files (ffdata)'),
        ('-o', '--output', 'Output file'),
        ('-t', '--temp', 'Temporary Directory'),
        ('-g', '--log', 'Log File'),
    )
    for shortFlag, longFlag, helpText in optionTable:
        parser.add_argument(shortFlag, longFlag, help=helpText, required=True)
    args = parser.parse_args()
    main(args)