annotate spring_cross.py @ 37:0be0af9e695d draft

"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
author guerler
date Wed, 25 Nov 2020 14:35:35 +0000
parents
children 80a4b98121b6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
37
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
1 #! /usr/bin/env python3
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
2 import argparse
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
3 from os import system
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
4
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
5 from spring_package.DBKit import createFile
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
6 from spring_package.Molecule import Molecule
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
7
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
8
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
def getId(line):
    """Return the normalized chain identifier found at the start of *line*.

    Only the first whitespace-delimited token of the line is considered.
    Its first four characters are upper-cased and the following two
    characters (positions 5 and 6) are appended unchanged; anything past
    the sixth character is dropped.

    Raises:
        IndexError: if *line* is empty or contains only whitespace.
    """
    # maxsplit=1 is enough: only the leading token is ever used
    token = line.split(None, 1)[0]
    code = token[:4]
    suffix = token[4:6]
    return code.upper() + suffix
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
12
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
13
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
def main(args):
    """Collect the partner chains of every listed template chain.

    For each identifier read from the list file, the matching PDB entry is
    extracted from the database into a temporary file and parsed.  Every
    biomolecule of that entry that contains the requested chain together
    with at least one other chain contributes one tab-separated pair
    (entry identifier, partner identifier) per other chain.  The pairs are
    de-duplicated, sorted and written to the output file, one per line.

    Args:
        args: parsed command line options providing the attributes
            ``list`` (input list file), ``index`` and ``database``
            (passed through to ``createFile``), ``output`` (result file),
            ``temp`` (working directory, created if missing) and ``log``
            (log file, overwritten).
    """
    # function-scope import keeps this block self-contained
    from os import makedirs

    partnerList = set()
    # the context manager guarantees the log is closed even if processing fails
    with open(args.log, "w") as logFile:
        # equivalent of "mkdir -p" without passing the path through a shell
        makedirs(args.temp, exist_ok=True)
        with open(args.list) as file:
            # blank lines carry no identifier and would make getId raise IndexError
            entries = [getId(line) for line in file if line.strip()]
        logFile.write("Found %s template entries.\n" % len(entries))
        # the same scratch file is reused for every entry
        pdbFile = "%s/temp.pdb" % args.temp
        for entryId in entries:
            pdb = entryId[:4].lower()
            # the chain is the 6th character; the 5th is the separator
            pdbChain = entryId[5:6]
            pdbDatabaseId = "%s.pdb" % pdb
            # NOTE(review): if createFile leaves pdbFile untouched for a missing
            # entry, the previous entry's data would be parsed below - confirm.
            createFile(pdbDatabaseId, args.index, args.database, pdbFile)
            try:
                mol = Molecule(pdbFile)
            except Exception:
                # any failure to load the structure is reported as a missing file
                logFile.write("Warning: File '%s' not found.\n" % pdbDatabaseId)
                continue
            logFile.write("Processing %s, chain %s.\n" % (pdb, pdbChain))
            logFile.write("Found %d biomolecule(s).\n" % len(mol.biomol.keys()))
            for biomolNumber in mol.biomol:
                if biomolNumber == 0:
                    # key 0 means the molecule is used as loaded
                    logFile.write("Processing biomolecule.\n")
                    bioMolecule = mol
                else:
                    logFile.write("Processing biomolecule %d.\n" % biomolNumber)
                    bioMolecule = mol.createUnit(biomolNumber)
                nChains = len(bioMolecule.calpha.keys())
                print("Found %d chain(s)." % nChains)
                if nChains > 1 and pdbChain in bioMolecule.calpha:
                    for bioChain in bioMolecule.calpha:
                        if bioChain == pdbChain:
                            continue
                        # only the first character of the chain key is kept
                        partnerPdbChain = "%s_%s" % (pdb.upper(), bioChain[:1])
                        partnerList.add("%s\t%s" % (entryId, partnerPdbChain))
                else:
                    logFile.write("Skipping: Chain not found or single chain [%s].\n" % pdbChain)
            # keep the log current while long runs are in progress
            logFile.flush()
    with open(args.output, 'w') as output_file:
        for entry in sorted(partnerList):
            output_file.write("%s\n" % entry)
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
59
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
60
0be0af9e695d "planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff changeset
if __name__ == "__main__":
    # Command line entry point: every option is mandatory.
    # Each row is (short flag, long flag, help text), in display order.
    option_table = (
        ('-l', '--list', 'List of PDB chains [PDB_CHAIN]'),
        ('-i', '--index', 'PDB Database Index file (dbkit_index)'),
        ('-d', '--database', 'PDB Database files (dbkit)'),
        ('-o', '--output', 'Output file'),
        ('-t', '--temp', 'Temporary Directory'),
        ('-g', '--log', 'Log File'),
    )
    parser = argparse.ArgumentParser(description='List filtering.')
    for short_flag, long_flag, help_text in option_table:
        parser.add_argument(short_flag, long_flag, help=help_text, required=True)
    args = parser.parse_args()
    main(args)