annotate spring_minz.py @ 39:172398348efd draft

"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
author guerler
date Fri, 22 Jan 2021 15:50:27 +0000
parents 41353488926c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d30785e31577 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
1 #! /usr/bin/env python3
d30785e31577 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
2 import argparse
39
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents: 29
diff changeset
3
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents: 29
diff changeset
4 from spring_package.Utilities import getCrossReference, getTemplates
0
d30785e31577 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
5
29
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents: 27
diff changeset
6
0
d30785e31577 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
def _readNames(listPath):
    """Return the identifiers listed in the text file `listPath`, one per line.

    Surrounding whitespace is stripped from every line. Blank lines are
    skipped so that they do not turn into empty identifiers, which would
    otherwise resolve to the bare directory path when the file name is built.
    """
    with open(listPath) as listFile:
        stripped = (line.strip() for line in listFile)
        return [name for name in stripped if name]


def main(args):
    """Score every target/input pair and write the ranked interactions.

    Reads these attributes of `args`:
        targetlist / targetpath: text file of target identifiers and the
            directory holding one result file per identifier.
        inputlist / inputpath: optional second identifier list and its
            directory; when `inputlist` is not given the targets are matched
            against themselves.
        cross: path handed to `getCrossReference`.
        minscore: threshold handed to `getTemplates` and `matchScores`.
        log: path of the progress log written by `matchScores`.
        output: path of the result file.

    The result file holds one tab-separated line per interaction
    (targetName, inputName, minZ, minInfo), sorted by descending minZ.
    """
    minScore = args.minscore
    # The log is opened before anything else (as before), so it is created even
    # when loading fails; the `with` guarantees it is closed on every exit path
    # instead of only when the whole run succeeds.
    with open(args.log, 'w') as logFile:
        targetPath = args.targetpath.rstrip("/")
        targets = _readNames(args.targetlist)
        print("Loaded %s target names from `%s`." % (len(targets), args.targetlist))
        hhrResults = dict()
        for targetName in targets:
            targetFile = "%s/%s" % (targetPath, targetName)
            hhrResults[targetName] = getTemplates(targetFile, minScore)
        if args.inputlist:
            # NOTE(review): --inputpath is optional on the command line but is
            # dereferenced here, so it must be supplied together with --inputlist.
            inputPath = args.inputpath.rstrip("/")
            inputs = _readNames(args.inputlist)
            print("Loaded %s input names from `%s`." % (len(inputs), args.inputlist))
            for inputName in inputs:
                # Results already loaded for a target of the same name are reused.
                if inputName not in hhrResults:
                    inputFile = "%s/%s" % (inputPath, inputName)
                    hhrResults[inputName] = getTemplates(inputFile, minScore)
        else:
            inputs = targets
        print("Loaded hhr results for %s entries." % len(hhrResults))
        crossReference = getCrossReference(args.cross)
        print("Loaded cross reference from `%s`." % args.cross)
        interactions = dict()
        for targetName in targets:
            matchScores(hhrResults=hhrResults,
                        targetName=targetName,
                        inputs=inputs,
                        crossReference=crossReference,
                        minScore=minScore,
                        logFile=logFile,
                        interactions=interactions)
    # Highest min-Z first.
    ranked = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True)
    with open(args.output, 'w') as outputFile:
        for entry in ranked:
            outputFile.write("%s\t%s\t%s\t%s\n" % (entry["targetName"],
                                                   entry["inputName"], entry["minZ"],
                                                   entry["minInfo"]))
17
c790d25086dc "planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents: 16
diff changeset
54
29
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents: 27
diff changeset
55
39
172398348efd "planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents: 29
diff changeset
def matchScores(hhrResults, targetName, inputs, crossReference, minScore, logFile, interactions):
    """Record the best-scoring interaction between `targetName` and each input.

    Args:
        hhrResults: mapping of entry name to a pair `(top, hits)`, where
            `hits` maps a hit identifier to its score.
        targetName: name of the entry to evaluate; if it is missing from
            `hhrResults` a message is printed and nothing else happens.
        inputs: iterable of entry names to pair with the target; names
            missing from `hhrResults` are skipped.
        crossReference: mapping of hit identifier to a dict whose
            `"partners"` value lists the partner identifiers.
        minScore: a pair is only recorded when its score exceeds this value.
        logFile: writable object receiving one progress line per target.
        interactions: dict updated in place. Keys are the two names joined
            by `_` with the lexicographically larger name first; values are
            dicts with `targetName`, `inputName`, `minZ` and `minInfo`.

    The score of a (hit, partner) combination is the smaller of the two hit
    scores; the largest such score over all combinations is kept per pair.
    An existing entry is only replaced by a strictly higher score.
    """
    if targetName not in hhrResults:
        print("Target not found `%s`" % targetName)
        return

    targetTop, targetHits = hhrResults[targetName]
    print("Evaluating %s." % targetName)
    logFile.write("Evaluating %s.\n" % targetName)
    logFile.flush()

    for inputName in inputs:
        if inputName not in hhrResults:
            continue
        inputTop, inputHits = hhrResults[inputName]

        # Search all cross-referenced (hit, partner) combinations for the
        # highest "weakest link" score.
        bestScore = 0
        bestInfo = ""
        for hitId in targetHits:
            if hitId not in crossReference:
                continue
            for partnerId in crossReference[hitId]["partners"]:
                if partnerId not in inputHits:
                    continue
                candidate = min(targetHits[hitId], inputHits[partnerId])
                if candidate > bestScore:
                    bestScore = candidate
                    bestInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, hitId, partnerId)

        if not (bestScore > minScore):
            continue

        # Order-independent key, so A/B and B/A share a single entry.
        if targetName > inputName:
            orderedPair = (targetName, inputName)
        else:
            orderedPair = (inputName, targetName)
        interactionKey = "%s_%s" % orderedPair

        # On a tie the entry recorded first wins.
        if interactionKey in interactions and interactions[interactionKey]["minZ"] >= bestScore:
            continue
        interactions[interactionKey] = dict(targetName=targetName,
                                            inputName=inputName,
                                            minZ=bestScore, minInfo=bestInfo)
0
d30785e31577 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
89
29
41353488926c "planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents: 27
diff changeset
90
0
d30785e31577 "planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: declare the options, parse them and run main().
    cli = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
    # (short flag, long flag, help text, required) in the order shown by --help.
    pathOptions = (
        ('-tl', '--targetlist', 'Text file containing identifiers.', True),
        ('-tp', '--targetpath', 'Directory containing `hhr` files', True),
        ('-il', '--inputlist', 'Text file containing identifiers.', False),
        ('-ip', '--inputpath', 'Directory containing `hhr` files', False),
        ('-c', '--cross', 'PDB Cross Reference', True),
        ('-o', '--output', 'Output file containing min-Z scores', True),
        ('-l', '--log', 'Log file', True),
    )
    for shortFlag, longFlag, helpText, isRequired in pathOptions:
        cli.add_argument(shortFlag, longFlag, help=helpText, required=isRequired)
    # The threshold is the only typed option and the only one with a default.
    cli.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=25)
    main(cli.parse_args())