Mercurial > repos > guerler > springsuite
annotate spring_minz.py @ 41:f316caf098a6 draft default tip
"planemo upload commit 685e1236afde7cf6bb0c9236de06998d2c211dd3"
author | guerler |
---|---|
date | Mon, 01 Mar 2021 15:02:36 +0000 |
parents | 172398348efd |
children |
rev | line source |
---|---|
0
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
1 #! /usr/bin/env python3 |
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
2 import argparse |
39
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
29
diff
changeset
|
3 |
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
29
diff
changeset
|
4 from spring_package.Utilities import getCrossReference, getTemplates |
0
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
5 |
29
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
27
diff
changeset
|
6 |
0
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
def _readNames(listPath):
    """Return the non-empty, whitespace-stripped lines of the text file `listPath`."""
    with open(listPath) as listFile:
        stripped = (line.strip() for line in listFile)
        # Blank lines would otherwise become empty identifiers that are
        # looked up as `<directory>/` further down.
        return [name for name in stripped if name]


def main(args):
    """Detect interactions between targets and inputs and write them to disk.

    Reads the identifiers listed in `args.targetlist` (and optionally
    `args.inputlist`), loads the corresponding results from
    `args.targetpath` / `args.inputpath` through `getTemplates`, loads the
    cross reference from `args.cross`, scores every target against every
    input with `matchScores`, and writes the resulting entries to
    `args.output` as tab-separated lines sorted by descending `minZ`.
    Progress is written to `args.log`.

    Without `args.inputlist` the targets are matched against themselves.

    Raises:
        ValueError: if `args.inputlist` is given without `args.inputpath`.
    """
    # Fail early with a readable message instead of an AttributeError on
    # `None.rstrip` once the input list is processed.
    if args.inputlist and not args.inputpath:
        raise ValueError("`--inputpath` is required when `--inputlist` is given.")
    minScore = args.minscore
    # The context manager guarantees the log handle is released even when
    # loading or matching raises.
    with open(args.log, 'w') as logFile:
        targetPath = args.targetpath.rstrip("/")
        hhrResults = dict()
        targets = _readNames(args.targetlist)
        print("Loaded %s target names from `%s`." % (len(targets), args.targetlist))
        for targetName in targets:
            targetFile = "%s/%s" % (targetPath, targetName)
            hhrResults[targetName] = getTemplates(targetFile, minScore)
        if args.inputlist:
            inputPath = args.inputpath.rstrip("/")
            inputs = _readNames(args.inputlist)
            print("Loaded %s input names from `%s`." % (len(inputs), args.inputlist))
            for inputName in inputs:
                # Results already loaded for a target of the same name are reused.
                if inputName not in hhrResults:
                    inputFile = "%s/%s" % (inputPath, inputName)
                    hhrResults[inputName] = getTemplates(inputFile, minScore)
        else:
            inputs = targets
        print("Loaded hhr results for %s entries." % len(hhrResults))
        crossReference = getCrossReference(args.cross)
        print("Loaded cross reference from `%s`." % args.cross)
        interactions = dict()
        for targetName in targets:
            matchScores(hhrResults=hhrResults,
                        targetName=targetName,
                        inputs=inputs,
                        crossReference=crossReference,
                        minScore=minScore,
                        logFile=logFile,
                        interactions=interactions)
        # Highest scoring interactions are reported first.
        ranked = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True)
        with open(args.output, 'w') as output_file:
            for entry in ranked:
                output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"],
                                  entry["inputName"], entry["minZ"],
                                  entry["minInfo"]))
17
c790d25086dc
"planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
guerler
parents:
16
diff
changeset
|
54 |
29
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
27
diff
changeset
|
55 |
39
172398348efd
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
guerler
parents:
29
diff
changeset
|
def matchScores(hhrResults, targetName, inputs, crossReference, minScore, logFile, interactions):
    """Score `targetName` against every entry of `inputs` and record the hits.

    `hhrResults` maps a name to a `(top, hits)` pair where `hits` maps a
    template identifier to a score. `crossReference` maps a template
    identifier to a dict whose `"partners"` entry lists partner identifiers.

    For each input, the best value of `min(target score, input score)` over
    all target templates whose cross-referenced partner is among the input's
    hits is determined. If that value exceeds `minScore`, an entry with the
    keys `targetName`, `inputName`, `minZ` and `minInfo` is stored in
    `interactions` under a key built from both names (larger name first),
    unless an entry with an equal or higher `minZ` already exists there.

    A progress line is printed and written to `logFile`; a target missing
    from `hhrResults` is only reported on stdout. Returns None.
    """
    if targetName not in hhrResults:
        print("Target not found `%s`" % targetName)
        return
    targetTop, targetHits = hhrResults[targetName]
    print("Evaluating %s." % targetName)
    logFile.write("Evaluating %s.\n" % targetName)
    logFile.flush()
    for inputName in inputs:
        if inputName not in hhrResults:
            continue
        inputTop, inputHits = hhrResults[inputName]
        bestScore = 0
        bestInfo = ""
        for targetTemplate in targetHits:
            if targetTemplate not in crossReference:
                continue
            for partner in crossReference[targetTemplate]["partners"]:
                if partner not in inputHits:
                    continue
                # A pair is only as strong as its weaker side.
                pairScore = min(targetHits[targetTemplate], inputHits[partner])
                if pairScore > bestScore:
                    bestScore = pairScore
                    bestInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, targetTemplate, partner)
        if not (bestScore > minScore):
            continue
        # Order the names so that (a, b) and (b, a) share one key.
        if targetName > inputName:
            ordered = (targetName, inputName)
        else:
            ordered = (inputName, targetName)
        pairKey = "%s_%s" % ordered
        # Keep an existing entry unless the new score is strictly better.
        if pairKey in interactions and interactions[pairKey]["minZ"] >= bestScore:
            continue
        interactions[pairKey] = {
            "targetName": targetName,
            "inputName": inputName,
            "minZ": bestScore,
            "minInfo": bestInfo,
        }
0
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
89 |
29
41353488926c
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
guerler
parents:
27
diff
changeset
|
90 |
0
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command line entry point: declare the options, parse them and run `main`.
    parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
    # (short flag, long flag, help text, required) in the order shown by --help.
    optionTable = (
        ('-tl', '--targetlist', 'Text file containing identifiers.', True),
        ('-tp', '--targetpath', 'Directory containing `hhr` files', True),
        ('-il', '--inputlist', 'Text file containing identifiers.', False),
        ('-ip', '--inputpath', 'Directory containing `hhr` files', False),
        ('-c', '--cross', 'PDB Cross Reference', True),
        ('-o', '--output', 'Output file containing min-Z scores', True),
        ('-l', '--log', 'Log file', True),
    )
    for shortFlag, longFlag, helpText, isRequired in optionTable:
        parser.add_argument(shortFlag, longFlag, help=helpText, required=isRequired)
    # The threshold is the only typed option and carries a default.
    parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=25)
    args = parser.parse_args()
    main(args)