Mercurial > repos > guerler > springsuite
annotate spring_minz.py @ 16:16eb2acaaa20 draft
"planemo upload commit 0e4e1f8de9464b411152c44f4edd099db8ad9e0b"
author | guerler |
---|---|
date | Sat, 24 Oct 2020 17:48:06 +0000 |
parents | 4a4888bf0338 |
children | c790d25086dc |
rev | line source |
---|---|
0
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
1 #! /usr/bin/env python3 |
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
2 import argparse |
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
3 import os |
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
4 |
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
def main(args):
    """Score candidate interaction partners and append the hits to a file.

    Reads the following attributes from ``args``:

    * ``list`` - text file with one candidate identifier per line.
    * ``crossreference`` - whitespace-separated index file; the first column
      of a row is used as the core identifier and the last column as its
      partner.
    * ``target`` - result file of the target, parsed by
      ``get_template_scores``.
    * ``inputs`` - directory holding one result file per candidate, named
      after the candidate identifier.
    * ``minscore`` / ``idx`` - forwarded to ``get_template_scores``;
      ``minscore`` is also the threshold a candidate's min-Z must exceed.
    * ``output`` - file the predictions are appended to.
    * ``name`` - label written in the first output column.

    For every candidate the min-Z score is the largest value of
    ``min(target score, candidate score)`` over all cross-referenced
    template pairs.  Candidates whose min-Z exceeds ``minscore`` are written
    in descending min-Z order as tab-separated rows.
    """
    # Blank lines carry no identifier, so they are dropped instead of being
    # probed as an (always missing) input file.
    with open(args.list) as file:
        names = [entry for entry in (line.strip() for line in file) if entry]
    print("Loaded %s names from `%s`." % (len(names), args.list))

    crossreference = {}
    with open(args.crossreference) as file:
        for line in file:
            columns = line.split()
            if not columns:
                # An empty row has no first column; indexing it would raise
                # IndexError.
                continue
            crossreference.setdefault(columns[0], []).append(columns[-1])
    print("Loaded cross reference from `%s`." % args.crossreference)

    toptarget, targets = get_template_scores(args.target, args.minscore, args.idx)
    interactions = []
    if not targets:
        print("No targets found `%s`" % args.target)
    else:
        print("Loaded target scores from `%s`." % args.target)
        # The directory prefix does not depend on the candidate.
        input_directory = args.inputs.rstrip("/")
        for name in names:
            input_file = "%s/%s" % (input_directory, name)
            toptemplate, templates = get_template_scores(input_file, args.minscore, args.idx)
            minz = 0
            mint = ""
            for t, target_score in targets.items():
                for p in crossreference.get(t, ()):
                    if p in templates:
                        # A pair is only as strong as its weaker member.
                        score = min(target_score, templates[p])
                        if score > minz:
                            minz = score
                            mint = "%s\t%s\t%s\t%s" % (toptarget, toptemplate, t, p)
            if minz > args.minscore:
                interactions.append((name, minz, mint))
                print("Predicting: %s, min-Z: %s, templates: %s" % (name, minz, mint))
        interactions.sort(key=lambda tup: tup[1], reverse=True)

    # Opened in append mode: rows from earlier runs are kept.
    with open(args.output, 'a+') as output_file:
        for i in interactions:
            output_file.write("%s\t%s\t%s\t%s\n" % (args.name, i[0], i[1], i[2]))
0
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
49 |
9
4ac5d5a9b21c
"planemo upload commit f465445ebca0307953f59938494a2244ca8ea23c"
guerler
parents:
8
diff
changeset
|
def get_template_scores(hhr_file, min_score, identifier_length):
    """Read template identifiers and scores from a result file.

    The first nine lines of ``hhr_file`` are skipped.  The lines after them
    are read as fixed-width rows until the first blank line: the identifier
    is the ``identifier_length`` characters starting at column 4 and the
    score is the number found in columns 57-62.
    NOTE(review): presumably this matches the hit table of an HH-search
    ``.hhr`` file - confirm against the files actually produced.

    Returns a tuple ``(first, scores)`` where ``scores`` maps each identifier
    whose score is greater than ``min_score`` to that score, and ``first`` is
    the earliest such identifier in the file (``None`` if there is none).
    When ``hhr_file`` is not an existing file the result is ``(None, {})``.
    """
    scores = {}
    first_hit = None
    id_end = 4 + identifier_length
    if not os.path.isfile(hhr_file):
        return first_hit, scores
    with open(hhr_file) as handle:
        for row_number, row in enumerate(handle):
            if row_number < 9:
                # Still inside the leading lines that precede the rows.
                continue
            if not row.strip():
                # A blank line ends the table.
                break
            hit_id = row[4:id_end]
            hit_score = float(row[57:63])
            if hit_score > min_score:
                scores[hit_id] = hit_score
                if first_hit is None:
                    first_hit = hit_id
    return first_hit, scores
0
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
67 |
d30785e31577
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
guerler
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
    parser.add_argument('-t', '--target', help='HHR target file result', required=True)
    parser.add_argument('-n', '--name', help='HHR target name', required=True)
    parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True)
    parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6)
    parser.add_argument('-l', '--list', help='Text file containing identifiers.', required=True)
    parser.add_argument('-i', '--inputs', help='Directory containing `hhr` files', required=True)
    parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True)
    # The threshold is compared against scores parsed with float(), so
    # fractional thresholds are accepted; integer input still parses.
    parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=float, default=10)
    args = parser.parse_args()
    main(args)