Mercurial > repos > guerler > springsuite
comparison spring_minz.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:18:57 -0400 |
parents | |
children | f2f38991c36f |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d30785e31577 |
---|---|
1 #! /usr/bin/env python3 | |
2 import argparse | |
3 import os | |
4 | |
def main(args):
    """Score every listed candidate against the target and write the hits.

    Steps, in order:

    1. Read candidate identifiers from ``args.list`` (one per line).
    2. Read the cross reference from ``args.crossreference``; the first
       whitespace-separated column is mapped to the list of third-column
       values that appear with it.
    3. Read the target's template scores from ``args.target`` with
       ``get_template_scores``.
    4. For each candidate, read ``<args.inputs>/<name>`` the same way and
       compute its min-Z (see ``_best_min_z``).
    5. Write the candidates whose min-Z is strictly greater than
       ``args.minscore`` to ``args.output`` as ``"<name> <min-Z>"`` lines,
       highest min-Z first.

    Blank lines in the names file and in the cross-reference file are
    ignored. A cross-reference line with one or two columns still raises
    ``IndexError``, so malformed input is not silently dropped.

    Args:
        args: Namespace providing ``list``, ``crossreference``, ``target``,
            ``inputs``, ``output`` and ``minscore``.
    """
    names = _read_names(args.list)
    print("Loaded %s names from `%s`." % (len(names), args.list))

    crossreference = _read_crossreference(args.crossreference)
    print("Loaded cross reference from `%s`." % args.crossreference)

    targets = get_template_scores(args.target, args.minscore)
    if not targets:
        # Not fatal: the loop below finds nothing and an empty output file
        # is still written.
        print("No targets found `%s`" % args.target)
    else:
        print("Loaded target scores from `%s`." % args.target)

    # Loop-invariant, so normalise the directory once.
    input_directory = args.inputs.rstrip("/")

    interactions = []
    for name in names:
        input_file = "%s/%s" % (input_directory, name)
        templates = get_template_scores(input_file, args.minscore)
        minz = _best_min_z(targets, templates, crossreference)
        if minz > args.minscore:
            interactions.append((name, minz))
            print("Predicting: %s, min-Z: %s" % (name, minz))

    # Strongest predictions first.
    interactions.sort(key=lambda tup: tup[1], reverse=True)
    with open(args.output, 'w') as output_file:
        for name, minz in interactions:
            output_file.write("%s %s\n" % (name, minz))


def _read_names(list_path):
    """Return the stripped, non-blank lines of ``list_path`` in file order."""
    with open(list_path) as handle:
        stripped = (line.strip() for line in handle)
        return [name for name in stripped if name]


def _read_crossreference(crossreference_path):
    """Map each first-column value to the list of its third-column values."""
    crossreference = {}
    with open(crossreference_path) as handle:
        for line in handle:
            columns = line.split()
            if not columns:
                # Blank line (e.g. a trailing newline at end of file).
                continue
            crossreference.setdefault(columns[0], []).append(columns[2])
    return crossreference


def _best_min_z(targets, templates, crossreference):
    """Return the largest ``min(target score, partner score)`` found.

    Only pairs where the target template has a cross-reference entry and the
    partner is present in ``templates`` are considered. The result is 0 when
    there is no such pair, or when none scores above 0.
    """
    best = 0
    for target_id, target_score in targets.items():
        for partner in crossreference.get(target_id, ()):
            if partner in templates:
                score = min(target_score, templates[partner])
                if score > best:
                    best = score
    return best
47 | |
def get_template_scores(hhr_file, min_score):
    """Collect template scores above a threshold from an HHR result file.

    The first nine lines of the file are skipped. Each following line is read
    as a fixed-width record: characters 4-9 are the template identifier and
    characters 57-62 are parsed as a float score. Reading stops at the first
    blank line after the skipped lines.

    Args:
        hhr_file: Path of the result file to read.
        min_score: Exclusive lower bound; only scores strictly greater than
            this value are kept.

    Returns:
        A dict mapping template identifier to score. It is empty when
        ``hhr_file`` is not an existing regular file.
    """
    scores = {}
    if not os.path.isfile(hhr_file):
        return scores
    with open(hhr_file) as handle:
        for line_number, row in enumerate(handle):
            if line_number <= 8:
                # Still inside the skipped leading lines.
                continue
            if row.strip() == "":
                # A blank line ends the table of hits.
                break
            score = float(row[57:63])
            if score > min_score:
                scores[row[4:10]] = score
    return scores
61 | |
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
    parser.add_argument('-t', '--target', help='HHR target file result', required=True)
    parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True)
    parser.add_argument('-l', '--list', help='Text file containing identifiers.', required=True)
    parser.add_argument('-i', '--inputs', help='Directory containing `hhr/X/Y.hhr` files', required=True)
    parser.add_argument('-o', '--output', help='Output file containing minZ-scores', required=True)
    # The threshold is compared against float scores, so accept fractional
    # values such as 10.5; whole numbers are still accepted as before.
    parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=float, default=10)
    args = parser.parse_args()
    main(args)