comparison spring_minz.py @ 16:16eb2acaaa20 draft

"planemo upload commit 0e4e1f8de9464b411152c44f4edd099db8ad9e0b"
author guerler
date Sat, 24 Oct 2020 17:48:06 +0000
parents 4a4888bf0338
children c790d25086dc
comparison
equal deleted inserted replaced
15:4a4888bf0338 16:16eb2acaaa20
11 crossreference = {} 11 crossreference = {}
12 with open(args.crossreference) as file: 12 with open(args.crossreference) as file:
13 for index, line in enumerate(file): 13 for index, line in enumerate(file):
14 columns = line.split() 14 columns = line.split()
15 core = columns[0] 15 core = columns[0]
16 partner = columns[2] 16 partner = columns[-1]
17 if core not in crossreference: 17 if core not in crossreference:
18 crossreference[core] = [] 18 crossreference[core] = []
19 crossreference[core].append(partner) 19 crossreference[core].append(partner)
20 print ("Loaded cross reference from `%s`." % args.crossreference) 20 print ("Loaded cross reference from `%s`." % args.crossreference)
21 targets = get_template_scores(args.target, args.minscore, args.idx) 21 toptarget, targets = get_template_scores(args.target, args.minscore, args.idx)
22 interactions = [] 22 interactions = []
23 if not targets: 23 if not targets:
24 print("No targets found `%s`" % args.target) 24 print("No targets found `%s`" % args.target)
25 else: 25 else:
26 print ("Loaded target scores from `%s`." % args.target) 26 print ("Loaded target scores from `%s`." % args.target)
27 for name in names: 27 for name in names:
28 input_directory = args.inputs.rstrip("/") 28 input_directory = args.inputs.rstrip("/")
29 input_file = "%s/%s" % (input_directory, name) 29 input_file = "%s/%s" % (input_directory, name)
30 templates = get_template_scores(input_file, args.minscore, args.idx) 30 toptemplate, templates = get_template_scores(input_file, args.minscore, args.idx)
31 minz = 0 31 minz = 0
32 mint = ""
32 for t in targets: 33 for t in targets:
33 if t in crossreference: 34 if t in crossreference:
34 partners = crossreference[t] 35 partners = crossreference[t]
35 for p in partners: 36 for p in partners:
36 if p in templates: 37 if p in templates:
37 score = min(targets[t], templates[p]) 38 score = min(targets[t], templates[p])
38 if score > minz: 39 if score > minz:
39 minz = score 40 minz = score
41 mint = "%s\t%s\t%s\t%s" % (toptarget, toptemplate, t, p)
40 if minz > args.minscore: 42 if minz > args.minscore:
41 interactions.append((name, minz)) 43 interactions.append((name, minz, mint))
42 print("Predicting: %s, min-Z: %s" % (name, minz)) 44 print("Predicting: %s, min-Z: %s, templates: %s" % (name, minz, mint))
43 interactions.sort(key=lambda tup: tup[1], reverse=True) 45 interactions.sort(key=lambda tup: tup[1], reverse=True)
44 with open(args.output, 'a+') as output_file: 46 with open(args.output, 'a+') as output_file:
45 for i in interactions: 47 for i in interactions:
46 output_file.write("%s\t%s\t%s\n" % (args.name, i[0], i[1])) 48 output_file.write("%s\t%s\t%s\t%s\n" % (args.name, i[0], i[1], i[2]))
47 49
def get_template_scores(hhr_file, min_score, identifier_length):
    """Read per-template scores from a result file and keep the high ones.

    The first nine lines of the file (indices 0-8) are skipped. Every
    following line is read as a fixed-width record until the first blank
    line, which ends parsing. Presumably this is the hit table of an
    HH-search `hhr` file -- verify against the files actually supplied.

    Args:
        hhr_file: Path of the file to read. A path that is not an
            existing regular file yields an empty result.
        min_score: Exclusive lower bound; only scores strictly greater
            than this value are kept.
        identifier_length: Number of characters of the template
            identifier, which starts at column 4 of each record.

    Returns:
        A tuple ``(toptemplate, result)``. ``result`` maps each kept
        identifier to its score as a float; if an identifier occurs more
        than once, the last kept occurrence wins. ``toptemplate`` is the
        first identifier whose score passed the threshold, or ``None``
        when nothing passed.

    Raises:
        ValueError: If columns 57-62 of a record do not parse as a float.
    """
    scores = {}
    first_hit = None

    # Nothing to parse: report "no top template" and an empty mapping.
    if not os.path.isfile(hhr_file):
        return first_hit, scores

    # Identifiers begin at column 4, so the slice ends 4 columns further on.
    id_end = identifier_length + 4

    with open(hhr_file) as handle:
        for line_number, record in enumerate(handle):
            # Skip the leading nine lines that precede the records.
            if line_number <= 8:
                continue
            # A blank line terminates the table.
            if not record.strip():
                break
            hit_id = record[4:id_end]
            hit_score = float(record[57:63])
            # Written as "not >" so an unordered value (NaN) is dropped too.
            if not hit_score > min_score:
                continue
            if first_hit is None:
                first_hit = hit_id
            scores[hit_id] = hit_score

    return first_hit, scores
62 67
if __name__ == "__main__":
    # Command-line entry point: declare the options, parse them, and hand
    # the resulting namespace to main().
    parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
    # Target result file and the name written as first column of the output.
    parser.add_argument('-t', '--target', help='HHR target file result', required=True)
    parser.add_argument('-n', '--name', help='HHR target name', required=True)
    # Index file mapping a core template to its partner templates.
    parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True)
    # Identifier width used when slicing template ids out of result lines.
    parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6)
    parser.add_argument('-l', '--list', help='Text file containing identifiers.', required=True)
    parser.add_argument('-i', '--inputs', help='Directory containing `hhr` files', required=True)
    # Help text previously ended in a stray, unbalanced backtick.
    parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True)
    # Scores must be strictly greater than this threshold to be considered.
    parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10)
    args = parser.parse_args()
    main(args)