Mercurial > repos > guerler > springsuite
comparison spring_minz.py @ 17:c790d25086dc draft
"planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
author | guerler |
---|---|
date | Wed, 28 Oct 2020 05:11:56 +0000 |
parents | 16eb2acaaa20 |
children | 5feab7f00f02 |
comparison
Legend: equal, deleted, inserted, replaced
16:16eb2acaaa20 | 17:c790d25086dc |
---|---|
1 #! /usr/bin/env python3 | 1 #! /usr/bin/env python3 |
2 import argparse | 2 import argparse |
3 import os | 3 import os |
4 | 4 |
5 def main(args): | 5 def main(args): |
6 names = [] | 6 inputs = set() |
7 with open(args.list) as file: | 7 with open(args.inputlist) as file: |
8 for index, line in enumerate(file): | 8 for index, line in enumerate(file): |
9 names.append(line.strip()) | 9 name = line.strip() |
10 print ("Loaded %s names from `%s`." % (len(names), args.list)) | 10 inputs.add(name) |
11 crossreference = {} | 11 print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist)) |
12 targets = set() | |
13 duplicates = 0 | |
14 with open(args.targetlist) as file: | |
15 for index, line in enumerate(file): | |
16 name = line.strip() | |
17 targets.add(name) | |
18 if name in inputs: | |
19 duplicates = duplicates + 1 | |
20 print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist)) | |
21 crossReference = dict() | |
12 with open(args.crossreference) as file: | 22 with open(args.crossreference) as file: |
13 for index, line in enumerate(file): | 23 for index, line in enumerate(file): |
14 columns = line.split() | 24 columns = line.split() |
15 core = columns[0] | 25 core = columns[0] |
16 partner = columns[-1] | 26 partner = columns[-1] |
17 if core not in crossreference: | 27 if core not in crossReference: |
18 crossreference[core] = [] | 28 crossReference[core] = [] |
19 crossreference[core].append(partner) | 29 crossReference[core].append(partner) |
20 print ("Loaded cross reference from `%s`." % args.crossreference) | 30 print ("Loaded cross reference from `%s`." % args.crossreference) |
21 toptarget, targets = get_template_scores(args.target, args.minscore, args.idx) | 31 interactions = dict() |
22 interactions = [] | 32 for targetName in targets: |
23 if not targets: | 33 targetDirectory = args.targetpath.rstrip("/") |
24 print("No targets found `%s`" % args.target) | 34 targetFile = "%s/%s" % (targetDirectory, targetName) |
35 matchScores(targetFile=targetFile, | |
36 targetName=targetName, | |
37 inputs=inputs, | |
38 inputPath=args.inputpath, | |
39 crossReference=crossReference, | |
40 minScore=args.minscore, | |
41 idLength=args.idx, | |
42 interactions=interactions) | |
43 if duplicates == len(targets): | |
44 for inputName in inputs: | |
45 inputDirectory = args.inputpath.rstrip("/") | |
46 inputFile = "%s/%s" % (inputDirectory, inputName) | |
47 matchScores(targetFile=inputFile, | |
48 targetName=inputName, | |
49 inputs=targets, | |
50 inputPath=args.targetpath, | |
51 crossReference=crossReference, | |
52 minScore=args.minscore, | |
53 idLength=args.idx, | |
54 interactions=interactions) | |
55 interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True) | |
56 with open(args.output, 'w') as output_file: | |
57 for entry in interactions: | |
58 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"])) | |
59 | |
60 def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, interactions): | |
61 targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength) | |
62 if not targetHits: | |
63 print("No targets found `%s`" % targetFile) | |
25 else: | 64 else: |
26 print ("Loaded target scores from `%s`." % args.target) | 65 print ("Loaded target scores from `%s`." % targetFile) |
27 for name in names: | 66 for inputName in inputs: |
28 input_directory = args.inputs.rstrip("/") | 67 inputDirectory = inputPath.rstrip("/") |
29 input_file = "%s/%s" % (input_directory, name) | 68 inputFile = "%s/%s" % (inputDirectory, inputName) |
30 toptemplate, templates = get_template_scores(input_file, args.minscore, args.idx) | 69 inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength) |
31 minz = 0 | 70 minZ = 0 |
32 mint = "" | 71 minInfo = "" |
33 for t in targets: | 72 for t in targetHits: |
34 if t in crossreference: | 73 if t in crossReference: |
35 partners = crossreference[t] | 74 partners = crossReference[t] |
36 for p in partners: | 75 for p in partners: |
37 if p in templates: | 76 if p in inputHits: |
38 score = min(targets[t], templates[p]) | 77 score = min(targetHits[t], inputHits[p]) |
39 if score > minz: | 78 if score > minZ: |
40 minz = score | 79 minZ = score |
41 mint = "%s\t%s\t%s\t%s" % (toptarget, toptemplate, t, p) | 80 minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p) |
42 if minz > args.minscore: | 81 if minZ > minScore: |
43 interactions.append((name, minz, mint)) | 82 if targetName > inputName: |
44 print("Predicting: %s, min-Z: %s, templates: %s" % (name, minz, mint)) | 83 interactionKey = "%s_%s_%s" % (targetName, inputName, minZ) |
45 interactions.sort(key=lambda tup: tup[1], reverse=True) | 84 else: |
46 with open(args.output, 'a+') as output_file: | 85 interactionKey = "%s_%s_%s" % (inputName, targetName, minZ) |
47 for i in interactions: | 86 if interactionKey not in interactions: |
48 output_file.write("%s\t%s\t%s\t%s\n" % (args.name, i[0], i[1], i[2])) | 87 interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo) |
88 print("Predicting: %s, min-Z: %s, templates: %s" % (inputName, minZ, minInfo)) | |
89 return interactions | |
49 | 90 |
50 def get_template_scores(hhr_file, min_score, identifier_length): | 91 def getTemplateScores(hhrFile, minScore, identifierLength): |
51 result = {} | 92 result = dict() |
52 toptemplate = None | 93 topTemplate = None |
53 identifier_length = identifier_length + 4 | 94 identifierLength = identifierLength + 4 |
54 if os.path.isfile(hhr_file): | 95 if os.path.isfile(hhrFile): |
55 with open(hhr_file) as file: | 96 with open(hhrFile) as file: |
56 for index, line in enumerate(file): | 97 for index, line in enumerate(file): |
57 if index > 8: | 98 if index > 8: |
58 if not line.strip(): | 99 if not line.strip(): |
59 break | 100 break |
60 template_id = line[4:identifier_length] | 101 templateId = line[4:identifierLength] |
61 template_score = float(line[57:63]) | 102 templateScore = float(line[57:63]) |
62 if template_score > min_score: | 103 if templateScore > minScore: |
63 if toptemplate is None: | 104 if topTemplate is None: |
64 toptemplate = template_id | 105 topTemplate = templateId |
65 result[template_id] = template_score | 106 result[templateId] = templateScore |
66 return toptemplate, result | 107 return topTemplate, result |
67 | 108 |
68 if __name__ == "__main__": | 109 if __name__ == "__main__": |
69 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') | 110 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') |
70 parser.add_argument('-t', '--target', help='HHR target file result', required=True) | 111 parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=True) |
71 parser.add_argument('-n', '--name', help='HHR target name', required=True) | 112 parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=True) |
113 parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) | |
114 parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True) | |
72 parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True) | 115 parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True) |
73 parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6) | 116 parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6) |
74 parser.add_argument('-l', '--list', help='Text file containing identifiers.', required=True) | 117 parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True) |
75 parser.add_argument('-i', '--inputs', help='Directory containing `hhr` files', required=True) | |
76 parser.add_argument('-o', '--output', help='Output file containing min-Z scores`', required=True) | |
77 parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) | 118 parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) |
78 args = parser.parse_args() | 119 args = parser.parse_args() |
79 main(args) | 120 main(args) |