Mercurial > repos > guerler > springsuite
comparison spring_minz.py @ 23:5469e19f1f96 draft
"planemo upload commit 37a4c6844fd7ab1071ddf90f51915ec1a13c26b3"
author | guerler |
---|---|
date | Thu, 29 Oct 2020 13:04:47 +0000 |
parents | acaff61a09b2 |
children | 5d1ae615e4ec |
comparison
equal
deleted
inserted
replaced
22:acaff61a09b2 | 23:5469e19f1f96 |
---|---|
1 #! /usr/bin/env python3 | 1 #! /usr/bin/env python3 |
2 import argparse | 2 import argparse |
3 import os | 3 import os |
4 | 4 |
def _readNames(path):
    """Return the non-empty, whitespace-stripped lines of the text file `path`."""
    with open(path) as file:
        stripped = (line.strip() for line in file)
        # Blank lines would otherwise become empty names that resolve to the
        # bare directory instead of a result file.
        return [name for name in stripped if name]


def _readCrossReference(path):
    """Map the first column of each line in `path` to the list of its last columns."""
    crossReference = dict()
    with open(path) as file:
        for line in file:
            columns = line.split()
            if not columns:
                # A blank line has no columns; indexing it would raise IndexError.
                continue
            crossReference.setdefault(columns[0], []).append(columns[-1])
    return crossReference


def main(args):
    """Score every target/input pair and write the ranked interactions.

    Reads these attributes from `args`:
      targetlist / targetpath -- file with one name per line, and the
                                 directory the per-name result files live in.
      inputlist / inputpath   -- optional second list and directory. When
                                 `inputlist` is empty or None the targets are
                                 compared against themselves.
      crossreference          -- whitespace-separated index file; the first
                                 column is mapped to the last column.
      minscore, idx           -- forwarded to `matchScores` as `minScore` and
                                 `idLength`.
      log                     -- file opened in append mode and handed to
                                 `matchScores`.
      output                  -- file receiving one tab-separated line per
                                 interaction (target name, input name, minZ,
                                 minInfo), sorted by descending minZ.

    Raises:
      ValueError -- if `inputlist` is given without `inputpath`.
    """
    targetPath = args.targetpath.rstrip("/")
    targets = _readNames(args.targetlist)
    print("Loaded %s target names from `%s`." % (len(targets), args.targetlist))

    if args.inputlist:
        if not args.inputpath:
            # `inputpath` is optional on the command line, so fail with a
            # clear message instead of an AttributeError on None.
            raise ValueError("An input path is required when an input list is given.")
        inputPath = args.inputpath.rstrip("/")
        inputs = _readNames(args.inputlist)
        print("Loaded %s input names from `%s`." % (len(inputs), args.inputlist))
    else:
        # No separate input set: compare the targets against each other.
        inputs = targets
        inputPath = targetPath

    crossReference = _readCrossReference(args.crossreference)
    print("Loaded cross reference from `%s`." % args.crossreference)

    interactions = dict()
    # The context manager closes the log even if scoring raises.
    with open(args.log, 'a+') as logFile:
        for targetName in targets:
            matchScores(targetFile="%s/%s" % (targetPath, targetName),
                        targetName=targetName,
                        inputs=inputs,
                        inputPath=inputPath,
                        crossReference=crossReference,
                        minScore=args.minscore,
                        idLength=args.idx,
                        logFile=logFile,
                        interactions=interactions)
        if args.inputlist:
            # Second pass with the roles swapped: each input is scored against
            # all targets, using the same cross reference.
            for inputName in inputs:
                matchScores(targetFile="%s/%s" % (inputPath, inputName),
                            targetName=inputName,
                            inputs=targets,
                            inputPath=targetPath,
                            crossReference=crossReference,
                            minScore=args.minscore,
                            idLength=args.idx,
                            logFile=logFile,
                            interactions=interactions)

    ranked = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True)
    with open(args.output, 'w') as output_file:
        for entry in ranked:
            output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"]))
59 | 65 |
def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, logFile, interactions):
    """Record the best cross-referenced template match between one target and each input.

    Parameters:
      targetFile     -- path of the target's result file, parsed with
                        `getTemplateScores`.
      targetName     -- name stored in the interaction entry and used in its key.
      inputs         -- iterable of input names; each one is read from
                        "<inputPath>/<inputName>".
      inputPath      -- directory prefix for the input result files.
      crossReference -- dict mapping a template id to a list of partner
                        template ids.
      minScore       -- threshold passed to `getTemplateScores`; a pair is only
                        recorded when its score exceeds it.
      idLength       -- identifier length passed to `getTemplateScores`.
      logFile        -- writable text file; one line is written per recorded
                        interaction.
      interactions   -- dict updated in place. Keys are the two names joined
                        by "_", larger name first, so both orderings of a pair
                        share one entry. Values are dicts with `targetName`,
                        `inputName`, `minZ` and `minInfo`.

    The score of a template pair (t, p) is the smaller of the target's score
    for `t` and the input's score for `p`. The highest-scoring pair is kept
    per input. An existing entry is only replaced by a strictly higher score.
    Returns None.
    """
    targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength)
    if not targetHits:
        print("No targets found `%s`" % targetFile)
        return
    print("Loaded target scores from `%s`." % targetFile)

    # Target templates without a cross-reference entry can never pair up, and
    # this lookup does not depend on the input, so it is done once.
    candidates = [(t, targetHits[t], crossReference[t]) for t in targetHits if t in crossReference]

    for inputName in inputs:
        inputFile = "%s/%s" % (inputPath, inputName)
        inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength)

        # None means "no matching pair yet". Using 0 as the start value would
        # be indistinguishable from a real score when `minScore` is negative.
        minZ = None
        minInfo = ""
        for t, targetScore, partners in candidates:
            for p in partners:
                if p not in inputHits:
                    continue
                score = min(targetScore, inputHits[p])
                if minZ is None or score > minZ:
                    minZ = score
                    minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p)

        if minZ is None or minZ <= minScore:
            continue

        if targetName > inputName:
            interactionKey = "%s_%s" % (targetName, inputName)
        else:
            interactionKey = "%s_%s" % (inputName, targetName)

        existing = interactions.get(interactionKey)
        if existing is not None and existing["minZ"] >= minZ:
            continue

        interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo)
        logFile.write("Interaction between %s and %s [min-Z: %s].\n" % (targetName, inputName, minZ))
92 | 96 |
def getTemplateScores(hhrFile, minScore, identifierLength):
    """Read template ids and scores from the hit table of a result file.

    The first nine lines of the file are skipped and reading stops at the
    first blank line after them. On each remaining line the template id is
    taken from the fixed columns [4, 4 + identifierLength) and the score from
    columns [57, 63).
    NOTE(review): this layout presumably matches the summary hit list of an
    HH-search `.hhr` file -- confirm against the tool version in use.

    Parameters:
      hhrFile          -- path of the result file; a missing file yields an
                          empty result instead of an error.
      minScore         -- only scores strictly greater than this are kept.
      identifierLength -- number of characters of the template id.

    Returns:
      (topTemplate, result) -- `topTemplate` is the id of the first line whose
      score exceeds `minScore`, or None if there is none. `result` maps
      template id to score. When an id occurs more than once, the last
      occurrence wins.
    """
    result = dict()
    topTemplate = None
    if not os.path.isfile(hhrFile):
        return topTemplate, result

    identifierEnd = identifierLength + 4
    with open(hhrFile) as file:
        for index, line in enumerate(file):
            if index <= 8:
                continue
            if not line.strip():
                break
            templateId = line[4:identifierEnd]
            try:
                templateScore = float(line[57:63])
            except ValueError:
                # A truncated or malformed line has no parsable score column.
                # Report and skip it so one bad line does not abort the run.
                print("Skipping malformed hit line %s in `%s`." % (index + 1, hhrFile))
                continue
            if templateScore > minScore:
                if topTemplate is None:
                    topTemplate = templateId
                result[templateId] = templateScore
    return topTemplate, result
110 | 114 |
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
    parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True)
    parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True)
    parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False)
    parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=False)
    parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True)
    parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6)
    parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True)
    parser.add_argument('-l', '--log', help='Log file', required=True)
    parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10)
    args = parser.parse_args()
    # Both input options are optional, but an input list is unusable without
    # the directory holding its files. Reject that combination with a usage
    # error instead of failing later with a traceback.
    if args.inputlist and not args.inputpath:
        parser.error('--inputpath is required when --inputlist is given')
    main(args)