Mercurial > repos > guerler > springsuite
comparison spring_minz.py @ 39:172398348efd draft
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
author | guerler |
---|---|
date | Fri, 22 Jan 2021 15:50:27 +0000 |
parents | 41353488926c |
children |
comparison
equal
deleted
inserted
replaced
38:80a4b98121b6 | 39:172398348efd |
---|---|
1 #! /usr/bin/env python3 | 1 #! /usr/bin/env python3 |
2 import argparse | 2 import argparse |
3 import os | 3 |
4 from spring_package.Utilities import getCrossReference, getTemplates | |
4 | 5 |
5 | 6 |
6 def main(args): | 7 def main(args): |
7 logFile = open(args.log, 'a+') | 8 minScore = args.minscore |
9 logFile = open(args.log, 'w') | |
8 targets = list() | 10 targets = list() |
9 targetPath = args.targetpath.rstrip("/") | 11 targetPath = args.targetpath.rstrip("/") |
12 hhrResults = dict() | |
10 with open(args.targetlist) as file: | 13 with open(args.targetlist) as file: |
11 for line in file: | 14 for line in file: |
12 name = line.strip() | 15 name = line.strip() |
13 targets.append(name) | 16 targets.append(name) |
14 print("Loaded %s target names from `%s`." % (len(targets), | 17 print("Loaded %s target names from `%s`." % (len(targets), args.targetlist)) |
15 args.targetlist)) | 18 for targetName in targets: |
19 targetFile = "%s/%s" % (targetPath, targetName) | |
20 hhrResults[targetName] = getTemplates(targetFile, minScore) | |
16 if args.inputlist: | 21 if args.inputlist: |
17 inputs = list() | 22 inputs = list() |
18 inputPath = args.inputpath.rstrip("/") | 23 inputPath = args.inputpath.rstrip("/") |
19 with open(args.inputlist) as file: | 24 with open(args.inputlist) as file: |
20 for line in file: | 25 for line in file: |
21 name = line.strip() | 26 name = line.strip() |
22 inputs.append(name) | 27 inputs.append(name) |
23 print("Loaded %s input names from `%s`." % (len(inputs), | 28 print("Loaded %s input names from `%s`." % (len(inputs), args.inputlist)) |
24 args.inputlist)) | 29 for inputName in inputs: |
30 if inputName not in hhrResults: | |
31 inputFile = "%s/%s" % (inputPath, inputName) | |
32 hhrResults[inputName] = getTemplates(inputFile, minScore) | |
25 else: | 33 else: |
26 inputs = targets | 34 inputs = targets |
27 inputPath = targetPath | 35 print("Loaded hhr results for %s entries." % len(hhrResults.keys())) |
28 crossReference = dict() | 36 crossReference = getCrossReference(args.cross) |
29 with open(args.crossreference) as file: | 37 print("Loaded cross reference from `%s`." % args.cross) |
30 for line in file: | |
31 columns = line.split() | |
32 core = columns[0] | |
33 partner = columns[-1] | |
34 if core not in crossReference: | |
35 crossReference[core] = [] | |
36 crossReference[core].append(partner) | |
37 print("Loaded cross reference from `%s`." % args.crossreference) | |
38 interactions = dict() | 38 interactions = dict() |
39 for targetName in targets: | 39 for targetName in targets: |
40 targetFile = "%s/%s" % (targetPath, targetName) | 40 matchScores(hhrResults=hhrResults, |
41 matchScores(targetFile=targetFile, | |
42 targetName=targetName, | 41 targetName=targetName, |
43 inputs=inputs, | 42 inputs=inputs, |
44 inputPath=inputPath, | |
45 crossReference=crossReference, | 43 crossReference=crossReference, |
46 idLength=args.idlength, | |
47 minScore=args.minscore, | 44 minScore=args.minscore, |
48 logFile=logFile, | 45 logFile=logFile, |
49 interactions=interactions) | 46 interactions=interactions) |
50 if args.inputlist: | 47 interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True) |
51 for inputName in inputs: | |
52 inputDirectory = inputPath | |
53 inputFile = "%s/%s" % (inputDirectory, inputName) | |
54 matchScores(targetFile=inputFile, | |
55 targetName=inputName, | |
56 inputs=targets, | |
57 inputPath=targetPath, | |
58 crossReference=crossReference, | |
59 minScore=args.minscore, | |
60 idLength=args.idlength, | |
61 logFile=logFile, | |
62 interactions=interactions) | |
63 interactions = sorted(interactions.values(), key=lambda item: item["minZ"], | |
64 reverse=True) | |
65 with open(args.output, 'w') as output_file: | 48 with open(args.output, 'w') as output_file: |
66 for entry in interactions: | 49 for entry in interactions: |
67 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], | 50 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], |
68 entry["inputName"], entry["minZ"], | 51 entry["inputName"], entry["minZ"], |
69 entry["minInfo"])) | 52 entry["minInfo"])) |
70 logFile.close() | 53 logFile.close() |
71 | 54 |
72 | 55 |
73 def matchScores(targetFile, targetName, inputs, inputPath, crossReference, | 56 def matchScores(hhrResults, targetName, inputs, crossReference, minScore, logFile, interactions): |
74 minScore, idLength, logFile, interactions): | 57 if targetName not in hhrResults: |
75 targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength) | 58 print("Target not found `%s`" % targetName) |
76 if not targetHits: | |
77 print("No targets found `%s`" % targetFile) | |
78 else: | 59 else: |
79 print("Loaded target scores from `%s`." % targetFile) | 60 targetTop, targetHits = hhrResults[targetName] |
61 print("Evaluating %s." % targetName) | |
62 logFile.write("Evaluating %s.\n" % targetName) | |
63 logFile.flush() | |
80 for inputName in inputs: | 64 for inputName in inputs: |
81 inputFile = "%s/%s" % (inputPath, inputName) | 65 if inputName in hhrResults: |
82 inputTop, inputHits = getTemplateScores(inputFile, | 66 inputTop, inputHits = hhrResults[inputName] |
83 minScore, idLength) | 67 minZ = 0 |
84 minZ = 0 | 68 minInfo = "" |
85 minInfo = "" | 69 for t in targetHits: |
86 for t in targetHits: | 70 if t in crossReference: |
87 if t in crossReference: | 71 partners = crossReference[t]["partners"] |
88 partners = crossReference[t] | 72 for p in partners: |
89 for p in partners: | 73 if p in inputHits: |
90 if p in inputHits: | 74 score = min(targetHits[t], inputHits[p]) |
91 score = min(targetHits[t], inputHits[p]) | 75 if score > minZ: |
92 if score > minZ: | 76 minZ = score |
93 minZ = score | 77 minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p) |
94 minInfo = "%s\t%s\t%s\t%s" % (targetTop, | 78 if minZ > minScore: |
95 inputTop, t, p) | 79 if targetName > inputName: |
96 if minZ > minScore: | 80 interactionKey = "%s_%s" % (targetName, inputName) |
97 if targetName > inputName: | 81 else: |
98 interactionKey = "%s_%s" % (targetName, inputName) | 82 interactionKey = "%s_%s" % (inputName, targetName) |
99 else: | 83 if interactionKey in interactions: |
100 interactionKey = "%s_%s" % (inputName, targetName) | 84 if interactions[interactionKey]["minZ"] >= minZ: |
101 if interactionKey in interactions: | 85 continue |
102 if interactions[interactionKey]["minZ"] >= minZ: | 86 interactions[interactionKey] = dict(targetName=targetName, |
103 continue | 87 inputName=inputName, |
104 interactions[interactionKey] = dict(targetName=targetName, | 88 minZ=minZ, minInfo=minInfo) |
105 inputName=inputName, | |
106 minZ=minZ, minInfo=minInfo) | |
107 logFile.write("Interaction between %s and %s [min-Z: %s].\n" % | |
108 (targetName, inputName, minZ)) | |
109 | |
110 | |
111 def getTemplateScores(hhrFile, minScore, idLength): | |
112 result = dict() | |
113 topTemplate = None | |
114 idLength = idLength + 4 | |
115 if os.path.isfile(hhrFile): | |
116 with open(hhrFile) as file: | |
117 for index, line in enumerate(file): | |
118 if index > 8: | |
119 if not line.strip(): | |
120 break | |
121 templateId = line[4:idLength] | |
122 templateScore = float(line[57:63]) | |
123 if templateScore > minScore: | |
124 if topTemplate is None: | |
125 topTemplate = templateId | |
126 result[templateId] = templateScore | |
127 return topTemplate, result | |
128 | 89 |
129 | 90 |
130 if __name__ == "__main__": | 91 if __name__ == "__main__": |
131 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') | 92 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') |
132 parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) | 93 parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) |
133 parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True) | 94 parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True) |
134 parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False) | 95 parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False) |
135 parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=False) | 96 parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=False) |
136 parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True) | 97 parser.add_argument('-c', '--cross', help='PDB Cross Reference', required=True) |
137 parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True) | 98 parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True) |
138 parser.add_argument('-l', '--log', help='Log file', required=True) | 99 parser.add_argument('-l', '--log', help='Log file', required=True) |
139 parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) | 100 parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=25) |
140 parser.add_argument('-idx', '--idlength', help='Length of identifier in reference', type=int, default=6) | |
141 args = parser.parse_args() | 101 args = parser.parse_args() |
142 main(args) | 102 main(args) |