comparison spring_minz.py @ 39:172398348efd draft

"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
author guerler
date Fri, 22 Jan 2021 15:50:27 +0000
parents 41353488926c
children
comparison
equal deleted inserted replaced
38:80a4b98121b6 39:172398348efd
1 #! /usr/bin/env python3 1 #! /usr/bin/env python3
2 import argparse 2 import argparse
3 import os 3
4 from spring_package.Utilities import getCrossReference, getTemplates
4 5
5 6
6 def main(args): 7 def main(args):
7 logFile = open(args.log, 'a+') 8 minScore = args.minscore
9 logFile = open(args.log, 'w')
8 targets = list() 10 targets = list()
9 targetPath = args.targetpath.rstrip("/") 11 targetPath = args.targetpath.rstrip("/")
12 hhrResults = dict()
10 with open(args.targetlist) as file: 13 with open(args.targetlist) as file:
11 for line in file: 14 for line in file:
12 name = line.strip() 15 name = line.strip()
13 targets.append(name) 16 targets.append(name)
14 print("Loaded %s target names from `%s`." % (len(targets), 17 print("Loaded %s target names from `%s`." % (len(targets), args.targetlist))
15 args.targetlist)) 18 for targetName in targets:
19 targetFile = "%s/%s" % (targetPath, targetName)
20 hhrResults[targetName] = getTemplates(targetFile, minScore)
16 if args.inputlist: 21 if args.inputlist:
17 inputs = list() 22 inputs = list()
18 inputPath = args.inputpath.rstrip("/") 23 inputPath = args.inputpath.rstrip("/")
19 with open(args.inputlist) as file: 24 with open(args.inputlist) as file:
20 for line in file: 25 for line in file:
21 name = line.strip() 26 name = line.strip()
22 inputs.append(name) 27 inputs.append(name)
23 print("Loaded %s input names from `%s`." % (len(inputs), 28 print("Loaded %s input names from `%s`." % (len(inputs), args.inputlist))
24 args.inputlist)) 29 for inputName in inputs:
30 if inputName not in hhrResults:
31 inputFile = "%s/%s" % (inputPath, inputName)
32 hhrResults[inputName] = getTemplates(inputFile, minScore)
25 else: 33 else:
26 inputs = targets 34 inputs = targets
27 inputPath = targetPath 35 print("Loaded hhr results for %s entries." % len(hhrResults.keys()))
28 crossReference = dict() 36 crossReference = getCrossReference(args.cross)
29 with open(args.crossreference) as file: 37 print("Loaded cross reference from `%s`." % args.cross)
30 for line in file:
31 columns = line.split()
32 core = columns[0]
33 partner = columns[-1]
34 if core not in crossReference:
35 crossReference[core] = []
36 crossReference[core].append(partner)
37 print("Loaded cross reference from `%s`." % args.crossreference)
38 interactions = dict() 38 interactions = dict()
39 for targetName in targets: 39 for targetName in targets:
40 targetFile = "%s/%s" % (targetPath, targetName) 40 matchScores(hhrResults=hhrResults,
41 matchScores(targetFile=targetFile,
42 targetName=targetName, 41 targetName=targetName,
43 inputs=inputs, 42 inputs=inputs,
44 inputPath=inputPath,
45 crossReference=crossReference, 43 crossReference=crossReference,
46 idLength=args.idlength,
47 minScore=args.minscore, 44 minScore=args.minscore,
48 logFile=logFile, 45 logFile=logFile,
49 interactions=interactions) 46 interactions=interactions)
50 if args.inputlist: 47 interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True)
51 for inputName in inputs:
52 inputDirectory = inputPath
53 inputFile = "%s/%s" % (inputDirectory, inputName)
54 matchScores(targetFile=inputFile,
55 targetName=inputName,
56 inputs=targets,
57 inputPath=targetPath,
58 crossReference=crossReference,
59 minScore=args.minscore,
60 idLength=args.idlength,
61 logFile=logFile,
62 interactions=interactions)
63 interactions = sorted(interactions.values(), key=lambda item: item["minZ"],
64 reverse=True)
65 with open(args.output, 'w') as output_file: 48 with open(args.output, 'w') as output_file:
66 for entry in interactions: 49 for entry in interactions:
67 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], 50 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"],
68 entry["inputName"], entry["minZ"], 51 entry["inputName"], entry["minZ"],
69 entry["minInfo"])) 52 entry["minInfo"]))
70 logFile.close() 53 logFile.close()
71 54
72 55
73 def matchScores(targetFile, targetName, inputs, inputPath, crossReference, 56 def matchScores(hhrResults, targetName, inputs, crossReference, minScore, logFile, interactions):
74 minScore, idLength, logFile, interactions): 57 if targetName not in hhrResults:
75 targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength) 58 print("Target not found `%s`" % targetName)
76 if not targetHits:
77 print("No targets found `%s`" % targetFile)
78 else: 59 else:
79 print("Loaded target scores from `%s`." % targetFile) 60 targetTop, targetHits = hhrResults[targetName]
61 print("Evaluating %s." % targetName)
62 logFile.write("Evaluating %s.\n" % targetName)
63 logFile.flush()
80 for inputName in inputs: 64 for inputName in inputs:
81 inputFile = "%s/%s" % (inputPath, inputName) 65 if inputName in hhrResults:
82 inputTop, inputHits = getTemplateScores(inputFile, 66 inputTop, inputHits = hhrResults[inputName]
83 minScore, idLength) 67 minZ = 0
84 minZ = 0 68 minInfo = ""
85 minInfo = "" 69 for t in targetHits:
86 for t in targetHits: 70 if t in crossReference:
87 if t in crossReference: 71 partners = crossReference[t]["partners"]
88 partners = crossReference[t] 72 for p in partners:
89 for p in partners: 73 if p in inputHits:
90 if p in inputHits: 74 score = min(targetHits[t], inputHits[p])
91 score = min(targetHits[t], inputHits[p]) 75 if score > minZ:
92 if score > minZ: 76 minZ = score
93 minZ = score 77 minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p)
94 minInfo = "%s\t%s\t%s\t%s" % (targetTop, 78 if minZ > minScore:
95 inputTop, t, p) 79 if targetName > inputName:
96 if minZ > minScore: 80 interactionKey = "%s_%s" % (targetName, inputName)
97 if targetName > inputName: 81 else:
98 interactionKey = "%s_%s" % (targetName, inputName) 82 interactionKey = "%s_%s" % (inputName, targetName)
99 else: 83 if interactionKey in interactions:
100 interactionKey = "%s_%s" % (inputName, targetName) 84 if interactions[interactionKey]["minZ"] >= minZ:
101 if interactionKey in interactions: 85 continue
102 if interactions[interactionKey]["minZ"] >= minZ: 86 interactions[interactionKey] = dict(targetName=targetName,
103 continue 87 inputName=inputName,
104 interactions[interactionKey] = dict(targetName=targetName, 88 minZ=minZ, minInfo=minInfo)
105 inputName=inputName,
106 minZ=minZ, minInfo=minInfo)
107 logFile.write("Interaction between %s and %s [min-Z: %s].\n" %
108 (targetName, inputName, minZ))
109
110
111 def getTemplateScores(hhrFile, minScore, idLength):
112 result = dict()
113 topTemplate = None
114 idLength = idLength + 4
115 if os.path.isfile(hhrFile):
116 with open(hhrFile) as file:
117 for index, line in enumerate(file):
118 if index > 8:
119 if not line.strip():
120 break
121 templateId = line[4:idLength]
122 templateScore = float(line[57:63])
123 if templateScore > minScore:
124 if topTemplate is None:
125 topTemplate = templateId
126 result[templateId] = templateScore
127 return topTemplate, result
128 89
129 90
130 if __name__ == "__main__": 91 if __name__ == "__main__":
131 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') 92 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
132 parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) 93 parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True)
133 parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True) 94 parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True)
134 parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False) 95 parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False)
135 parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=False) 96 parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=False)
136 parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True) 97 parser.add_argument('-c', '--cross', help='PDB Cross Reference', required=True)
137 parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True) 98 parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True)
138 parser.add_argument('-l', '--log', help='Log file', required=True) 99 parser.add_argument('-l', '--log', help='Log file', required=True)
139 parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) 100 parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=25)
140 parser.add_argument('-idx', '--idlength', help='Length of identifier in reference', type=int, default=6)
141 args = parser.parse_args() 101 args = parser.parse_args()
142 main(args) 102 main(args)