comparison spring_minz.py @ 29:41353488926c draft

"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
author guerler
date Sun, 22 Nov 2020 14:15:24 +0000
parents e34da554d415
children 172398348efd
comparison
equal deleted inserted replaced
28:75d1aedc9b3f 29:41353488926c
1 #! /usr/bin/env python3 1 #! /usr/bin/env python3
2 import argparse 2 import argparse
3 import os 3 import os
4
4 5
5 def main(args): 6 def main(args):
6 logFile = open(args.log, 'a+') 7 logFile = open(args.log, 'a+')
7 targets = list() 8 targets = list()
8 targetPath = args.targetpath.rstrip("/") 9 targetPath = args.targetpath.rstrip("/")
9 with open(args.targetlist) as file: 10 with open(args.targetlist) as file:
10 for index, line in enumerate(file): 11 for line in file:
11 name = line.strip() 12 name = line.strip()
12 targets.append(name) 13 targets.append(name)
13 print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist)) 14 print("Loaded %s target names from `%s`." % (len(targets),
15 args.targetlist))
14 if args.inputlist: 16 if args.inputlist:
15 inputs = list() 17 inputs = list()
16 inputPath = args.inputpath.rstrip("/") 18 inputPath = args.inputpath.rstrip("/")
17 with open(args.inputlist) as file: 19 with open(args.inputlist) as file:
18 for index, line in enumerate(file): 20 for line in file:
19 name = line.strip() 21 name = line.strip()
20 inputs.append(name) 22 inputs.append(name)
21 print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist)) 23 print("Loaded %s input names from `%s`." % (len(inputs),
24 args.inputlist))
22 else: 25 else:
23 inputs = targets 26 inputs = targets
24 inputPath = targetPath 27 inputPath = targetPath
25 crossReference = dict() 28 crossReference = dict()
26 with open(args.crossreference) as file: 29 with open(args.crossreference) as file:
27 for index, line in enumerate(file): 30 for line in file:
28 columns = line.split() 31 columns = line.split()
29 core = columns[0] 32 core = columns[0]
30 partner = columns[-1] 33 partner = columns[-1]
31 if core not in crossReference: 34 if core not in crossReference:
32 crossReference[core] = [] 35 crossReference[core] = []
33 crossReference[core].append(partner) 36 crossReference[core].append(partner)
34 print ("Loaded cross reference from `%s`." % args.crossreference) 37 print("Loaded cross reference from `%s`." % args.crossreference)
35 interactions = dict() 38 interactions = dict()
36 for targetName in targets: 39 for targetName in targets:
37 targetFile = "%s/%s" % (targetPath, targetName) 40 targetFile = "%s/%s" % (targetPath, targetName)
38 matchScores(targetFile=targetFile, 41 matchScores(targetFile=targetFile,
39 targetName=targetName, 42 targetName=targetName,
55 crossReference=crossReference, 58 crossReference=crossReference,
56 minScore=args.minscore, 59 minScore=args.minscore,
57 idLength=args.idlength, 60 idLength=args.idlength,
58 logFile=logFile, 61 logFile=logFile,
59 interactions=interactions) 62 interactions=interactions)
60 interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True) 63 interactions = sorted(interactions.values(), key=lambda item: item["minZ"],
64 reverse=True)
61 with open(args.output, 'w') as output_file: 65 with open(args.output, 'w') as output_file:
62 for entry in interactions: 66 for entry in interactions:
63 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"])) 67 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"],
68 entry["inputName"], entry["minZ"],
69 entry["minInfo"]))
64 logFile.close() 70 logFile.close()
65 71
66 def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, logFile, interactions): 72
73 def matchScores(targetFile, targetName, inputs, inputPath, crossReference,
74 minScore, idLength, logFile, interactions):
67 targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength) 75 targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength)
68 if not targetHits: 76 if not targetHits:
69 print("No targets found `%s`" % targetFile) 77 print("No targets found `%s`" % targetFile)
70 else: 78 else:
71 print ("Loaded target scores from `%s`." % targetFile) 79 print("Loaded target scores from `%s`." % targetFile)
72 for inputName in inputs: 80 for inputName in inputs:
73 inputFile = "%s/%s" % (inputPath, inputName) 81 inputFile = "%s/%s" % (inputPath, inputName)
74 inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength) 82 inputTop, inputHits = getTemplateScores(inputFile,
83 minScore, idLength)
75 minZ = 0 84 minZ = 0
76 minInfo = "" 85 minInfo = ""
77 for t in targetHits: 86 for t in targetHits:
78 if t in crossReference: 87 if t in crossReference:
79 partners = crossReference[t] 88 partners = crossReference[t]
80 for p in partners: 89 for p in partners:
81 if p in inputHits: 90 if p in inputHits:
82 score = min(targetHits[t], inputHits[p]) 91 score = min(targetHits[t], inputHits[p])
83 if score > minZ: 92 if score > minZ:
84 minZ = score 93 minZ = score
85 minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p) 94 minInfo = "%s\t%s\t%s\t%s" % (targetTop,
95 inputTop, t, p)
86 if minZ > minScore: 96 if minZ > minScore:
87 if targetName > inputName: 97 if targetName > inputName:
88 interactionKey = "%s_%s" % (targetName, inputName) 98 interactionKey = "%s_%s" % (targetName, inputName)
89 else: 99 else:
90 interactionKey = "%s_%s" % (inputName, targetName) 100 interactionKey = "%s_%s" % (inputName, targetName)
91 if interactionKey in interactions: 101 if interactionKey in interactions:
92 if interactions[interactionKey]["minZ"] >= minZ: 102 if interactions[interactionKey]["minZ"] >= minZ:
93 continue 103 continue
94 interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo) 104 interactions[interactionKey] = dict(targetName=targetName,
95 logFile.write("Interaction between %s and %s [min-Z: %s].\n" % (targetName, inputName, minZ)) 105 inputName=inputName,
106 minZ=minZ, minInfo=minInfo)
107 logFile.write("Interaction between %s and %s [min-Z: %s].\n" %
108 (targetName, inputName, minZ))
109
96 110
97 def getTemplateScores(hhrFile, minScore, idLength): 111 def getTemplateScores(hhrFile, minScore, idLength):
98 result = dict() 112 result = dict()
99 topTemplate = None 113 topTemplate = None
100 idLength = idLength + 4 114 idLength = idLength + 4
110 if topTemplate is None: 124 if topTemplate is None:
111 topTemplate = templateId 125 topTemplate = templateId
112 result[templateId] = templateScore 126 result[templateId] = templateScore
113 return topTemplate, result 127 return topTemplate, result
114 128
129
115 if __name__ == "__main__": 130 if __name__ == "__main__":
116 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') 131 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
117 parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) 132 parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True)
118 parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True) 133 parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True)
119 parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False) 134 parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False)