comparison spring_minz.py @ 23:5469e19f1f96 draft

"planemo upload commit 37a4c6844fd7ab1071ddf90f51915ec1a13c26b3"
author guerler
date Thu, 29 Oct 2020 13:04:47 +0000
parents acaff61a09b2
children 5d1ae615e4ec
comparison
equal deleted inserted replaced
22:acaff61a09b2 23:5469e19f1f96
1 #! /usr/bin/env python3 1 #! /usr/bin/env python3
2 import argparse 2 import argparse
3 import os 3 import os
4 4
def _readNames(listFile):
    """Return the non-blank, whitespace-stripped lines of `listFile`."""
    with open(listFile) as file:
        stripped = (line.strip() for line in file)
        return [name for name in stripped if name]


def main(args):
    """Score every target/input pair and write the interactions to a file.

    Reads the target names (and optionally a separate list of input names),
    loads the cross reference index, calls `matchScores` for every target
    and writes the collected interactions to `args.output`, sorted by
    descending min-Z.  Each output row is tab separated: target name, input
    name, min-Z and the info string assembled by `matchScores`.

    Args:
        args: Parsed command line options providing `log`, `targetlist`,
            `targetpath`, `inputlist`, `inputpath`, `crossreference`, `idx`,
            `output` and `minscore`.  When `inputlist` is empty the targets
            are compared against themselves.

    Raises:
        ValueError: If `inputlist` is given without `inputpath`.
    """
    # Fail early with a clear message; without this check the missing
    # directory only surfaces as an AttributeError on `None.rstrip`.
    if args.inputlist and not args.inputpath:
        raise ValueError("`--inputpath` is required when `--inputlist` is given.")

    # The context manager closes the log even if a later step raises.
    with open(args.log, 'a+') as logFile:
        targetPath = args.targetpath.rstrip("/")
        targets = _readNames(args.targetlist)
        print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist))

        if args.inputlist:
            inputPath = args.inputpath.rstrip("/")
            inputs = _readNames(args.inputlist)
            print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist))
        else:
            # No separate input set: compare the targets with each other.
            inputs = targets
            inputPath = targetPath

        # Map the first column of each line to the list of last-column
        # entries seen for it.
        crossReference = dict()
        with open(args.crossreference) as file:
            for line in file:
                columns = line.split()
                if not columns:
                    # Blank lines would otherwise raise an IndexError.
                    continue
                crossReference.setdefault(columns[0], []).append(columns[-1])
        print ("Loaded cross reference from `%s`." % args.crossreference)

        interactions = dict()
        for targetName in targets:
            targetFile = "%s/%s" % (targetPath, targetName)
            matchScores(targetFile=targetFile,
                        targetName=targetName,
                        inputs=inputs,
                        inputPath=inputPath,
                        crossReference=crossReference,
                        minScore=args.minscore,
                        idLength=args.idx,
                        logFile=logFile,
                        interactions=interactions)

        if args.inputlist:
            # Second pass with the roles swapped; `matchScores` keeps the
            # higher min-Z when a pair has already been recorded.
            for inputName in inputs:
                inputFile = "%s/%s" % (inputPath, inputName)
                matchScores(targetFile=inputFile,
                            targetName=inputName,
                            inputs=targets,
                            inputPath=targetPath,
                            crossReference=crossReference,
                            minScore=args.minscore,
                            idLength=args.idx,
                            logFile=logFile,
                            interactions=interactions)

        interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True)
        with open(args.output, 'w') as output_file:
            for entry in interactions:
                output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"]))
59 65
def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, logFile, interactions):
    """Score one target against every input and record the interactions.

    For each input, every template hit of the target is looked up in
    `crossReference`; when one of its partners is a template hit of the
    input, the pair is scored with the lower of the two hit scores.  The
    highest such score is the min-Z of the target/input pair.

    Args:
        targetFile: Path of the target's `hhr` file.
        targetName: Name of the target.
        inputs: Names of the inputs to compare against.
        inputPath: Directory containing the inputs' `hhr` files.
        crossReference: Mapping from a template id to its partner ids.
        minScore: Threshold passed to `getTemplateScores`; a pair is only
            recorded when its min-Z is above it.
        idLength: Template identifier length passed to `getTemplateScores`.
        logFile: Writable text file; one line is written per recorded
            interaction.
        interactions: Dict updated in place.  A pair is stored under a key
            that does not depend on which side is the target, and an
            existing entry is only replaced by a higher min-Z.

    Returns:
        The `interactions` dict that was passed in.
    """
    targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength)
    if not targetHits:
        print("No targets found `%s`" % targetFile)
        return interactions
    print ("Loaded target scores from `%s`." % targetFile)

    inputDirectory = inputPath.rstrip("/")
    for inputName in inputs:
        inputFile = "%s/%s" % (inputDirectory, inputName)
        inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength)

        # `None` means "no matching template pair".  Starting at 0 would
        # report every pair when `minScore` is negative and would hide
        # genuine scores that are not positive.
        minZ = None
        minInfo = ""
        for t, targetScore in targetHits.items():
            for p in crossReference.get(t, ()):
                if p not in inputHits:
                    continue
                score = min(targetScore, inputHits[p])
                if minZ is None or score > minZ:
                    minZ = score
                    minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p)

        if minZ is None or minZ <= minScore:
            continue

        # Put the larger name first so that (a, b) and (b, a) share a key.
        if targetName > inputName:
            interactionKey = "%s_%s" % (targetName, inputName)
        else:
            interactionKey = "%s_%s" % (inputName, targetName)

        known = interactions.get(interactionKey)
        if known is not None and known["minZ"] >= minZ:
            continue
        interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo)
        logFile.write("Interaction between %s and %s [min-Z: %s].\n" % (targetName, inputName, minZ))
    return interactions
92 96
def getTemplateScores(hhrFile, minScore, identifierLength):
    """Read the template hits scoring above `minScore` from an `hhr` file.

    The first nine lines of the file are skipped and reading stops at the
    first blank line after them.  For each remaining line the template id is
    taken from column 4 onwards (`identifierLength` characters) and the
    score from columns 57-62.

    Args:
        hhrFile: Path of the `hhr` file.  A path that is not an existing
            file yields an empty result.
        minScore: Only hits with a score strictly above this are returned.
        identifierLength: Number of characters of the template identifier.

    Returns:
        A tuple `(topTemplate, result)`: the id of the first hit above the
        threshold (or `None`) and a dict mapping template id to score.
    """
    result = dict()
    topTemplate = None
    if not os.path.isfile(hhrFile):
        return topTemplate, result

    identifierEnd = identifierLength + 4
    with open(hhrFile) as file:
        for index, line in enumerate(file):
            if index <= 8:
                continue
            if not line.strip():
                break
            try:
                templateScore = float(line[57:63])
            except ValueError:
                # A truncated or malformed line must not abort the run.
                continue
            if templateScore > minScore:
                templateId = line[4:identifierEnd]
                if topTemplate is None:
                    topTemplate = templateId
                # NOTE(review): a repeated id overwrites the earlier score;
                # confirm that keeping the last occurrence is intended.
                result[templateId] = templateScore
    return topTemplate, result
110 114
if __name__ == "__main__":
    # Command line entry point: parse the options and hand them to `main`.
    parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
    parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True)
    parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True)
    parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False)
    parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=False)
    parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True)
    parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6)
    parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True)
    parser.add_argument('-l', '--log', help='Log file', required=True)
    parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10)
    args = parser.parse_args()
    # Both input options are optional, but a list of inputs cannot be used
    # without the directory holding their `hhr` files; report that as a
    # usage error here instead of failing later with a traceback.
    if args.inputlist and not args.inputpath:
        parser.error("--inputpath is required when --inputlist is given")
    main(args)