comparison spring_minz.py @ 17:c790d25086dc draft

"planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
author guerler
date Wed, 28 Oct 2020 05:11:56 +0000
parents 16eb2acaaa20
children 5feab7f00f02
comparison
equal deleted inserted replaced
16:16eb2acaaa20 17:c790d25086dc
1 #! /usr/bin/env python3 1 #! /usr/bin/env python3
2 import argparse 2 import argparse
3 import os 3 import os
4 4
5 def main(args): 5 def main(args):
6 names = [] 6 inputs = set()
7 with open(args.list) as file: 7 with open(args.inputlist) as file:
8 for index, line in enumerate(file): 8 for index, line in enumerate(file):
9 names.append(line.strip()) 9 name = line.strip()
10 print ("Loaded %s names from `%s`." % (len(names), args.list)) 10 inputs.add(name)
11 crossreference = {} 11 print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist))
12 targets = set()
13 duplicates = 0
14 with open(args.targetlist) as file:
15 for index, line in enumerate(file):
16 name = line.strip()
17 targets.add(name)
18 if name in inputs:
19 duplicates = duplicates + 1
20 print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist))
21 crossReference = dict()
12 with open(args.crossreference) as file: 22 with open(args.crossreference) as file:
13 for index, line in enumerate(file): 23 for index, line in enumerate(file):
14 columns = line.split() 24 columns = line.split()
15 core = columns[0] 25 core = columns[0]
16 partner = columns[-1] 26 partner = columns[-1]
17 if core not in crossreference: 27 if core not in crossReference:
18 crossreference[core] = [] 28 crossReference[core] = []
19 crossreference[core].append(partner) 29 crossReference[core].append(partner)
20 print ("Loaded cross reference from `%s`." % args.crossreference) 30 print ("Loaded cross reference from `%s`." % args.crossreference)
21 toptarget, targets = get_template_scores(args.target, args.minscore, args.idx) 31 interactions = dict()
22 interactions = [] 32 for targetName in targets:
23 if not targets: 33 targetDirectory = args.targetpath.rstrip("/")
24 print("No targets found `%s`" % args.target) 34 targetFile = "%s/%s" % (targetDirectory, targetName)
35 matchScores(targetFile=targetFile,
36 targetName=targetName,
37 inputs=inputs,
38 inputPath=args.inputpath,
39 crossReference=crossReference,
40 minScore=args.minscore,
41 idLength=args.idx,
42 interactions=interactions)
43 if duplicates == len(targets):
44 for inputName in inputs:
45 inputDirectory = args.inputpath.rstrip("/")
46 inputFile = "%s/%s" % (inputDirectory, inputName)
47 matchScores(targetFile=inputFile,
48 targetName=inputName,
49 inputs=targets,
50 inputPath=args.targetpath,
51 crossReference=crossReference,
52 minScore=args.minscore,
53 idLength=args.idx,
54 interactions=interactions)
55 interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True)
56 with open(args.output, 'w') as output_file:
57 for entry in interactions:
58 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"]))
59
60 def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, interactions):
61 targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength)
62 if not targetHits:
63 print("No targets found `%s`" % targetFile)
25 else: 64 else:
26 print ("Loaded target scores from `%s`." % args.target) 65 print ("Loaded target scores from `%s`." % targetFile)
27 for name in names: 66 for inputName in inputs:
28 input_directory = args.inputs.rstrip("/") 67 inputDirectory = inputPath.rstrip("/")
29 input_file = "%s/%s" % (input_directory, name) 68 inputFile = "%s/%s" % (inputDirectory, inputName)
30 toptemplate, templates = get_template_scores(input_file, args.minscore, args.idx) 69 inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength)
31 minz = 0 70 minZ = 0
32 mint = "" 71 minInfo = ""
33 for t in targets: 72 for t in targetHits:
34 if t in crossreference: 73 if t in crossReference:
35 partners = crossreference[t] 74 partners = crossReference[t]
36 for p in partners: 75 for p in partners:
37 if p in templates: 76 if p in inputHits:
38 score = min(targets[t], templates[p]) 77 score = min(targetHits[t], inputHits[p])
39 if score > minz: 78 if score > minZ:
40 minz = score 79 minZ = score
41 mint = "%s\t%s\t%s\t%s" % (toptarget, toptemplate, t, p) 80 minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p)
42 if minz > args.minscore: 81 if minZ > minScore:
43 interactions.append((name, minz, mint)) 82 if targetName > inputName:
44 print("Predicting: %s, min-Z: %s, templates: %s" % (name, minz, mint)) 83 interactionKey = "%s_%s_%s" % (targetName, inputName, minZ)
45 interactions.sort(key=lambda tup: tup[1], reverse=True) 84 else:
46 with open(args.output, 'a+') as output_file: 85 interactionKey = "%s_%s_%s" % (inputName, targetName, minZ)
47 for i in interactions: 86 if interactionKey not in interactions:
48 output_file.write("%s\t%s\t%s\t%s\n" % (args.name, i[0], i[1], i[2])) 87 interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo)
88 print("Predicting: %s, min-Z: %s, templates: %s" % (inputName, minZ, minInfo))
89 return interactions
49 90
50 def get_template_scores(hhr_file, min_score, identifier_length): 91 def getTemplateScores(hhrFile, minScore, identifierLength):
51 result = {} 92 result = dict()
52 toptemplate = None 93 topTemplate = None
53 identifier_length = identifier_length + 4 94 identifierLength = identifierLength + 4
54 if os.path.isfile(hhr_file): 95 if os.path.isfile(hhrFile):
55 with open(hhr_file) as file: 96 with open(hhrFile) as file:
56 for index, line in enumerate(file): 97 for index, line in enumerate(file):
57 if index > 8: 98 if index > 8:
58 if not line.strip(): 99 if not line.strip():
59 break 100 break
60 template_id = line[4:identifier_length] 101 templateId = line[4:identifierLength]
61 template_score = float(line[57:63]) 102 templateScore = float(line[57:63])
62 if template_score > min_score: 103 if templateScore > minScore:
63 if toptemplate is None: 104 if topTemplate is None:
64 toptemplate = template_id 105 topTemplate = templateId
65 result[template_id] = template_score 106 result[templateId] = templateScore
66 return toptemplate, result 107 return topTemplate, result
67 108
68 if __name__ == "__main__": 109 if __name__ == "__main__":
69 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') 110 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
70 parser.add_argument('-t', '--target', help='HHR target file result', required=True) 111 parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=True)
71 parser.add_argument('-n', '--name', help='HHR target name', required=True) 112 parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=True)
113 parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True)
114 parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True)
72 parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True) 115 parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True)
73 parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6) 116 parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6)
74 parser.add_argument('-l', '--list', help='Text file containing identifiers.', required=True) 117 parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True)
75 parser.add_argument('-i', '--inputs', help='Directory containing `hhr` files', required=True)
76 parser.add_argument('-o', '--output', help='Output file containing min-Z scores`', required=True)
77 parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) 118 parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10)
78 args = parser.parse_args() 119 args = parser.parse_args()
79 main(args) 120 main(args)