Mercurial > repos > guerler > springsuite
comparison spring_minz.py @ 23:5469e19f1f96 draft
"planemo upload commit 37a4c6844fd7ab1071ddf90f51915ec1a13c26b3"
| author | guerler |
|---|---|
| date | Thu, 29 Oct 2020 13:04:47 +0000 |
| parents | acaff61a09b2 |
| children | 5d1ae615e4ec |
comparison
Legend: equal, deleted, inserted, replaced
| 22:acaff61a09b2 | 23:5469e19f1f96 |
|---|---|
| 1 #! /usr/bin/env python3 | 1 #! /usr/bin/env python3 |
| 2 import argparse | 2 import argparse |
| 3 import os | 3 import os |
| 4 | 4 |
| 5 def main(args): | 5 def main(args): |
| 6 inputs = list() | 6 logFile = open(args.log, 'a+') |
| 7 with open(args.inputlist) as file: | 7 targets = list() |
| 8 for index, line in enumerate(file): | 8 targetPath = args.targetpath.rstrip("/") |
| 9 name = line.strip() | 9 with open(args.targetlist) as file: |
| 10 inputs.append(name) | 10 for index, line in enumerate(file): |
| 11 print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist)) | 11 name = line.strip() |
| 12 targets = list() | 12 targets.append(name) |
| 13 duplicates = 0 | 13 print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist)) |
| 14 with open(args.targetlist) as file: | 14 if args.inputlist: |
| 15 for index, line in enumerate(file): | 15 inputs = list() |
| 16 name = line.strip() | 16 inputPath = args.inputpath.rstrip("/") |
| 17 targets.append(name) | 17 with open(args.inputlist) as file: |
| 18 if name in inputs: | 18 for index, line in enumerate(file): |
| 19 duplicates = duplicates + 1 | 19 name = line.strip() |
| 20 print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist)) | 20 inputs.append(name) |
| 21 crossReference = dict() | 21 print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist)) |
| 22 with open(args.crossreference) as file: | 22 else: |
| 23 for index, line in enumerate(file): | 23 inputs = targets |
| 24 columns = line.split() | 24 inputPath = targetPath |
| 25 core = columns[0] | 25 crossReference = dict() |
| 26 partner = columns[-1] | 26 with open(args.crossreference) as file: |
| 27 if core not in crossReference: | 27 for index, line in enumerate(file): |
| 28 crossReference[core] = [] | 28 columns = line.split() |
| 29 crossReference[core].append(partner) | 29 core = columns[0] |
| 30 print ("Loaded cross reference from `%s`." % args.crossreference) | 30 partner = columns[-1] |
| 31 interactions = dict() | 31 if core not in crossReference: |
| 32 for targetName in targets: | 32 crossReference[core] = [] |
| 33 targetDirectory = args.targetpath.rstrip("/") | 33 crossReference[core].append(partner) |
| 34 targetFile = "%s/%s" % (targetDirectory, targetName) | 34 print ("Loaded cross reference from `%s`." % args.crossreference) |
| 35 matchScores(targetFile=targetFile, | 35 interactions = dict() |
| 36 targetName=targetName, | 36 for targetName in targets: |
| 37 inputs=sorted(inputs), | 37 targetFile = "%s/%s" % (targetPath, targetName) |
| 38 inputPath=args.inputpath, | 38 matchScores(targetFile=targetFile, |
| 39 crossReference=crossReference, | 39 targetName=targetName, |
| 40 minScore=args.minscore, | 40 inputs=inputs, |
| 41 idLength=args.idx, | 41 inputPath=inputPath, |
| 42 interactions=interactions) | 42 crossReference=crossReference, |
| 43 if duplicates != len(targets): | 43 minScore=args.minscore, |
| 44 for inputName in inputs: | 44 idLength=args.idx, |
| 45 inputDirectory = args.inputpath.rstrip("/") | 45 logFile=logFile, |
| 46 inputFile = "%s/%s" % (inputDirectory, inputName) | 46 interactions=interactions) |
| 47 matchScores(targetFile=inputFile, | 47 if args.inputlist: |
| 48 targetName=inputName, | 48 for inputName in inputs: |
| 49 inputs=targets, | 49 inputDirectory = inputPath |
| 50 inputPath=args.targetpath, | 50 inputFile = "%s/%s" % (inputDirectory, inputName) |
| 51 crossReference=crossReference, | 51 matchScores(targetFile=inputFile, |
| 52 minScore=args.minscore, | 52 targetName=inputName, |
| 53 idLength=args.idx, | 53 inputs=targets, |
| 54 interactions=interactions) | 54 inputPath=targetPath, |
| 55 interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True) | 55 crossReference=crossReference, |
| 56 with open(args.output, 'w') as output_file: | 56 minScore=args.minscore, |
| 57 for entry in interactions: | 57 idLength=args.idx, |
| 58 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"])) | 58 logFile=logFile, |
| | 59 interactions=interactions) |
| | 60 interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True) |
| | 61 with open(args.output, 'w') as output_file: |
| | 62 for entry in interactions: |
| | 63 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"])) |
| | 64 logFile.close() |
| 59 | 65 |
| 60 def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, interactions): | 66 def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, logFile, interactions): |
| 61 targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength) | 67 targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength) |
| 62 if not targetHits: | 68 if not targetHits: |
| 63 print("No targets found `%s`" % targetFile) | 69 print("No targets found `%s`" % targetFile) |
| 64 else: | 70 else: |
| 65 print ("Loaded target scores from `%s`." % targetFile) | 71 print ("Loaded target scores from `%s`." % targetFile) |
| 66 for inputName in inputs: | 72 for inputName in inputs: |
| 67 inputDirectory = inputPath.rstrip("/") | 73 inputFile = "%s/%s" % (inputPath, inputName) |
| 68 inputFile = "%s/%s" % (inputDirectory, inputName) | 74 inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength) |
| 69 inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength) | 75 minZ = 0 |
| 70 minZ = 0 | 76 minInfo = "" |
| 71 minInfo = "" | 77 for t in targetHits: |
| 72 for t in targetHits: | 78 if t in crossReference: |
| 73 if t in crossReference: | 79 partners = crossReference[t] |
| 74 partners = crossReference[t] | 80 for p in partners: |
| 75 for p in partners: | 81 if p in inputHits: |
| 76 if p in inputHits: | 82 score = min(targetHits[t], inputHits[p]) |
| 77 score = min(targetHits[t], inputHits[p]) | 83 if score > minZ: |
| 78 if score > minZ: | 84 minZ = score |
| 79 minZ = score | 85 minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p) |
| 80 minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p) | 86 if minZ > minScore: |
| 81 if minZ > minScore: | 87 if targetName > inputName: |
| 82 if targetName > inputName: | 88 interactionKey = "%s_%s" % (targetName, inputName) |
| 83 interactionKey = "%s_%s" % (targetName, inputName) | 89 else: |
| 84 else: | 90 interactionKey = "%s_%s" % (inputName, targetName) |
| 85 interactionKey = "%s_%s" % (inputName, targetName) | 91 if interactionKey in interactions: |
| 86 if interactionKey in interactions: | 92 if interactions[interactionKey]["minZ"] >= minZ: |
| 87 if interactions[interactionKey]["minZ"] >= minZ: | 93 continue |
| 88 continue | 94 interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo) |
| 89 interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo) | 95 logFile.write("Interaction between %s and %s [min-Z: %s].\n" % (targetName, inputName, minZ)) |
| 90 print("Predicting: %s, min-Z: %s, templates: %s" % (inputName, minZ, minInfo)) | |
| 91 return interactions | |
| 92 | 96 |
| 93 def getTemplateScores(hhrFile, minScore, identifierLength): | 97 def getTemplateScores(hhrFile, minScore, identifierLength): |
| 94 result = dict() | 98 result = dict() |
| 95 topTemplate = None | 99 topTemplate = None |
| 96 identifierLength = identifierLength + 4 | 100 identifierLength = identifierLength + 4 |
| 97 if os.path.isfile(hhrFile): | 101 if os.path.isfile(hhrFile): |
| 98 with open(hhrFile) as file: | 102 with open(hhrFile) as file: |
| 99 for index, line in enumerate(file): | 103 for index, line in enumerate(file): |
| 100 if index > 8: | 104 if index > 8: |
| 101 if not line.strip(): | 105 if not line.strip(): |
| 102 break | 106 break |
| 103 templateId = line[4:identifierLength] | 107 templateId = line[4:identifierLength] |
| 104 templateScore = float(line[57:63]) | 108 templateScore = float(line[57:63]) |
| 105 if templateScore > minScore: | 109 if templateScore > minScore: |
| 106 if topTemplate is None: | 110 if topTemplate is None: |
| 107 topTemplate = templateId | 111 topTemplate = templateId |
| 108 result[templateId] = templateScore | 112 result[templateId] = templateScore |
| 109 return topTemplate, result | 113 return topTemplate, result |
| 110 | 114 |
| 111 if __name__ == "__main__": | 115 if __name__ == "__main__": |
| 112 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') | 116 parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') |
| 113 parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=True) | 117 parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) |
| 114 parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=True) | 118 parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True) |
| 115 parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) | 119 parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False) |
| 116 parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True) | 120 parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=False) |
| 117 parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True) | 121 parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True) |
| 118 parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6) | 122 parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6) |
| 119 parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True) | 123 parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True) |
| 120 parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) | 124 parser.add_argument('-l', '--log', help='Log file', required=True) |
| 121 args = parser.parse_args() | 125 parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) |
| 122 main(args) | 126 args = parser.parse_args() |
| | 127 main(args) |
