Mercurial > repos > guerler > springsuite
diff spring_minz.py @ 29:41353488926c draft
"planemo upload commit 1c0a60f98e36bccb6d6c85ff82a8d737a811b4d5"
author | guerler |
---|---|
date | Sun, 22 Nov 2020 14:15:24 +0000 |
parents | e34da554d415 |
children | 172398348efd |
line wrap: on
line diff
--- a/spring_minz.py Sat Oct 31 22:55:35 2020 +0000 +++ b/spring_minz.py Sun Nov 22 14:15:24 2020 +0000 @@ -2,36 +2,39 @@ import argparse import os + def main(args): logFile = open(args.log, 'a+') targets = list() targetPath = args.targetpath.rstrip("/") with open(args.targetlist) as file: - for index, line in enumerate(file): + for line in file: name = line.strip() targets.append(name) - print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist)) + print("Loaded %s target names from `%s`." % (len(targets), + args.targetlist)) if args.inputlist: inputs = list() inputPath = args.inputpath.rstrip("/") with open(args.inputlist) as file: - for index, line in enumerate(file): + for line in file: name = line.strip() inputs.append(name) - print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist)) + print("Loaded %s input names from `%s`." % (len(inputs), + args.inputlist)) else: inputs = targets inputPath = targetPath crossReference = dict() with open(args.crossreference) as file: - for index, line in enumerate(file): + for line in file: columns = line.split() core = columns[0] partner = columns[-1] if core not in crossReference: crossReference[core] = [] crossReference[core].append(partner) - print ("Loaded cross reference from `%s`." % args.crossreference) + print("Loaded cross reference from `%s`." % args.crossreference) interactions = dict() for targetName in targets: targetFile = "%s/%s" % (targetPath, targetName) @@ -57,21 +60,27 @@ idLength=args.idlength, logFile=logFile, interactions=interactions) - interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True) + interactions = sorted(interactions.values(), key=lambda item: item["minZ"], + reverse=True) with open(args.output, 'w') as output_file: for entry in interactions: - output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"])) + output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], + entry["inputName"], entry["minZ"], + entry["minInfo"])) logFile.close() -def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, logFile, interactions): + +def matchScores(targetFile, targetName, inputs, inputPath, crossReference, + minScore, idLength, logFile, interactions): targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength) if not targetHits: print("No targets found `%s`" % targetFile) else: - print ("Loaded target scores from `%s`." % targetFile) + print("Loaded target scores from `%s`." % targetFile) for inputName in inputs: inputFile = "%s/%s" % (inputPath, inputName) - inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength) + inputTop, inputHits = getTemplateScores(inputFile, + minScore, idLength) minZ = 0 minInfo = "" for t in targetHits: @@ -82,7 +91,8 @@ score = min(targetHits[t], inputHits[p]) if score > minZ: minZ = score - minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p) + minInfo = "%s\t%s\t%s\t%s" % (targetTop, + inputTop, t, p) if minZ > minScore: if targetName > inputName: interactionKey = "%s_%s" % (targetName, inputName) @@ -91,8 +101,12 @@ if interactionKey in interactions: if interactions[interactionKey]["minZ"] >= minZ: continue - interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo) - logFile.write("Interaction between %s and %s [min-Z: %s].\n" % (targetName, inputName, minZ)) + interactions[interactionKey] = dict(targetName=targetName, + inputName=inputName, + minZ=minZ, minInfo=minInfo) + logFile.write("Interaction between %s and %s [min-Z: %s].\n" % + (targetName, inputName, minZ)) + def getTemplateScores(hhrFile, minScore, idLength): result = dict() @@ -112,6 +126,7 @@ result[templateId] = templateScore return topTemplate, result + if __name__ == "__main__": parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) @@ -124,4 +139,4 @@ parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) parser.add_argument('-idx', '--idlength', help='Length of identifier in reference', type=int, default=6) args = parser.parse_args() - main(args) \ No newline at end of file + main(args)