# HG changeset patch # User guerler # Date 1603976687 0 # Node ID 5469e19f1f96627a4c7742e12b0e513c924b57b1 # Parent acaff61a09b26c2142c3eb75ea8f5e67cd0203ec "planemo upload commit 37a4c6844fd7ab1071ddf90f51915ec1a13c26b3" diff -r acaff61a09b2 -r 5469e19f1f96 spring_minz.py --- a/spring_minz.py Wed Oct 28 06:49:58 2020 +0000 +++ b/spring_minz.py Thu Oct 29 13:04:47 2020 +0000 @@ -3,120 +3,125 @@ import os def main(args): - inputs = list() - with open(args.inputlist) as file: - for index, line in enumerate(file): - name = line.strip() - inputs.append(name) - print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist)) - targets = list() - duplicates = 0 - with open(args.targetlist) as file: - for index, line in enumerate(file): - name = line.strip() - targets.append(name) - if name in inputs: - duplicates = duplicates + 1 - print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist)) - crossReference = dict() - with open(args.crossreference) as file: - for index, line in enumerate(file): - columns = line.split() - core = columns[0] - partner = columns[-1] - if core not in crossReference: - crossReference[core] = [] - crossReference[core].append(partner) - print ("Loaded cross reference from `%s`." % args.crossreference) - interactions = dict() - for targetName in targets: - targetDirectory = args.targetpath.rstrip("/") - targetFile = "%s/%s" % (targetDirectory, targetName) - matchScores(targetFile=targetFile, - targetName=targetName, - inputs=sorted(inputs), - inputPath=args.inputpath, - crossReference=crossReference, - minScore=args.minscore, - idLength=args.idx, - interactions=interactions) - if duplicates != len(targets): - for inputName in inputs: - inputDirectory = args.inputpath.rstrip("/") - inputFile = "%s/%s" % (inputDirectory, inputName) - matchScores(targetFile=inputFile, - targetName=inputName, - inputs=targets, - inputPath=args.targetpath, - crossReference=crossReference, - minScore=args.minscore, - idLength=args.idx, - interactions=interactions) - interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True) - with open(args.output, 'w') as output_file: - for entry in interactions: - output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"])) + logFile = open(args.log, 'a+') + targets = list() + targetPath = args.targetpath.rstrip("/") + with open(args.targetlist) as file: + for index, line in enumerate(file): + name = line.strip() + targets.append(name) + print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist)) + if args.inputlist: + inputs = list() + inputPath = args.inputpath.rstrip("/") + with open(args.inputlist) as file: + for index, line in enumerate(file): + name = line.strip() + inputs.append(name) + print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist)) + else: + inputs = targets + inputPath = targetPath + crossReference = dict() + with open(args.crossreference) as file: + for index, line in enumerate(file): + columns = line.split() + core = columns[0] + partner = columns[-1] + if core not in crossReference: + crossReference[core] = [] + crossReference[core].append(partner) + print ("Loaded cross reference from `%s`." % args.crossreference) + interactions = dict() + for targetName in targets: + targetFile = "%s/%s" % (targetPath, targetName) + matchScores(targetFile=targetFile, + targetName=targetName, + inputs=inputs, + inputPath=inputPath, + crossReference=crossReference, + minScore=args.minscore, + idLength=args.idx, + logFile=logFile, + interactions=interactions) + if args.inputlist: + for inputName in inputs: + inputDirectory = inputPath + inputFile = "%s/%s" % (inputDirectory, inputName) + matchScores(targetFile=inputFile, + targetName=inputName, + inputs=targets, + inputPath=targetPath, + crossReference=crossReference, + minScore=args.minscore, + idLength=args.idx, + logFile=logFile, + interactions=interactions) + interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True) + with open(args.output, 'w') as output_file: + for entry in interactions: + output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"])) + logFile.close() -def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, interactions): - targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength) - if not targetHits: - print("No targets found `%s`" % targetFile) - else: - print ("Loaded target scores from `%s`." % targetFile) - for inputName in inputs: - inputDirectory = inputPath.rstrip("/") - inputFile = "%s/%s" % (inputDirectory, inputName) - inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength) - minZ = 0 - minInfo = "" - for t in targetHits: - if t in crossReference: - partners = crossReference[t] - for p in partners: - if p in inputHits: - score = min(targetHits[t], inputHits[p]) - if score > minZ: - minZ = score - minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p) - if minZ > minScore: - if targetName > inputName: - interactionKey = "%s_%s" % (targetName, inputName) - else: - interactionKey = "%s_%s" % (inputName, targetName) - if interactionKey in interactions: - if interactions[interactionKey]["minZ"] >= minZ: - continue - interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo) - print("Predicting: %s, min-Z: %s, templates: %s" % (inputName, minZ, minInfo)) - return interactions +def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, logFile, interactions): + targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength) + if not targetHits: + print("No targets found `%s`" % targetFile) + else: + print ("Loaded target scores from `%s`." % targetFile) + for inputName in inputs: + inputFile = "%s/%s" % (inputPath, inputName) + inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength) + minZ = 0 + minInfo = "" + for t in targetHits: + if t in crossReference: + partners = crossReference[t] + for p in partners: + if p in inputHits: + score = min(targetHits[t], inputHits[p]) + if score > minZ: + minZ = score + minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p) + if minZ > minScore: + if targetName > inputName: + interactionKey = "%s_%s" % (targetName, inputName) + else: + interactionKey = "%s_%s" % (inputName, targetName) + if interactionKey in interactions: + if interactions[interactionKey]["minZ"] >= minZ: + continue + interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo) + logFile.write("Interaction between %s and %s [min-Z: %s].\n" % (targetName, inputName, minZ)) def getTemplateScores(hhrFile, minScore, identifierLength): - result = dict() - topTemplate = None - identifierLength = identifierLength + 4 - if os.path.isfile(hhrFile): - with open(hhrFile) as file: - for index, line in enumerate(file): - if index > 8: - if not line.strip(): - break - templateId = line[4:identifierLength] - templateScore = float(line[57:63]) - if templateScore > minScore: - if topTemplate is None: - topTemplate = templateId - result[templateId] = templateScore - return topTemplate, result + result = dict() + topTemplate = None + identifierLength = identifierLength + 4 + if os.path.isfile(hhrFile): + with open(hhrFile) as file: + for index, line in enumerate(file): + if index > 8: + if not line.strip(): + break + templateId = line[4:identifierLength] + templateScore = float(line[57:63]) + if templateScore > minScore: + if topTemplate is None: + topTemplate = templateId + result[templateId] = templateScore + return topTemplate, result if __name__ == "__main__": - parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') - parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=True) - parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=True) - parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) - parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True) - parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True) - parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6) - parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True) - parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) - args = parser.parse_args() - main(args) \ No newline at end of file + parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') + parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) + parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True) + parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False) + parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=False) + parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True) + parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6) + parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True) + parser.add_argument('-l', '--log', help='Log file', required=True) + parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) + args = parser.parse_args() + main(args) \ No newline at end of file diff -r acaff61a09b2 -r 5469e19f1f96 spring_minz.xml --- a/spring_minz.xml Wed Oct 28 06:49:58 2020 +0000 +++ b/spring_minz.xml Thu Oct 29 13:04:47 2020 +0000 @@ -2,59 +2,60 @@ filter operation > 'input_list' && - #end for - mkdir -p targets && + mkdir -p targets && #for target in $targets link '${str(target)}' 'targets/${target.element_identifier}' && echo '${target.element_identifier}' >> 'target_list' && #end for - python3 '$__tool_directory__/spring_minz.py' -il input_list -ip inputs -tl target_list -tp targets -m '$minscore' -c '$crossreference' -x '$idx' -o '$output' + #if str($input_type.input_type_selector) == "true": + mkdir -p inputs && + #for input in $input_type.inputs + link '${str(input)}' 'inputs/${input.element_identifier}' && + echo '${input.element_identifier}' >> 'input_list' && + #end for + python3 '$__tool_directory__/spring_minz.py' -tl target_list -tp targets -il input_list -ip inputs -m '$minscore' -c '$crossreference' -x '$idx' -o '$output' -l '$log' + #else + python3 '$__tool_directory__/spring_minz.py' -tl target_list -tp targets -m '$minscore' -c '$crossreference' -x '$idx' -o '$output' -l '$log' + #end if ]]> - - + + + + + + + - + + - + + - - + + - + + + - - - - - - - - - - - - - + @@ -62,16 +63,20 @@ - - - - - - - - + + + + + + + + + + + +