diff spring_minz.py @ 0:d30785e31577 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:18:57 -0400
parents
children f2f38991c36f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spring_minz.py	Fri Jul 31 00:18:57 2020 -0400
@@ -0,0 +1,71 @@
+#! /usr/bin/env python3
+import argparse
+import os
+
+def main(args):
+	names = []
+	with open(args.list) as file:
+		for index, line in enumerate(file):
+			names.append(line.strip())
+	print ("Loaded %s names from `%s`." % (len(names), args.list))
+	crossreference = {}
+	with open(args.crossreference) as file:
+		for index, line in enumerate(file):
+			columns = line.split()
+			core = columns[0]
+			partner = columns[2]
+			if core not in crossreference:
+				crossreference[core] = []
+			crossreference[core].append(partner)
+	print ("Loaded cross reference from `%s`." % args.crossreference)
+	targets = get_template_scores(args.target, args.minscore)
+	if not targets:
+		print("No targets found `%s`" % args.target)
+	else:
+		print ("Loaded target scores from `%s`." % args.target)
+		interactions = []
+		for name in names:
+			input_directory = args.inputs.rstrip("/")
+			input_file = "%s/%s" % (input_directory, name)
+			templates = get_template_scores(input_file, args.minscore)
+			minz = 0
+			for t in targets:
+				if t in crossreference:
+					partners = crossreference[t]
+					for p in partners:
+						if p in templates:
+							score = min(targets[t], templates[p])
+							if score > minz:
+								minz = score
+			if minz > args.minscore:
+				interactions.append((name, minz))
+				print("Predicting: %s, min-Z: %s" % (name, minz))
+		interactions.sort(key=lambda tup: tup[1], reverse=True)
+		with open(args.output, 'w') as output_file:
+			for i in interactions:
+				output_file.write("%s %s\n" % (i[0], i[1]))
+
+def get_template_scores(hhr_file, min_score):
+	result = {}
+	if os.path.isfile(hhr_file):
+		with open(hhr_file) as file:
+			for index, line in enumerate(file):
+				if index > 8:
+					if not line.strip():
+						break
+					template_id = line[4:10]
+					template_score = float(line[57:63])
+					if template_score > min_score:
+						result[template_id] = template_score
+	return result
+
+if __name__ == "__main__":
+	parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
+	parser.add_argument('-t', '--target', help='HHR target file result', required=True)
+	parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True)
+	parser.add_argument('-l', '--list', help='Text file containing identifiers.', required=True)
+	parser.add_argument('-i', '--inputs', help='Directory containing `hhr/X/Y.hhr` files', required=True)
+	parser.add_argument('-o', '--output', help='Output file containing minZ-scores`', required=True)
+	parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10)
+	args = parser.parse_args()
+	main(args)
\ No newline at end of file