changeset 16:16eb2acaaa20 draft

"planemo upload commit 0e4e1f8de9464b411152c44f4edd099db8ad9e0b"
author guerler
date Sat, 24 Oct 2020 17:48:06 +0000
parents 4a4888bf0338
children c790d25086dc
files spring_minz.py test-data/dbCAN_result.txt test-data/pdb70_result.txt
diffstat 3 files changed, 24 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/spring_minz.py	Sun Sep 27 20:52:58 2020 +0000
+++ b/spring_minz.py	Sat Oct 24 17:48:06 2020 +0000
@@ -13,12 +13,12 @@
 		for index, line in enumerate(file):
 			columns = line.split()
 			core = columns[0]
-			partner = columns[2]
+			partner = columns[-1]
 			if core not in crossreference:
 				crossreference[core] = []
 			crossreference[core].append(partner)
 	print ("Loaded cross reference from `%s`." % args.crossreference)
-	targets = get_template_scores(args.target, args.minscore, args.idx)
+	toptarget, targets = get_template_scores(args.target, args.minscore, args.idx)
 	interactions = []
 	if not targets:
 		print("No targets found `%s`" % args.target)
@@ -27,8 +27,9 @@
 		for name in names:
 			input_directory = args.inputs.rstrip("/")
 			input_file = "%s/%s" % (input_directory, name)
-			templates = get_template_scores(input_file, args.minscore, args.idx)
+			toptemplate, templates = get_template_scores(input_file, args.minscore, args.idx)
 			minz = 0
+			mint = ""
 			for t in targets:
 				if t in crossreference:
 					partners = crossreference[t]
@@ -37,16 +38,18 @@
 							score = min(targets[t], templates[p])
 							if score > minz:
 								minz = score
+								mint = "%s\t%s\t%s\t%s" % (toptarget, toptemplate, t, p)
 			if minz > args.minscore:
-				interactions.append((name, minz))
-				print("Predicting: %s, min-Z: %s" % (name, minz))
+				interactions.append((name, minz, mint))
+				print("Predicting: %s, min-Z: %s, templates: %s" % (name, minz, mint))
 		interactions.sort(key=lambda tup: tup[1], reverse=True)
 	with open(args.output, 'a+') as output_file:
 		for i in interactions:
-			output_file.write("%s\t%s\t%s\n" % (args.name, i[0], i[1]))
+			output_file.write("%s\t%s\t%s\t%s\n" % (args.name, i[0], i[1], i[2]))
 
 def get_template_scores(hhr_file, min_score, identifier_length):
 	result = {}
+	toptemplate = None
 	identifier_length = identifier_length + 4
 	if os.path.isfile(hhr_file):
 		with open(hhr_file) as file:
@@ -57,8 +60,10 @@
 					template_id = line[4:identifier_length]
 					template_score = float(line[57:63])
 					if template_score > min_score:
+						if toptemplate is None:
+							toptemplate = template_id
 						result[template_id] = template_score
-	return result
+	return toptemplate, result
 
 if __name__ == "__main__":
 	parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
@@ -68,7 +73,7 @@
 	parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6)
 	parser.add_argument('-l', '--list', help='Text file containing identifiers.', required=True)
 	parser.add_argument('-i', '--inputs', help='Directory containing `hhr` files', required=True)
-	parser.add_argument('-o', '--output', help='Output file containing minZ-scores`', required=True)
+	parser.add_argument('-o', '--output', help='Output file containing min-Z scores`', required=True)
 	parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10)
 	args = parser.parse_args()
 	main(args)
\ No newline at end of file
--- a/test-data/dbCAN_result.txt	Sun Sep 27 20:52:58 2020 +0000
+++ b/test-data/dbCAN_result.txt	Sat Oct 24 17:48:06 2020 +0000
@@ -1,9 +1,9 @@
-6W4H_A.hhr	6W4H_A.hhr	24.7
-6W9Q_A.hhr	6W4H_B.hhr	22.3
-6W9Q_A.hhr	6W9C_A.hhr	21.0
-6W37_A.hhr	6W9C_A.hhr	21.3
-6W37_A.hhr	6W4H_B.hhr	16.0
-6W37_A.hhr	6W37_A.hhr	14.0
-7BQY_A.hhr	6W9C_A.hhr	24.2
-7BQY_A.hhr	6W4H_B.hhr	24.0
-7BQY_A.hhr	6W37_A.hhr	14.0
+6W4H_A.hhr	6W4H_A.hhr	24.7	CAA16605.1	CAA16605.1	CAA16605.1	CAA16605.1
+6W9Q_A.hhr	6W4H_B.hhr	22.3	AJP22991.1	CAR47543.1	AJP22991.1	ACI15933.1
+6W9Q_A.hhr	6W9C_A.hhr	21.0	AJP22991.1	ACY47587.1	ATY34423.1	AHA42547.2
+6W37_A.hhr	6W9C_A.hhr	21.3	CAG81347.1	ACY47587.1	CAG81347.1	AAO76127.1
+6W37_A.hhr	6W4H_B.hhr	16.0	CAG81347.1	CAR47543.1	ABO94043.1	ACY95489.1
+6W37_A.hhr	6W37_A.hhr	14.0	CAG81347.1	CAG81347.1	ABO94043.1	ACY95489.1
+7BQY_A.hhr	6W9C_A.hhr	24.2	QCE11709.1	ACY47587.1	QCE11709.1	AAW84061.1
+7BQY_A.hhr	6W4H_B.hhr	24.0	QCE11709.1	CAR47543.1	CAK38741.1	ACD98010.1
+7BQY_A.hhr	6W37_A.hhr	14.0	QCE11709.1	CAG81347.1	ABO94043.1	ACY95489.1
--- a/test-data/pdb70_result.txt	Sun Sep 27 20:52:58 2020 +0000
+++ b/test-data/pdb70_result.txt	Sat Oct 24 17:48:06 2020 +0000
@@ -1,2 +1,2 @@
-NP_000282.1.hhr	NP_000282.1.hhr	875.1
-NP_000282.1.hhr	NP_000290.2.hhr	86.5
+NP_000282.1.hhr	NP_000282.1.hhr	875.1	2WZB_A	2WZB_A	2WZB_A	2WZB_A
+NP_000282.1.hhr	NP_000290.2.hhr	86.5	2WZB_A	1XM9_A	3UWD_A	3NMZ_A