diff shm_csr.py @ 63:8728284105ee draft

Uploaded
author davidvanzessen
date Wed, 06 Dec 2017 08:04:52 -0500
parents aa8d37bd1930
children 43a1aa648537
line wrap: on
line diff
--- a/shm_csr.py	Tue Dec 05 10:57:13 2017 -0500
+++ b/shm_csr.py	Wed Dec 06 08:04:52 2017 -0500
@@ -104,23 +104,6 @@
 			if len(linesplt[fr3Index]) > 5:
 				mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
 				
-			try:
-				pass
-			except Exception as e:
-				print "Something went wrong while processing this line:"
-				print "line:", linecount
-				print "fr1 len:", len(linesplt[fr1Index]), "value:", linesplt[fr1Index]
-				print "cdr1 len:", len(linesplt[cdr1Index]), "value:", linesplt[cdr1Index]
-				print "fr2 len:", len(linesplt[fr2Index]), "value:", linesplt[fr2Index]
-				print "cdr2 len:", len(linesplt[cdr2Index]), "value:", linesplt[cdr2Index]
-				print "fr3 len:", len(linesplt[fr3Index]), "value:", linesplt[fr3Index]
-				print ID + "_FR1 in mutationdic", ID + "_FR1" in mutationdic
-				print ID + "_CDR1 in mutationdic", ID + "_CDR1" in mutationdic
-				print ID + "_FR2 in mutationdic", ID + "_FR2" in mutationdic
-				print ID + "_CDR2 in mutationdic", ID + "_CDR2" in mutationdic
-				print ID + "_FR3 in mutationdic", ID + "_FR3" in mutationdic
-				print linesplt
-				print e
 			mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
 			mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
 
@@ -393,6 +376,50 @@
 					WRCYCount[ID] += (1.0 * int(mutation_in_WRCY)) / in_how_many_motifs
 					WACount[ID] += (1.0 * int(mutation_in_WA)) / in_how_many_motifs
 					TWCount[ID] += (1.0 * int(mutation_in_TW)) / in_how_many_motifs
+			
+			mutations_in_motifs_file = os.path.join(os.path.dirname(os.path.abspath(infile)), "mutation_in_motifs.txt")
+			if not os.path.exists(mutation_by_id_file):
+				with open(mutations_in_motifs_file, 'w') as out_handle:
+					out_handle.write("{0}\n".format("\t".join([
+						"Sequence.ID",
+						"mutation_position",
+						"region",
+						"from_nt",
+						"to_nt",
+						"mutation_position_AA",
+						"from_AA",
+						"to_AA",
+						"motif",
+						"motif_start_nt",
+						"motif_end_nt",
+						"rest"
+					])))
+
+			with open(mutations_in_motifs_file, 'a') as out_handle:
+				motif_dic = {"RGYW": RGYW, "WRCY": WRCY, "WA": WA, "TW": TW}
+				for mutation in mutationList:
+					frm, where, to, AAfrm, AAwhere, AAto, junk = mutation
+					for motif in motif_dic.keys():
+							
+						for start, end, region in motif_dic[motif]:
+							if start <= int(where) <= end:
+								out_handle.write("{0}\n".format(
+									"\t".join([
+										ID,
+										where,
+										region,
+										frm,
+										to,
+										str(AAwhere),
+										str(AAfrm),
+										str(AAto),
+										motif,
+										str(start),
+										str(end),
+										str(junk)
+									])
+								))
+
 
 
 	def mean(lst):