| 0 | 1 # ----------------------------------------------------------------------# | 
|  | 2 # Copyright (c) 2011, Richard Lupat & Jason Li. | 
|  | 3 # | 
|  | 4 # > Source License < | 
|  | 5 # This file is part of CONTRA. | 
|  | 6 # | 
|  | 7 #    CONTRA is free software: you can redistribute it and/or modify | 
|  | 8 #    it under the terms of the GNU General Public License as published by | 
|  | 9 #    the Free Software Foundation, either version 3 of the License, or | 
|  | 10 #    (at your option) any later version. | 
|  | 11 # | 
|  | 12 #    CONTRA is distributed in the hope that it will be useful, | 
|  | 13 #    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 14 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | 15 #    GNU General Public License for more details. | 
|  | 16 # | 
|  | 17 #    You should have received a copy of the GNU General Public License | 
|  | 18 #    along with CONTRA.  If not, see <http://www.gnu.org/licenses/>. | 
|  | 19 # | 
|  | 20 # | 
|  | 21 #-----------------------------------------------------------------------# | 
|  | 22 # Last Updated : 03 Oct 2011 11:00AM | 
|  | 23 | 
|  | 24 import sys | 
|  | 25 | 
def outputwrite(output, gene, chr, a, b, c, id, n, sOri, eOri):
    """Write one tab-separated, newline-terminated record to *output*.

    The nine fields are written in the order
    ``gene, chr, a, b, c, id, n, sOri, eOri``.

    Parameters:
        output: object with a ``write(str)`` method (e.g. an open text file).
        gene, chr, a, b, c, sOri, eOri: strings, written unchanged.
        id, n: any values; converted with ``str()`` before writing.

    Raises:
        TypeError: if one of the fields that is not converted is not a string.
    """
    # Only ``id`` and ``n`` are stringified; every other field must already
    # be a string, exactly as the concatenation-based version required.
    fields = [gene, chr, a, b, c, str(id), str(n), sOri, eOri]
    output.write("\t".join(fields) + "\n")
|  | 30 | 
def outputwrite2(output2, chr, c, sOri, eOri):
    """Write ``chr, sOri, eOri, c`` to *output2* as one tab-separated line.

    Note the field order on disk differs from the parameter order: ``c`` is
    written last. All four values must be strings.
    """
    record = "\t".join((chr, sOri, eOri, c))
    output2.write(record + "\n")
|  | 33 | 
|  | 34 | 
def convertGeneCoordinate(targetList, bufLocFolder):
    """Split each target along coverage-file intervals and write the pieces.

    For every target, the lines of ``<bufLocFolder>chr/<chr>.txt`` are
    searched for the interval containing the exon start, then walked forward
    until the interval containing the exon end is reached. Each piece is
    written twice:

    * ``<bufLocFolder>geneRefCoverage.txt`` via ``outputwrite`` (gene, chr,
      running reference start/end, value, target id, exon index, genomic
      start, genomic end);
    * ``<bufLocFolder>geneRefCoverage2.txt`` via ``outputwrite2`` (chr,
      genomic start, genomic end, value).

    Parameters:
        targetList: iterable of objects exposing ``chr``, ``oriStart``,
            ``oriEnd`` (comma-separated strings), ``numberExon``, ``id`` and
            ``gene``. The coverage file is only reloaded when ``chr`` differs
            from the previous target's, and the line cursor is kept between
            targets -- presumably the list is expected to be sorted by
            chromosome and position; verify against the caller.
        bufLocFolder: path prefix; it is concatenated directly, so it must
            already end with a path separator.

    Coverage lines are split on whitespace; fields 1 and 2 (0-based) are read
    as integer interval bounds and field 3 is copied to the output.

    Targets whose ``chr`` is longer than 5 characters or ends in ``"M"`` are
    skipped.

    Raises:
        IOError/OSError: if a coverage file or an output file cannot be opened.
        IndexError: if the line cursor runs past the end of the coverage file.
    """
    cov_dir = bufLocFolder + "chr/"
    outputfile = bufLocFolder + "geneRefCoverage.txt"
    outputfile2 = bufLocFolder + "geneRefCoverage2.txt"

    # ``with`` guarantees both outputs are flushed and closed even when a
    # malformed coverage line raises part-way through.
    with open(outputfile, "w") as output, open(outputfile2, "w") as output2:
        rn = 1          # running reference coordinate within the current id
        prevchr = ""
        for target in targetList:
            chrom = target.chr
            starts = target.oriStart.split(",")
            ends = target.oriEnd.split(",")

            if len(chrom) > 5 or chrom[-1] == "M":
                continue

            if prevchr != chrom:
                # Progress output; the parenthesised form prints the same
                # text under both Python 2 and Python 3.
                print(chrom)
                prevchr = chrom
                t = 0
                # Read the whole coverage file and release the handle at once.
                with open(cov_dir + chrom + ".txt", "r") as cov_handle:
                    covFile = cov_handle.readlines()

            for n in range(0, target.numberExon):
                if t >= len(covFile):
                    break
                cov = covFile[t].split()

                # Move the cursor to the interval [cov[1], cov[2]) that
                # contains the exon start.
                # NOTE(review): t is not bounds-checked; stepping below 0
                # wraps to the end of the list, stepping past the end raises
                # IndexError.
                while (int(starts[n]) < int(cov[1])
                       or int(starts[n]) >= int(cov[2])):
                    if int(cov[1]) > int(starts[n]):
                        t -= 1
                    else:
                        t += 1
                    cov = covFile[t].split()

                # While the exon end lies outside the current interval, emit
                # the piece up to the interval end and advance one line.
                while (int(ends[n]) < int(cov[1])
                       or int(ends[n]) > int(cov[2])):
                    if rn == 1:
                        prev = target.id
                    nID = target.id
                    if nID != prev:
                        rn = 1      # new id: restart reference coordinates
                    ref1 = str(rn)
                    ref2 = str(int(cov[2]) - int(starts[n]) + rn)
                    outputwrite(output, target.gene, chrom, ref1, ref2,
                                cov[3], target.id, n, starts[n], cov[2])
                    outputwrite2(output2, chrom, cov[3], starts[n], cov[2])

                    rn = int(ref2)
                    prev = nID

                    # Continue from the start of the next coverage line.
                    t += 1
                    cov = covFile[t].split()
                    starts[n] = cov[1]

            # Only true when the coverage file is empty (nothing to report).
            if (t == 0) and (t >= len(covFile)):
                continue

            # Emit the closing piece. This runs once per target, after the
            # exon loop, so it uses the last exon index ``n``.
            # NOTE(review): for multi-exon targets the closing piece of the
            # earlier exons is therefore not written -- confirm intended.
            if rn == 1:
                prev = target.id
            nID = target.id
            if nID != prev:
                rn = 1
            ref1 = str(rn)
            ref2 = str(int(ends[n]) - int(starts[n]) + rn)
            outputwrite(output, target.gene, chrom, ref1, ref2, cov[3],
                        target.id, n, starts[n], ends[n])
            outputwrite2(output2, chrom, cov[3], starts[n], ends[n])

            rn = int(ref2)
            prev = nID
|  | 114 | 
|  | 115 | 
def convertGeneCoordinate2(targetList, bufLocFolder):
    """Write coverage pieces per target plus one averaged row per target.

    For every target, the lines of ``<bufLocFolder>chr/<chr>.txt`` are
    searched for the interval containing the exon start, then walked forward
    until the interval containing the exon end is reached. Two files are
    produced:

    * ``<bufLocFolder>geneRefCoverage.txt`` -- one ``outputwrite2`` row
      (chr, start, end, value) per piece;
    * ``<bufLocFolder>geneRefCoverage_targetAverage.txt`` -- one row per
      target: chr, first start, last end, floor(sum(value * piece_length) /
      target_length), target_length, where
      ``target_length = last end - first start + 1``.

    Parameters:
        targetList: iterable of objects exposing ``chr``, ``oriStart``,
            ``oriEnd`` (comma-separated strings) and ``numberExon``. The
            coverage file is only reloaded when ``chr`` differs from the
            previous target's, and the line cursor is kept between targets --
            presumably the list is expected to be sorted by chromosome and
            position; verify against the caller.
        bufLocFolder: path prefix; it is concatenated directly, so it must
            already end with a path separator.

    Coverage lines are split on whitespace; fields 1 and 2 (0-based) are read
    as integer interval bounds and field 3 as an integer value.

    Targets whose ``chr`` is longer than 5 characters or ends in ``"M"`` are
    skipped.

    Raises:
        IOError/OSError: if a coverage file or an output file cannot be opened.
        IndexError: if the line cursor runs past the end of the coverage file.
    """
    cov_dir = bufLocFolder + "chr/"
    outputfile = bufLocFolder + "geneRefCoverage.txt"
    outputfile_avg = bufLocFolder + "geneRefCoverage_targetAverage.txt"

    # ``with`` guarantees both outputs are flushed and closed even when a
    # malformed coverage line raises part-way through.
    with open(outputfile, "w") as output, \
            open(outputfile_avg, "w") as output_avg:
        prevchr = ""
        for target in targetList:
            chrom = target.chr
            starts = target.oriStart.split(",")
            ends = target.oriEnd.split(",")
            target_ttl_readdepth = 0
            # Remember the original first start: starts[n] is overwritten
            # below as pieces are consumed.
            starts_leftmost = starts[0]

            if len(chrom) > 5 or chrom[-1] == "M":
                continue

            if prevchr != chrom:
                # Progress output; the parenthesised form prints the same
                # text under both Python 2 and Python 3.
                print(chrom)
                prevchr = chrom
                t = 0
                # Read the whole coverage file and release the handle at once.
                with open(cov_dir + chrom + ".txt", "r") as cov_handle:
                    covFile = cov_handle.readlines()

            for n in range(0, target.numberExon):
                if t >= len(covFile):
                    break
                cov = covFile[t].split()

                # Move the cursor to the interval [cov[1], cov[2]) that
                # contains the exon start.
                # NOTE(review): t is not bounds-checked; stepping below 0
                # wraps to the end of the list, stepping past the end raises
                # IndexError.
                while (int(starts[n]) < int(cov[1])
                       or int(starts[n]) >= int(cov[2])):
                    if int(cov[1]) > int(starts[n]):
                        t -= 1
                    else:
                        t += 1
                    cov = covFile[t].split()

                # While the exon end lies outside the current interval, emit
                # the piece up to the interval end and advance one line.
                while (int(ends[n]) < int(cov[1])
                       or int(ends[n]) > int(cov[2])):
                    outputwrite2(output, chrom, cov[3], starts[n], cov[2])
                    tmprange = int(cov[2]) - int(starts[n]) + 1
                    target_ttl_readdepth += int(cov[3]) * tmprange

                    # Continue from the start of the next coverage line.
                    t += 1
                    cov = covFile[t].split()
                    starts[n] = cov[1]

            # Only true when the coverage file is empty (nothing to report).
            if (t == 0) and (t >= len(covFile)):
                continue

            # Closing piece and per-target summary; runs once per target,
            # after the exon loop, so it uses the last exon index ``n``.
            outputwrite2(output, chrom, cov[3], starts[n], ends[n])
            tmprange = int(ends[n]) - int(starts[n]) + 1
            target_ttl_readdepth += int(cov[3]) * tmprange
            target_length = int(ends[n]) - int(starts_leftmost) + 1
            # ``//`` keeps the integer (floor) average that Python 2's ``/``
            # produced for ints, independent of interpreter version.
            average = target_ttl_readdepth // target_length
            output_avg.write("\t".join([chrom, starts_leftmost, ends[n],
                                        str(average),
                                        str(target_length)]) + "\n")
|  | 199 | 
|  | 200 | 
|  | 201 | 
|  | 202 | 
|  | 203 | 
|  | 204 | 
|  | 205 | 
|  | 206 | 
|  | 207 | 
|  | 208 | 
|  | 209 | 
|  | 210 | 
|  | 211 | 
|  | 212 | 
|  | 213 | 
|  | 214 | 
|  | 215 | 
|  | 216 | 
|  | 217 | 
|  | 218 |