Mercurial > repos > fcaramia > contra
comparison Contra/scripts/convert_gene_coordinate.py @ 0:7564f3b1e675
Uploaded
| author | fcaramia |
|---|---|
| date | Thu, 13 Sep 2012 02:31:43 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:7564f3b1e675 |
|---|---|
| 1 # ----------------------------------------------------------------------# | |
| 2 # Copyright (c) 2011, Richard Lupat & Jason Li. | |
| 3 # | |
| 4 # > Source License < | |
| 5 # This file is part of CONTRA. | |
| 6 # | |
| 7 # CONTRA is free software: you can redistribute it and/or modify | |
| 8 # it under the terms of the GNU General Public License as published by | |
| 9 # the Free Software Foundation, either version 3 of the License, or | |
| 10 # (at your option) any later version. | |
| 11 # | |
| 12 # CONTRA is distributed in the hope that it will be useful, | |
| 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 15 # GNU General Public License for more details. | |
| 16 # | |
| 17 # You should have received a copy of the GNU General Public License | |
| 18 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>. | |
| 19 # | |
| 20 # | |
| 21 #-----------------------------------------------------------------------# | |
| 22 # Last Updated : 03 Oct 2011 11:00AM | |
| 23 | |
| 24 import sys | |
| 25 | |
| 26 def outputwrite(output, gene,chr,a,b,c,id,n,sOri, eOri): | |
| 27 id = str(id) | |
| 28 n = str(n) | |
| 29 output.write(gene+"\t"+chr+"\t"+a+"\t"+b+"\t"+c+"\t"+id+"\t"+n+"\t"+sOri+"\t"+eOri+"\n") | |
| 30 | |
| 31 def outputwrite2(output2, chr,c,sOri, eOri): | |
| 32 output2.write(chr+"\t"+sOri+"\t"+eOri+"\t"+c+"\n") | |
| 33 | |
| 34 | |
| 35 def convertGeneCoordinate(targetList, bufLocFolder): | |
| 36 inputfile2 = bufLocFolder + "chr/" | |
| 37 outputfile = bufLocFolder + "geneRefCoverage.txt" | |
| 38 outputfile2= bufLocFolder + "geneRefCoverage2.txt" | |
| 39 | |
| 40 output= open(outputfile,"w") | |
| 41 output2 = open(outputfile2 , "w") | |
| 42 | |
| 43 rn = 1 | |
| 44 prevchr = "" | |
| 45 for target in targetList: | |
| 46 chr = target.chr | |
| 47 starts = target.oriStart.split(",") | |
| 48 ends = target.oriEnd.split(",") | |
| 49 | |
| 50 if ((len(chr) > 5) or (chr[len(chr)-1] == "M")): | |
| 51 continue | |
| 52 | |
| 53 if (prevchr != chr): | |
| 54 print chr #progress checking | |
| 55 prevchr = chr | |
| 56 t = 0 | |
| 57 covFile = file.readlines(open(inputfile2+chr+".txt","r")) | |
| 58 | |
| 59 for n in range(0,target.numberExon): | |
| 60 if t >= len(covFile): | |
| 61 break | |
| 62 cov = covFile[t].split() | |
| 63 while ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))): | |
| 64 if (int(cov[1]) > int(starts[n])): | |
| 65 t-=1 | |
| 66 else: | |
| 67 t+=1 | |
| 68 cov = covFile[t].split() | |
| 69 | |
| 70 while ((int(ends[n]) < int(cov[1])) or (int(ends[n]) > int(cov[2]))): | |
| 71 # print output # | |
| 72 if (rn == 1): | |
| 73 prev = target.id | |
| 74 nID = target.id | |
| 75 if (nID != prev): | |
| 76 rn = 1 | |
| 77 ref1 = str(rn) | |
| 78 ref2 = str(int(cov[2]) - int(starts[n]) + rn) | |
| 79 outputwrite(output, target.gene,chr,ref1,ref2,cov[3],target.id,n,starts[n],cov[2]) | |
| 80 | |
| 81 outputwrite2(output2, chr,cov[3],starts[n],cov[2]) | |
| 82 | |
| 83 | |
| 84 rn = int(ref2) | |
| 85 prev = nID | |
| 86 # -- # | |
| 87 | |
| 88 # get to the next line of inputfile# | |
| 89 t+= 1 | |
| 90 cov = covFile[t].split() | |
| 91 starts[n] = cov[1] | |
| 92 | |
| 93 #print output # | |
| 94 if (t == 0) and (t >= len(covFile)): | |
| 95 continue | |
| 96 | |
| 97 if (rn == 1): | |
| 98 prev = target.id | |
| 99 nID = target.id | |
| 100 if (nID != prev): | |
| 101 rn = 1 | |
| 102 ref1 = str(rn) | |
| 103 ref2 = str(int(ends[n]) - int(starts[n]) + rn) | |
| 104 outputwrite(output, target.gene, chr, ref1, ref2, cov[3], target.id, n, starts[n], ends[n]) | |
| 105 | |
| 106 outputwrite2(output2, chr, cov[3], starts[n], ends[n]) | |
| 107 | |
| 108 | |
| 109 rn = int(ref2) | |
| 110 prev = nID | |
| 111 # -- # | |
| 112 output.close() | |
| 113 output2.close() | |
| 114 | |
| 115 | |
| 116 def convertGeneCoordinate2(targetList, bufLocFolder): | |
| 117 inputfile2 = bufLocFolder + "chr/" | |
| 118 outputfile = bufLocFolder + "geneRefCoverage.txt" | |
| 119 outputfile_avg = bufLocFolder + "geneRefCoverage_targetAverage.txt" | |
| 120 | |
| 121 output= open(outputfile,"w") | |
| 122 output_avg = open(outputfile_avg,"w") | |
| 123 | |
| 124 rn = 1 | |
| 125 prevchr = "" | |
| 126 for target in targetList: | |
| 127 chr = target.chr | |
| 128 starts = target.oriStart.split(",") | |
| 129 ends = target.oriEnd.split(",") | |
| 130 target_ttl_readdepth = 0 | |
| 131 starts_leftmost = starts[0] | |
| 132 | |
| 133 | |
| 134 if ((len(chr) > 5) or (chr[len(chr)-1] == "M")): | |
| 135 continue | |
| 136 | |
| 137 if (prevchr != chr): | |
| 138 print chr #progress checking | |
| 139 prevchr = chr | |
| 140 t = 0 | |
| 141 covFile = file.readlines(open(inputfile2+chr+".txt","r")) | |
| 142 | |
| 143 for n in range(0,target.numberExon): | |
| 144 if t >= len(covFile): | |
| 145 break | |
| 146 cov = covFile[t].split() | |
| 147 while ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))): | |
| 148 if (int(cov[1]) > int(starts[n])): t-=1 | |
| 149 else: | |
| 150 t+=1 | |
| 151 cov = covFile[t].split() | |
| 152 | |
| 153 while ((int(ends[n]) < int(cov[1])) or (int(ends[n]) > int(cov[2]))): | |
| 154 # print output # | |
| 155 if (rn == 1): | |
| 156 prev = target.id | |
| 157 nID = target.id | |
| 158 if (nID != prev): | |
| 159 rn = 1 | |
| 160 ref1 = str(rn) | |
| 161 ref2 = str(int(cov[2]) - int(starts[n]) + rn) | |
| 162 outputwrite2(output, chr,cov[3],starts[n],cov[2]) | |
| 163 tmprange=int(cov[2])-int(starts[n])+1 | |
| 164 target_ttl_readdepth+=int(cov[3])*tmprange | |
| 165 #target_length+=tmprange | |
| 166 | |
| 167 rn = int(ref2) | |
| 168 prev = nID | |
| 169 # -- # | |
| 170 | |
| 171 # get to the next line of inputfile# | |
| 172 t+= 1 | |
| 173 cov = covFile[t].split() | |
| 174 starts[n] = cov[1] | |
| 175 | |
| 176 #print output # | |
| 177 if (t == 0) and (t >= len(covFile)): | |
| 178 continue | |
| 179 | |
| 180 if (rn == 1): | |
| 181 prev = target.id | |
| 182 nID = target.id | |
| 183 if (nID != prev): | |
| 184 rn = 1 | |
| 185 ref1 = str(rn) | |
| 186 ref2 = str(int(ends[n]) - int(starts[n]) + rn) | |
| 187 outputwrite2(output, chr, cov[3], starts[n], ends[n]) | |
| 188 tmprange=int(ends[n])-int(starts[n])+1 | |
| 189 target_ttl_readdepth+=int(cov[3])*tmprange | |
| 190 #target_length+=tmprange | |
| 191 target_length = int(ends[n])-int(starts_leftmost)+1 | |
| 192 output_avg.write("\t".join([chr,starts_leftmost,ends[n],str(target_ttl_readdepth/target_length),str(target_length)])+"\n") | |
| 193 | |
| 194 rn = int(ref2) | |
| 195 prev = nID | |
| 196 # -- # | |
| 197 output.close() | |
| 198 output_avg.close() | |
| 199 | |
| 200 | |
| 201 | |
| 202 | |
| 203 | |
| 204 | |
| 205 | |
| 206 | |
| 207 | |
| 208 | |
| 209 | |
| 210 | |
| 211 | |
| 212 | |
| 213 | |
| 214 | |
| 215 | |
| 216 | |
| 217 | |
| 218 |
