contra: Contra/scripts/convert_gene_coordinate.py comparison

comparison Contra/scripts/convert_gene_coordinate.py @ 0:7564f3b1e675

Uploaded

author	fcaramia
date	Thu, 13 Sep 2012 02:31:43 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:7564f3b1e675
+# ----------------------------------------------------------------------#
+# Copyright (c) 2011, Richard Lupat & Jason Li.
+#
+# > Source License <
+# This file is part of CONTRA.
+#
+#    CONTRA is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    CONTRA is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with CONTRA.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#-----------------------------------------------------------------------#
+# Last Updated : 03 Oct 2011 11:00AM
+import sys
+def outputwrite(output, gene,chr,a,b,c,id,n,sOri, eOri):
+id = str(id)
+	n = str(n)
+output.write(gene+"\t"+chr+"\t"+a+"\t"+b+"\t"+c+"\t"+id+"\t"+n+"\t"+sOri+"\t"+eOri+"\n")
+def outputwrite2(output2, chr,c,sOri, eOri):
+output2.write(chr+"\t"+sOri+"\t"+eOri+"\t"+c+"\n")
+def convertGeneCoordinate(targetList, bufLocFolder):
+	inputfile2 = bufLocFolder + "chr/"
+	outputfile = bufLocFolder + "geneRefCoverage.txt"
+	outputfile2= bufLocFolder + "geneRefCoverage2.txt"
+	output= open(outputfile,"w")
+	output2 = open(outputfile2 , "w")
+	rn = 1
+	prevchr = ""
+	for target in targetList:
+		chr = target.chr
+		starts = target.oriStart.split(",")
+		ends = target.oriEnd.split(",")
+		if ((len(chr) > 5) or (chr[len(chr)-1] == "M")):
+			continue
+		if (prevchr != chr):
+			print chr #progress checking
+			prevchr = chr
+			t = 0
+			covFile = file.readlines(open(inputfile2+chr+".txt","r"))
+		for n in range(0,target.numberExon):
+			if t >= len(covFile):
+				break
+			cov = covFile[t].split()
+			while  ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))):
+				if (int(cov[1]) > int(starts[n])):
+					t-=1
+				else:
+					t+=1
+				cov = covFile[t].split()
+			while  ((int(ends[n]) < int(cov[1])) or (int(ends[n]) >  int(cov[2]))):
+		# print output #
+				if (rn == 1):
+					prev = target.id
+		nID = target.id
+		if (nID != prev):
+					rn = 1
+				ref1 = str(rn)
+				ref2 = str(int(cov[2]) - int(starts[n]) + rn)
+				outputwrite(output, target.gene,chr,ref1,ref2,cov[3],target.id,n,starts[n],cov[2])
+				outputwrite2(output2, chr,cov[3],starts[n],cov[2])
+				rn = int(ref2)
+				prev = nID
+				# -- #
+				# get to the next line of inputfile#
+				t+= 1
+				cov = covFile[t].split()
+				starts[n] = cov[1]
+		#print output #
+		if (t == 0) and (t >= len(covFile)):
+			continue
+		if (rn == 1):
+			prev = target.id
+		nID = target.id
+		if (nID != prev):
+			rn = 1
+		ref1 = str(rn)
+		ref2 = str(int(ends[n]) - int(starts[n]) + rn)
+		outputwrite(output, target.gene, chr, ref1, ref2, cov[3], target.id, n, starts[n], ends[n])
+		outputwrite2(output2, chr, cov[3], starts[n], ends[n])
+		rn = int(ref2)
+		prev = nID
+		# -- #
+	output.close()
+	output2.close()
+def convertGeneCoordinate2(targetList, bufLocFolder):
+inputfile2 = bufLocFolder + "chr/"
+outputfile = bufLocFolder + "geneRefCoverage.txt"
+outputfile_avg = bufLocFolder + "geneRefCoverage_targetAverage.txt"
+output= open(outputfile,"w")
+output_avg = open(outputfile_avg,"w")
+rn = 1
+prevchr = ""
+for target in targetList:
+chr = target.chr
+starts = target.oriStart.split(",")
+ends = target.oriEnd.split(",")
+target_ttl_readdepth = 0
+starts_leftmost = starts[0]
+if ((len(chr) > 5) or (chr[len(chr)-1] == "M")):
+continue
+if (prevchr != chr):
+print chr #progress checking
+prevchr = chr
+t = 0
+covFile = file.readlines(open(inputfile2+chr+".txt","r"))
+for n in range(0,target.numberExon):
+if t >= len(covFile):
+break
+cov = covFile[t].split()
+while  ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))):
+if (int(cov[1]) > int(starts[n])):                                        t-=1
+else:
+t+=1
+cov = covFile[t].split()
+while  ((int(ends[n]) < int(cov[1])) or (int(ends[n]) >  int(cov[2]))):
+# print output #
+if (rn == 1):
+prev = target.id
+nID = target.id
+if (nID != prev):
+rn = 1
+ref1 = str(rn)
+ref2 = str(int(cov[2]) - int(starts[n]) + rn)
+outputwrite2(output, chr,cov[3],starts[n],cov[2])
+tmprange=int(cov[2])-int(starts[n])+1
+target_ttl_readdepth+=int(cov[3])*tmprange
+#target_length+=tmprange
+rn = int(ref2)
+prev = nID
+# -- #
+# get to the next line of inputfile#
+t+= 1
+cov = covFile[t].split()
+starts[n] = cov[1]
+#print output #
+if (t == 0) and (t >= len(covFile)):
+continue
+if (rn == 1):
+prev = target.id
+nID = target.id
+if (nID != prev):
+rn = 1
+ref1 = str(rn)
+ref2 = str(int(ends[n]) - int(starts[n]) + rn)
+outputwrite2(output, chr, cov[3], starts[n], ends[n])
+tmprange=int(ends[n])-int(starts[n])+1
+target_ttl_readdepth+=int(cov[3])*tmprange
+#target_length+=tmprange
+target_length = int(ends[n])-int(starts_leftmost)+1
+output_avg.write("\t".join([chr,starts_leftmost,ends[n],str(target_ttl_readdepth/target_length),str(target_length)])+"\n")
+rn = int(ref2)
+prev = nID
+# -- #
+output.close()
+output_avg.close()

Mercurial > repos > fcaramia > contra

comparison Contra/scripts/convert_gene_coordinate.py @ 0:7564f3b1e675