diff Contra/scripts/cn_apply_threshold.py @ 0:7564f3b1e675

Uploaded
author fcaramia
date Thu, 13 Sep 2012 02:31:43 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Contra/scripts/cn_apply_threshold.py	Thu Sep 13 02:31:43 2012 -0400
@@ -0,0 +1,111 @@
+# ----------------------------------------------------------------------#
+# Copyright (c) 2011, Richard Lupat & Jason Li.
+#
+# > Source License <
+# This file is part of CONTRA.
+#
+#    CONTRA is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    CONTRA is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with CONTRA.  If not, see <http://www.gnu.org/licenses/>.
+#
+# 
+#-----------------------------------------------------------------------#
+# Last Updated : 12 Oct 2011 11:00AM
+
+def applyThreshold(outputName, bufTable, threshold, maxGap):
+	srcFile = outputName + ".txt"
+	outFile = bufTable + ".LargeVariations.txt"
+	bedFile = bufTable + ".BED"
+	fFile 	= outputName + ".DetailsFILTERED.txt" 
+	ts	= float(threshold)
+
+	# Read and open files
+	srcTable = file.readlines(open(srcFile))
+	outTable = open(outFile, "w")
+	bedOut	 = open(bedFile, "w")
+	filteredTable = open(fFile, "w")
+
+
+	#header
+	outTable.write("Chr \tStartCoordinate \tEndCoordinate \tGenes \tGain.Loss \n")
+	filteredTable.write(srcTable[0])
+
+	prevChr = ''
+	prevStatus = ''
+	prevEnd = -1
+	genes = []
+	chrList = []
+
+	for exons in srcTable:
+		exon = exons.split()
+		try:
+			adjPVal = float(exon[12])
+		except:
+			continue
+
+		if adjPVal <= ts:
+			chr 	= exon[3]
+			gene 	= exon[2]
+			status 	= exon[13]
+			start 	= exon[4]
+			end	= exon[5]
+
+			# For first row
+			if prevEnd == -1:
+				gap = 0
+			else:
+				gap = int(prevEnd) - int(start)
+
+			# Write Filtered Table
+			filteredTable.write(exons)
+
+			# Write Bed File
+			bedOut.write(chr.strip("chr") +"\t" +start +"\t"+ end+"\t"+ 
+			   chr.strip("chr")+":"+start+"-"+end+":"+str(adjPVal)+"\n")
+		
+			if prevChr == '' and prevStatus == '':
+				if chr not in chrList:
+					print chr
+					chrList.append(chr)
+			elif (chr == prevChr) and (status == prevStatus) and (gap < maxGap):
+				start = prevStart
+			else:
+				outTable.write(prevChr +"\t" +prevStart +"\t" +prevEnd + "\t")
+				for gsym in genes:
+					outTable.write(gsym + ", ")
+				outTable.write("\t" + prevStatus + "\n")
+				genes=[]
+		
+			if gene not in genes:
+				genes.append(gene)
+			prevChr = chr
+			prevStatus = status
+			prevStart = start
+			prevEnd = end
+		elif len(genes) > 0:
+			outTable.write(prevChr +"\t" +prevStart +"\t" +prevEnd + "\t")
+			for gsym in genes:
+				outTable.write(gsym + ", " )
+			outTable.write("\t" + prevStatus + "\n")
+			prevChr = ''
+			prevStatus = ''
+			genes = []
+
+	if len(genes) > 0:
+        	outTable.write(prevChr +"\t" +prevStart +"\t" +prevEnd + "\t")
+        	for gsym in genes:
+                	outTable.write(gsym + ", ")
+        	outTable.write("\t" + prevStatus + "\n")
+
+	filteredTable.close()
+	bedOut.close()
+	outTable.close()