diff VCFFiltering_wrapper.py @ 0:3552a8d9f51c draft

Uploaded
author urgi-team
date Tue, 10 Nov 2015 08:30:56 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFFiltering_wrapper.py	Tue Nov 10 08:30:56 2015 -0500
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+
+
+import subprocess, tempfile, sys, os, glob, shutil, time
+from optparse import OptionParser
+from optparse import Option, OptionValueError
+			
+class VCFFilteringWrapper(object):
+
+	def __init__(self):
+		self._options = None
+		
+		
+	def stop_err(self, msg ):
+		sys.stderr.write( "%s\n" % msg )
+		sys.exit()
+		
+		
+	def setAttributesFromCmdLine(self):
+		description = "VCFFiltering_wrapper"
+		description += "\nWrapper for VCFFiltering ;\n VCFFiltering filters SNP on a VCF depending on depth (DP) allele number (AN), allele frequency (AF) and SNP quality.\n"
+		description += "example 1 : VCFFiltering.py -f myVCF.vcf -o FilteredVCF.vcf\n"
+		description += "example 2 : VCFFiltering.py -f myVCF.vcf -N 2 -F 0.87 -b bed1.bed bed2.bed -o FilteredVCF.vcf\n"
+		parser = OptionParser(description = description, version = "0.1")
+		parser.add_option("-f", "--vcf",	   dest = "VCFFile",   action = "store", type = "string", help = "Input VCF File name [compulsory] [format: VCF]",                                           default = "")
+		parser.add_option("-o", "--output",	dest = "outFile",   action = "store", type = "string", help = "output VCF File name [compulsory] [format: VCF]",                                             default = "")
+		parser.add_option("-m", "--minDP",	 dest = "minDP",	 action = "store", type = "int",	help = "minimum of depth ; if both minDP and maxDP are set, optimal DP will not be calculated ",     default = 0)
+		parser.add_option("-M", "--maxDP",	 dest = "maxDP",	 action = "store", type = "int",	help = "maximum of depth ; if both minDP and maxDP are set, optimal DP will not be calculated ",     default = 0)
+		parser.add_option("-N", "--AN",		dest = "AN",		action = "store", type = "int",	help = "maximum number of allele for a SNP; default = 2",                                                default = 2)
+		parser.add_option("-F", "--AF",		dest = "AF",		action = "store", type = "float",  help = "minimum frequency for the alternative allele of a SNP; default = 0.9",                        default = 0.9)
+		parser.add_option("-b", "--bed",	   dest = "bedFiles",  action = "append", type = "string", help = "bed files: list of coordinates to filter, multiple arguments allowed '-b file1 file2' ",  default = [])
+		parser.add_option("-G", "--graphHTML", dest = "graphHTML", action = "store", type = "string", help = "name of the HTML linking to graphs ",                                                      default = "")
+		parser.add_option("-d", "--dirGraphs", dest = "dirGraphs", action = "store", type = "string", help = "name of the folder containing graphs ",                                                    default = "")
+		options = parser.parse_args()[0]
+		self._setAttributesFromOptions(options)
+
+
+	def _setAttributesFromOptions(self, options):
+		self._options = options
+
+	def run(self):
+		if self._options.minDP and self._options.maxDP :
+			if self._options.minDP > self._options.maxDP :
+				self.stop_err( 'error in options : minDP > max DP (%s > %s)' %(self._options.minDP,self._options.maxDP)) 
+					
+		prg = "VCFFiltering.py -g -G 'png' "
+		args = ""
+		args += "-f %s" % self._options.VCFFile
+		args += " "
+		args += "-o %s" % self._options.outFile
+		if self._options.AF :
+			args += " "
+			args += "-F %s" % self._options.AF
+		if self._options.AN :
+			args += " "
+			args += "-N %s" % self._options.AN
+		if self._options.minDP :
+			args += " "
+			args += "-m %s" % self._options.minDP
+		if self._options.maxDP :
+			args += " "
+			args += "-M %s" % self._options.maxDP
+		if self._options.bedFiles :
+			if self._options.bedFiles == "":
+				pass
+			else :
+				self._lBedFiles = self._options.bedFiles
+		cmd = "%s %s" %(prg, args)
+		
+		print cmd
+		
+		try:
+			tmp_err = tempfile.NamedTemporaryFile().name
+			tmp_stderr = open( tmp_err, 'wb' )
+			proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stderr=tmp_stderr )
+			returncode = proc.wait()
+			tmp_stderr.close()
+			# get stderr, allowing for case where it's very large
+			tmp_stderr = open( tmp_err, 'rb' )
+			stderr = ''
+			buffsize = 1048576
+			try:
+				while True:
+					stderr += tmp_stderr.read( buffsize )
+					if not stderr or len( stderr ) % buffsize != 0:
+						break
+			except OverflowError:
+				pass
+			tmp_stderr.close()
+			if stderr:
+				raise Exception, stderr
+		except Exception, e:
+			self.stop_err( 'Error in VCFFiltering:\n' + str( e ) ) 
+			
+		if True :
+			html = open(self._options.graphHTML, "w")
+			
+			os.mkdir(self._options.dirGraphs)
+			lGraphsFiles = glob.glob("VCFFiltering_graphs/*")
+			for file in lGraphsFiles :
+				baseName = os.path.basename(file)
+				shutil.move( file ,"%s/%s" %(self._options.dirGraphs, baseName))
+				line = "<img src=\"%s\" > \n" %(baseName)
+				html.write(line)
+
+
+if __name__ == "__main__":
+	iWrapper = VCFFilteringWrapper()
+	iWrapper.setAttributesFromCmdLine()
+	iWrapper.run()