view VCFGandalfTools/VCFFiltering_wrapper.py @ 3:1fd1f727c330 draft default tip

Uploaded
author urgi-team
date Fri, 08 Apr 2016 12:07:35 -0400
parents 6bebeb76fa8d
children
line wrap: on
line source

#!/usr/bin/env python


import subprocess, tempfile, sys, os, glob, shutil, time
from optparse import OptionParser
from optparse import Option, OptionValueError
			
class VCFFilteringWrapper(object):

	def __init__(self):
		self._options = None
		
		
	def stop_err(self, msg ):
		sys.stderr.write( "%s\n" % msg )
		sys.exit()
		
		
	def setAttributesFromCmdLine(self):
		description = "VCFFiltering_wrapper"
		description += "\nWrapper for VCFFiltering ;\n VCFFiltering filters SNP on a VCF depending on depth (DP) allele number (AN), allele frequency (AF) and SNP quality.\n"
		description += "example 1 : VCFFiltering.py -f myVCF.vcf -o FilteredVCF.vcf\n"
		description += "example 2 : VCFFiltering.py -f myVCF.vcf -N 2 -F 0.87 -b bed1.bed bed2.bed -o FilteredVCF.vcf\n"
		parser = OptionParser(description = description, version = "0.2")
		parser.add_option("-f", "--vcf",	   dest = "VCFFile",   action = "store", type = "string", help = "Input VCF File name [compulsory] [format: VCF]",                                           default = "")
		parser.add_option("-o", "--output",	dest = "outFile",   action = "store", type = "string", help = "output VCF File name [compulsory] [format: VCF]",                                             default = "")
		parser.add_option("-m", "--minDP",	 dest = "minDP",	 action = "store", type = "int",	help = "minimum of depth ; if both minDP and maxDP are set, optimal DP will not be calculated ",     default = 0)
		parser.add_option("-M", "--maxDP",	 dest = "maxDP",	 action = "store", type = "int",	help = "maximum of depth ; if both minDP and maxDP are set, optimal DP will not be calculated ",     default = 0)
		parser.add_option("-N", "--AN",		dest = "AN",		action = "store", type = "int",	help = "maximum number of allele for a SNP; default = 2",                                                default = 2)
		parser.add_option("-F", "--AF",		dest = "AF",		action = "store", type = "float",  help = "minimum frequency for the alternative allele of a SNP; default = 0.9",                        default = 0.9)
		parser.add_option("-b", "--bed",	   dest = "bedFiles",  action = "append", type = "string", help = "bed files: list of coordinates to filter, multiple arguments allowed '-b file1 file2' ",  default = [])
		parser.add_option("-G", "--graphHTML", dest = "graphHTML", action = "store", type = "string", help = "name of the HTML linking to graphs ",                                                      default = "")
		parser.add_option("-d", "--dirGraphs", dest = "dirGraphs", action = "store", type = "string", help = "name of the folder containing graphs ",                                                    default = "")
		options = parser.parse_args()[0]
		self._setAttributesFromOptions(options)


	def _setAttributesFromOptions(self, options):
		self._options = options

	def run(self):
		if self._options.minDP and self._options.maxDP :
			if self._options.minDP > self._options.maxDP :
				self.stop_err( 'error in options : minDP > max DP (%s > %s)' %(self._options.minDP,self._options.maxDP)) 
					
		prg = "VCFFiltering.py -g -G 'png' "
		args = ""
		args += "-f %s" % self._options.VCFFile
		args += " "
		args += "-o %s" % self._options.outFile
		if self._options.AF :
			args += " "
			args += "-F %s" % self._options.AF
		if self._options.AN :
			args += " "
			args += "-N %s" % self._options.AN
		if self._options.minDP :
			args += " "
			args += "-m %s" % self._options.minDP
		if self._options.maxDP :
			args += " "
			args += "-M %s" % self._options.maxDP
		for bedfile in self._options.bedFiles :
			args += " "
			args += "-b %s" % bedfile
		cmd = "%s %s" %(prg, args)
		
		print cmd
		
		try:
			tmp_err = tempfile.NamedTemporaryFile().name
			tmp_stderr = open( tmp_err, 'wb' )
			proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stderr=tmp_stderr )
			returncode = proc.wait()
			tmp_stderr.close()
			# get stderr, allowing for case where it's very large
			tmp_stderr = open( tmp_err, 'rb' )
			stderr = ''
			buffsize = 1048576
			try:
				while True:
					stderr += tmp_stderr.read( buffsize )
					if not stderr or len( stderr ) % buffsize != 0:
						break
			except OverflowError:
				pass
			tmp_stderr.close()
			if stderr:
				raise Exception, stderr
		except Exception, e:
			self.stop_err( 'Error in VCFFiltering:\n' + str( e ) ) 
			
		if True :
			html = open(self._options.graphHTML, "w")
			
			os.mkdir(self._options.dirGraphs)
			lGraphsFiles = glob.glob("VCFFiltering_graphs/*")
			for file in lGraphsFiles :
				baseName = os.path.basename(file)
				shutil.move( file ,"%s/%s" %(self._options.dirGraphs, baseName))
				line = "<img src=\"%s\" > \n" %(baseName)
				html.write(line)


if __name__ == "__main__":
	iWrapper = VCFFilteringWrapper()
	iWrapper.setAttributesFromCmdLine()
	iWrapper.run()