changeset 7:4a739d7dc458 draft

Uploaded
author jeremie
date Tue, 17 Jun 2014 08:24:57 -0400
parents a630dc777540
children ab3c1999f607
files delly.py
diffstat 1 files changed, 114 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delly.py	Tue Jun 17 08:24:57 2014 -0400
@@ -0,0 +1,114 @@
+#!/usr/bin/python
+
+import argparse, os, shutil, subprocess, sys, tempfile, shlex, vcf
+
+parser = argparse.ArgumentParser(description='')
+parser.add_argument ( '-b', dest='bam', help='the bam file', required=True )	
+parser.add_argument ( '-t', dest='types', help='SV analysis type (DEL, DUP, INV, TRA)', nargs='+', required=True )
+parser.add_argument ( '-q', dest='map_qual', help='min. paired-end mapping quality', default='0' )
+parser.add_argument ( '-s', dest='mad_cutoff', help='insert size cutoff, median+s*MAD (deletions only)', default=5 )
+parser.add_argument ( '-g', dest='genome', help='genome fasta file' )
+parser.add_argument ( '-m', dest='min_flank', help='minimum flanking sequence size', default=13 )
+parser.add_argument ( '-e', dest='epsilon', help='allowed epsilon deviation of PE vs. SR deletion', default=0.1 )
+parser.add_argument ( '-o', dest='output', help='output file' )
+
+delly="delly_v0.5.5"
+
+def execute( cmd, output="" ):
+	tmp_dir = tempfile.mkdtemp()
+	try: 
+		err = open(tmp_dir+"/errorLog", 'a')
+		if output != "":
+			out = open(output, 'w')
+		else:
+			out = subprocess.PIPE
+		process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err )
+		process.wait()
+		err.close()
+		if out != subprocess.PIPE:
+			out.close()
+	except Exception, e:
+		sys.stderr.write("problem doing : %s\n" %(cmd))
+		sys.stderr.write( '%s\n\n' % str(e) )
+
+
+def delly(args, tempDir):
+	tempOutputs=[];
+	for typ in args.types:
+		output=str(tempDir)+"/"+str(typ)
+		tempOutputs.append(output)
+		cmd = "%s %s -t %s -o %s -q %s -s %s -m %s -e %s " % (delly, args.bam, typ, output, args.map_qual, args.mad_cutoff, args.min_flank, args.epsilon)
+		if (args.genome!="" and args.genome!=None):
+			cmd += "-g %s" % (args.genome)
+		# print ("commande = "+cmd)
+		execute( cmd )
+	return tempOutputs
+
+
+def merge(outputs, outputFile):
+	template = vcf.Reader(filename=outputs[0])
+	vcfWriter = vcf.Writer(open(outputFile, 'w'), template)
+	for output in outputs:
+		vcfReader = vcf.Reader(filename=output)
+		for record in vcfReader:
+			vcfWriter.write_record(record)
+	return 0
+
+
+def getVersion(program):
+	try:
+		tmp = tempfile.NamedTemporaryFile().name
+		tmp_stdout = open( tmp, 'wb' )
+		proc = subprocess.Popen( args=program, shell=True, stdout=tmp_stdout )
+		tmp_stdout.close()
+		returncode = proc.wait()
+		stdout = None
+		for line in open( tmp_stdout.name, 'rb' ):
+			if line.lower().find( 'version' ) >= 0:
+				stdout = line.strip()
+				break
+		if stdout:
+			sys.stdout.write( '%s\n' % stdout )
+	except:
+		sys.stdout.write( 'Could not determine %s version\n' % (program) )
+
+
+def __main__():
+	
+	args = parser.parse_args()
+
+	tempDir = tempfile.mkdtemp();
+	getVersion(delly)
+
+	try:
+		execute("samtools index " + args.bam)
+	except Exception, e:
+		print("problem while indexing bam file " + str(e))
+
+	try:
+		tempOutputs = delly(args, tempDir)
+	except Exception, e:
+		sys.stdout.write("problem while runing delly " + str(e))
+
+	try:	
+		if args.output:
+			outputs=[]
+			for output in tempOutputs:
+				if os.path.exists(output):
+					if os.path.getsize(output)>0:
+						outputs.append(output)
+			if len(outputs)>0:
+				merge(outputs, args.output)
+	except Exception, e:
+		sys.stdout.write("problem while merging files " + str(e))
+
+	finally:
+		try:
+			if os.path.exists(tempDir):
+				shutil.rmtree(tempDir)
+			os.system("rm "+str(args.bam)+".bai")
+		except:
+			pass
+
+if __name__=="__main__":
+	__main__()