# HG changeset patch # User jeremie # Date 1403007897 14400 # Node ID 4a739d7dc4588aa264146994cc7c267c9686b964 # Parent a630dc7775406093a818c65e2575744623a3fed2 Uploaded diff -r a630dc777540 -r 4a739d7dc458 delly.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delly.py Tue Jun 17 08:24:57 2014 -0400 @@ -0,0 +1,114 @@ +#!/usr/bin/python + +import argparse, os, shutil, subprocess, sys, tempfile, shlex, vcf + +parser = argparse.ArgumentParser(description='') +parser.add_argument ( '-b', dest='bam', help='the bam file', required=True ) +parser.add_argument ( '-t', dest='types', help='SV analysis type (DEL, DUP, INV, TRA)', nargs='+', required=True ) +parser.add_argument ( '-q', dest='map_qual', help='min. paired-end mapping quality', default='0' ) +parser.add_argument ( '-s', dest='mad_cutoff', help='insert size cutoff, median+s*MAD (deletions only)', default=5 ) +parser.add_argument ( '-g', dest='genome', help='genome fasta file' ) +parser.add_argument ( '-m', dest='min_flank', help='minimum flanking sequence size', default=13 ) +parser.add_argument ( '-e', dest='epsilon', help='allowed epsilon deviation of PE vs. SR deletion', default=0.1 ) +parser.add_argument ( '-o', dest='output', help='output file' ) + +delly="delly_v0.5.5" + +def execute( cmd, output="" ): + tmp_dir = tempfile.mkdtemp() + try: + err = open(tmp_dir+"/errorLog", 'a') + if output != "": + out = open(output, 'w') + else: + out = subprocess.PIPE + process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err ) + process.wait() + err.close() + if out != subprocess.PIPE: + out.close() + except Exception, e: + sys.stderr.write("problem doing : %s\n" %(cmd)) + sys.stderr.write( '%s\n\n' % str(e) ) + + +def delly(args, tempDir): + tempOutputs=[]; + for typ in args.types: + output=str(tempDir)+"/"+str(typ) + tempOutputs.append(output) + cmd = "%s %s -t %s -o %s -q %s -s %s -m %s -e %s " % (delly, args.bam, typ, output, args.map_qual, args.mad_cutoff, args.min_flank, args.epsilon) + if (args.genome!="" and args.genome!=None): + cmd += "-g %s" % (args.genome) + # print ("commande = "+cmd) + execute( cmd ) + return tempOutputs + + +def merge(outputs, outputFile): + template = vcf.Reader(filename=outputs[0]) + vcfWriter = vcf.Writer(open(outputFile, 'w'), template) + for output in outputs: + vcfReader = vcf.Reader(filename=output) + for record in vcfReader: + vcfWriter.write_record(record) + return 0 + + +def getVersion(program): + try: + tmp = tempfile.NamedTemporaryFile().name + tmp_stdout = open( tmp, 'wb' ) + proc = subprocess.Popen( args=program, shell=True, stdout=tmp_stdout ) + tmp_stdout.close() + returncode = proc.wait() + stdout = None + for line in open( tmp_stdout.name, 'rb' ): + if line.lower().find( 'version' ) >= 0: + stdout = line.strip() + break + if stdout: + sys.stdout.write( '%s\n' % stdout ) + except: + sys.stdout.write( 'Could not determine %s version\n' % (program) ) + + +def __main__(): + + args = parser.parse_args() + + tempDir = tempfile.mkdtemp(); + getVersion(delly) + + try: + execute("samtools index " + args.bam) + except Exception, e: + print("problem while indexing bam file " + str(e)) + + try: + tempOutputs = delly(args, tempDir) + except Exception, e: + sys.stdout.write("problem while runing delly " + str(e)) + + try: + if args.output: + outputs=[] + for output in tempOutputs: + if os.path.exists(output): + if os.path.getsize(output)>0: + outputs.append(output) + if len(outputs)>0: + merge(outputs, args.output) + except Exception, e: + sys.stdout.write("problem while merging files " + str(e)) + + finally: + try: + if os.path.exists(tempDir): + shutil.rmtree(tempDir) + os.system("rm "+str(args.bam)+".bai") + except: + pass + +if __name__=="__main__": + __main__()