annotate delly.py @ 7:4a739d7dc458 draft

Uploaded
author jeremie
date Tue, 17 Jun 2014 08:24:57 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
1 #!/usr/bin/python
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
2
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
3 import argparse, os, shutil, subprocess, sys, tempfile, shlex, vcf
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
4
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
5 parser = argparse.ArgumentParser(description='')
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
6 parser.add_argument ( '-b', dest='bam', help='the bam file', required=True )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
7 parser.add_argument ( '-t', dest='types', help='SV analysis type (DEL, DUP, INV, TRA)', nargs='+', required=True )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
8 parser.add_argument ( '-q', dest='map_qual', help='min. paired-end mapping quality', default='0' )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
9 parser.add_argument ( '-s', dest='mad_cutoff', help='insert size cutoff, median+s*MAD (deletions only)', default=5 )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
10 parser.add_argument ( '-g', dest='genome', help='genome fasta file' )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
11 parser.add_argument ( '-m', dest='min_flank', help='minimum flanking sequence size', default=13 )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
12 parser.add_argument ( '-e', dest='epsilon', help='allowed epsilon deviation of PE vs. SR deletion', default=0.1 )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
13 parser.add_argument ( '-o', dest='output', help='output file' )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
14
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
15 delly="delly_v0.5.5"
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
16
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
17 def execute( cmd, output="" ):
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
18 tmp_dir = tempfile.mkdtemp()
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
19 try:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
20 err = open(tmp_dir+"/errorLog", 'a')
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
21 if output != "":
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
22 out = open(output, 'w')
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
23 else:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
24 out = subprocess.PIPE
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
25 process = subprocess.Popen( args=shlex.split(cmd), stdout=out, stderr=err )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
26 process.wait()
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
27 err.close()
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
28 if out != subprocess.PIPE:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
29 out.close()
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
30 except Exception, e:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
31 sys.stderr.write("problem doing : %s\n" %(cmd))
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
32 sys.stderr.write( '%s\n\n' % str(e) )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
33
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
34
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
35 def delly(args, tempDir):
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
36 tempOutputs=[];
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
37 for typ in args.types:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
38 output=str(tempDir)+"/"+str(typ)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
39 tempOutputs.append(output)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
40 cmd = "%s %s -t %s -o %s -q %s -s %s -m %s -e %s " % (delly, args.bam, typ, output, args.map_qual, args.mad_cutoff, args.min_flank, args.epsilon)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
41 if (args.genome!="" and args.genome!=None):
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
42 cmd += "-g %s" % (args.genome)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
43 # print ("commande = "+cmd)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
44 execute( cmd )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
45 return tempOutputs
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
46
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
47
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
48 def merge(outputs, outputFile):
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
49 template = vcf.Reader(filename=outputs[0])
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
50 vcfWriter = vcf.Writer(open(outputFile, 'w'), template)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
51 for output in outputs:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
52 vcfReader = vcf.Reader(filename=output)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
53 for record in vcfReader:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
54 vcfWriter.write_record(record)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
55 return 0
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
56
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
57
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
58 def getVersion(program):
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
59 try:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
60 tmp = tempfile.NamedTemporaryFile().name
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
61 tmp_stdout = open( tmp, 'wb' )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
62 proc = subprocess.Popen( args=program, shell=True, stdout=tmp_stdout )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
63 tmp_stdout.close()
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
64 returncode = proc.wait()
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
65 stdout = None
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
66 for line in open( tmp_stdout.name, 'rb' ):
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
67 if line.lower().find( 'version' ) >= 0:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
68 stdout = line.strip()
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
69 break
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
70 if stdout:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
71 sys.stdout.write( '%s\n' % stdout )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
72 except:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
73 sys.stdout.write( 'Could not determine %s version\n' % (program) )
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
74
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
75
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
76 def __main__():
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
77
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
78 args = parser.parse_args()
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
79
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
80 tempDir = tempfile.mkdtemp();
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
81 getVersion(delly)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
82
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
83 try:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
84 execute("samtools index " + args.bam)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
85 except Exception, e:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
86 print("problem while indexing bam file " + str(e))
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
87
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
88 try:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
89 tempOutputs = delly(args, tempDir)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
90 except Exception, e:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
91 sys.stdout.write("problem while runing delly " + str(e))
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
92
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
93 try:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
94 if args.output:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
95 outputs=[]
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
96 for output in tempOutputs:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
97 if os.path.exists(output):
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
98 if os.path.getsize(output)>0:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
99 outputs.append(output)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
100 if len(outputs)>0:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
101 merge(outputs, args.output)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
102 except Exception, e:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
103 sys.stdout.write("problem while merging files " + str(e))
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
104
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
105 finally:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
106 try:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
107 if os.path.exists(tempDir):
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
108 shutil.rmtree(tempDir)
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
109 os.system("rm "+str(args.bam)+".bai")
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
110 except:
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
111 pass
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
112
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
113 if __name__=="__main__":
4a739d7dc458 Uploaded
jeremie
parents:
diff changeset
114 __main__()