annotate SNV/filter_snvmix.py @ 5:a4975ec34575

Uploaded
author ryanmorin
date Mon, 17 Oct 2011 14:57:09 -0400
parents 74f5ea818cea
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1 #!/usr/bin/env python
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
2
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
3 """
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
4 Filters raw SNVmix output on posterior probability (keeps SNVs with sum of pAB and pBB > 0.99)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
5 Also requires SNV to be supported by at least one 'centered' base call
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
6 Can also filter SNVs adjacent to indels (-i) and require SNVs supported by both strands (-S)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
7
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
8 usage: %prog [options]
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
9 -s, --input1=s: raw snvmix output file
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
10 -o, --output1=o: filtered snvmix file
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
11 -i, --max_indels=i: max number of indels in reads allowed
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
12 -S, --dual_strand=S: require dual-strand coverage
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
13
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
14 """
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
15
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
16 import os, shutil, subprocess, sys, tempfile
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
17 from galaxy import eggs
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
18 import pkg_resources; pkg_resources.require( "bx-python" )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
19 from bx.cookbook import doc_optparse
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
20
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
def stop_err( msg ):
    """Report a fatal error and terminate the script.

    Writes ``msg`` followed by a newline to standard error, then raises
    ``SystemExit`` with a non-zero status.

    msg -- text of the error message; formatted with ``%s``.
    """
    sys.stderr.write( '%s\n' % msg )
    # A bare sys.exit() reports status 0 (success); use an explicit failure
    # status so that callers checking the exit code also see the error.
    sys.exit( 1 )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
24
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
def _read_all( handle, buffsize=1048576 ):
    """Return everything left in ``handle`` as text, read ``buffsize`` bytes at a time."""
    chunks = []
    try:
        while True:
            chunk = handle.read( buffsize )
            # Stop on the first empty read.  Testing the accumulated length
            # instead would loop forever whenever the total size is an exact
            # multiple of buffsize.
            if not chunk:
                break
            chunks.append( chunk )
    except OverflowError:
        # Best effort: keep whatever was read before the failure.
        pass
    data = b''.join( chunks )
    if not isinstance( data, str ):
        # Python 3 yields bytes from a binary handle; decode for the message.
        data = data.decode( 'utf-8', 'replace' )
    return data


def __main__():
    """Run ``filter_snvmix.pl`` on the input file and report the outcome.

    Options are parsed from the module docstring with ``doc_optparse``:
    ``input1`` is fed to the filter on standard input, ``output1`` receives
    its standard output, ``dual_strand == 'yes'`` adds ``-S`` and a non-empty
    ``max_indels`` adds ``-i <value>``.

    Any failure while launching or running the filter (including a non-zero
    exit status, in which case the filter's stderr is the message) is
    reported through ``stop_err``.  An empty output file is also reported
    through ``stop_err``.
    """
    #Parse Command Line
    options, args = doc_optparse.parse( __doc__ )

    #prepare basic filter_snvmix command
    # Built as an argument list and run without a shell, so option values
    # and file paths are never interpreted by the shell.
    cmd = [ 'filter_snvmix.pl' ]
    if options.dual_strand == 'yes':
        cmd.append( '-S' )
    if options.max_indels:
        cmd.extend( [ '-i', str( options.max_indels ) ] )

    tmpDir = tempfile.mkdtemp()
    try:
        try:
            # Echo an equivalent shell command line for the job log.
            print( '%s < %s > %s' % ( ' '.join( cmd ), options.input1, options.output1 ) )
            stderr_path = os.path.join( tmpDir, 'stderr.log' )
            # Input and output are opened here (relative to the current
            # working directory) and handed to the child, which itself runs
            # inside the scratch directory.
            with open( options.input1, 'rb' ) as raw_in:
                with open( options.output1, 'wb' ) as filtered_out:
                    with open( stderr_path, 'wb+' ) as err_log:
                        #run command
                        proc = subprocess.Popen( args=cmd, cwd=tmpDir, stdin=raw_in,
                                                 stdout=filtered_out, stderr=err_log )
                        returncode = proc.wait()
                        # get stderr, allowing for case where it's very large
                        err_log.seek( 0 )
                        stderr = _read_all( err_log )
            #did it succeed?
            if returncode != 0:
                raise Exception( stderr )
        except Exception as e:
            stop_err( 'Error running filter_snvmix tool\n' + str( e ) )
    finally:
        # Always remove the scratch directory, including when stop_err exits.
        shutil.rmtree( tmpDir, ignore_errors=True )

    # check that there are results in the output file
    if os.path.getsize( options.output1 ) > 0:
        sys.stdout.write( 'wrote SNVMix output' )
    else:
        stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
68
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
# Script entry point: run the tool only when executed directly, not on import.
if __name__ == "__main__":
    __main__()