annotate SNV/filter_snvmix_somatic.py @ 5:a4975ec34575

Uploaded
author ryanmorin
date Mon, 17 Oct 2011 14:57:09 -0400
parents 74f5ea818cea
children 361d6506850a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1 #!/usr/bin/env python
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
2
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
3 """
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
4 Identifies sites with strong evidence for no variant (to be used on SNVMix output from matched normal samples)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
5
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
6 usage: %prog [options]
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
7 -n, --input1=n: raw snvmix output file from normal
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
8 -t, --input2=t: annoated snvmix output from tumour
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
9 -o, --output1=o: sorted list of germline sites
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
10 -p, --posterior=p: threshold for posterior probability of AA
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
11 -d, --max_nonref_depth=d: maximum number of non-reference bases allowed at site
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
12
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
13 """
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
14
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
15 import os, shutil, subprocess, sys, tempfile
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
16 from galaxy import eggs
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
17 import pkg_resources; pkg_resources.require( "bx-python" )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
18 from bx.cookbook import doc_optparse
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
19 import re
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
20
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
21 def stop_err( msg ):
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
22 sys.stderr.write( '%s\n' % msg )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
23 sys.exit()
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
24
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
25 def __main__():
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
26 #Parse Command Line
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
27 options, args = doc_optparse.parse( __doc__ )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
28 input = open(options.input1, 'r')
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
29 tmpDir = tempfile.mkdtemp()
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
30 output = tempfile.NamedTemporaryFile(mode='w',delete=False)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
31 min_p = options.posterior
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
32 max_d = int(options.max_nonref_depth)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
33 for line in input:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
34 cols = line.split()
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
35 #expect input to look like this:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
36 #1:10143872 C N C:44,N:0,1.0000000000,0.0000000000,0.0000000000,1
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
37 data = cols[3]
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
38 #print data
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
39 data_list = data.split(',')
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
40 nref_val = data_list[1]
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
41 ref_p = data_list[2]
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
42 nref_depth = int(nref_val.split(":")[1])
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
43 #print nref_depth, 'vs', max_d
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
44 if ref_p >= min_p and nref_depth <= max_d:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
45 #print nref_depth, '<=', max_d
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
46 has_chr = re.match("chr",cols[0])
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
47 if not has_chr:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
48 output.write("chr")
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
49 output.write(cols[0])
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
50 output.write("\n")
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
51 output_file_nosort = output.name
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
52
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
53 output.close()
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
54 cmd = 'sort -S 2000M %s | join - %s > %s'
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
55 cmd = cmd % (output_file_nosort,options.input2,options.output1)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
56 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
57
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
58 returncode = proc.wait()
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
59 if os.path.getsize( options.output1 ) > 0:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
60 sys.stdout.write( 'wrote filtered output' )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
61 else:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
62 stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
63 #clean up temp files
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
64 if os.path.exists( tmpDir ):
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
65 shutil.rmtree( tmpDir )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
66
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
67 if __name__ == "__main__" : __main__()