comparison SNV/filter_snvmix_somatic.py @ 0:74f5ea818cea

Uploaded
author ryanmorin
date Wed, 12 Oct 2011 19:50:38 -0400
parents
children 361d6506850a
comparison
equal deleted inserted replaced
-1:000000000000 0:74f5ea818cea
1 #!/usr/bin/env python
2
3 """
4 Identifies sites with strong evidence for no variant (to be used on SNVMix output from matched normal samples)
5
6 usage: %prog [options]
7 -n, --input1=n: raw snvmix output file from normal
8 -t, --input2=t: annoated snvmix output from tumour
9 -o, --output1=o: sorted list of germline sites
10 -p, --posterior=p: threshold for posterior probability of AA
11 -d, --max_nonref_depth=d: maximum number of non-reference bases allowed at site
12
13 """
14
15 import os, shutil, subprocess, sys, tempfile
16 from galaxy import eggs
17 import pkg_resources; pkg_resources.require( "bx-python" )
18 from bx.cookbook import doc_optparse
19 import re
20
21 def stop_err( msg ):
22 sys.stderr.write( '%s\n' % msg )
23 sys.exit()
24
25 def __main__():
26 #Parse Command Line
27 options, args = doc_optparse.parse( __doc__ )
28 input = open(options.input1, 'r')
29 tmpDir = tempfile.mkdtemp()
30 output = tempfile.NamedTemporaryFile(mode='w',delete=False)
31 min_p = options.posterior
32 max_d = int(options.max_nonref_depth)
33 for line in input:
34 cols = line.split()
35 #expect input to look like this:
36 #1:10143872 C N C:44,N:0,1.0000000000,0.0000000000,0.0000000000,1
37 data = cols[3]
38 #print data
39 data_list = data.split(',')
40 nref_val = data_list[1]
41 ref_p = data_list[2]
42 nref_depth = int(nref_val.split(":")[1])
43 #print nref_depth, 'vs', max_d
44 if ref_p >= min_p and nref_depth <= max_d:
45 #print nref_depth, '<=', max_d
46 has_chr = re.match("chr",cols[0])
47 if not has_chr:
48 output.write("chr")
49 output.write(cols[0])
50 output.write("\n")
51 output_file_nosort = output.name
52
53 output.close()
54 cmd = 'sort -S 2000M %s | join - %s > %s'
55 cmd = cmd % (output_file_nosort,options.input2,options.output1)
56 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir)
57
58 returncode = proc.wait()
59 if os.path.getsize( options.output1 ) > 0:
60 sys.stdout.write( 'wrote filtered output' )
61 else:
62 stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
63 #clean up temp files
64 if os.path.exists( tmpDir ):
65 shutil.rmtree( tmpDir )
66
67 if __name__ == "__main__" : __main__()