0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Identifies sites with strong evidence for no variant (to be used on SNVMix output from matched normal samples)
|
|
5
|
|
6 usage: %prog [options]
|
|
7 -n, --input1=n: raw snvmix output file from normal
|
|
8 -t, --input2=t: annoated snvmix output from tumour
|
|
9 -o, --output1=o: sorted list of germline sites
|
|
10 -p, --posterior=p: threshold for posterior probability of AA
|
|
11 -d, --max_nonref_depth=d: maximum number of non-reference bases allowed at site
|
|
12
|
|
13 """
|
|
14
|
|
15 import os, shutil, subprocess, sys, tempfile
|
|
16 from galaxy import eggs
|
|
17 import pkg_resources; pkg_resources.require( "bx-python" )
|
|
18 from bx.cookbook import doc_optparse
|
|
19 import re
|
|
20
|
|
21 def stop_err( msg ):
|
|
22 sys.stderr.write( '%s\n' % msg )
|
|
23 sys.exit()
|
|
24
|
|
25 def __main__():
|
|
26 #Parse Command Line
|
|
27 options, args = doc_optparse.parse( __doc__ )
|
|
28 input = open(options.input1, 'r')
|
|
29 tmpDir = tempfile.mkdtemp()
|
|
30 output = tempfile.NamedTemporaryFile(mode='w',delete=False)
|
|
31 min_p = options.posterior
|
|
32 max_d = int(options.max_nonref_depth)
|
|
33 for line in input:
|
|
34 cols = line.split()
|
|
35 #expect input to look like this:
|
|
36 #1:10143872 C N C:44,N:0,1.0000000000,0.0000000000,0.0000000000,1
|
|
37 data = cols[3]
|
|
38 #print data
|
|
39 data_list = data.split(',')
|
|
40 nref_val = data_list[1]
|
|
41 ref_p = data_list[2]
|
|
42 nref_depth = int(nref_val.split(":")[1])
|
|
43 #print nref_depth, 'vs', max_d
|
|
44 if ref_p >= min_p and nref_depth <= max_d:
|
|
45 #print nref_depth, '<=', max_d
|
|
46 has_chr = re.match("chr",cols[0])
|
|
47 if not has_chr:
|
|
48 output.write("chr")
|
|
49 output.write(cols[0])
|
|
50 output.write("\n")
|
|
51 output_file_nosort = output.name
|
|
52
|
|
53 output.close()
|
|
54 cmd = 'sort -S 2000M %s | join - %s > %s'
|
|
55 cmd = cmd % (output_file_nosort,options.input2,options.output1)
|
|
56 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir)
|
|
57
|
|
58 returncode = proc.wait()
|
|
59 if os.path.getsize( options.output1 ) > 0:
|
|
60 sys.stdout.write( 'wrote filtered output' )
|
|
61 else:
|
|
62 stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
|
|
63 #clean up temp files
|
|
64 if os.path.exists( tmpDir ):
|
|
65 shutil.rmtree( tmpDir )
|
|
66
|
|
67 if __name__ == "__main__" : __main__()
|