Mercurial > repos > ryanmorin > nextgen_variant_identification
comparison SNV/filter_snvmix_somatic.py @ 0:74f5ea818cea
Uploaded
author | ryanmorin |
---|---|
date | Wed, 12 Oct 2011 19:50:38 -0400 |
parents | |
children | 361d6506850a |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:74f5ea818cea |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 Identifies sites with strong evidence for no variant (to be used on SNVMix output from matched normal samples) | |
5 | |
6 usage: %prog [options] | |
7 -n, --input1=n: raw snvmix output file from normal | |
8 -t, --input2=t: annoated snvmix output from tumour | |
9 -o, --output1=o: sorted list of germline sites | |
10 -p, --posterior=p: threshold for posterior probability of AA | |
11 -d, --max_nonref_depth=d: maximum number of non-reference bases allowed at site | |
12 | |
13 """ | |
14 | |
15 import os, shutil, subprocess, sys, tempfile | |
16 from galaxy import eggs | |
17 import pkg_resources; pkg_resources.require( "bx-python" ) | |
18 from bx.cookbook import doc_optparse | |
19 import re | |
20 | |
21 def stop_err( msg ): | |
22 sys.stderr.write( '%s\n' % msg ) | |
23 sys.exit() | |
24 | |
25 def __main__(): | |
26 #Parse Command Line | |
27 options, args = doc_optparse.parse( __doc__ ) | |
28 input = open(options.input1, 'r') | |
29 tmpDir = tempfile.mkdtemp() | |
30 output = tempfile.NamedTemporaryFile(mode='w',delete=False) | |
31 min_p = options.posterior | |
32 max_d = int(options.max_nonref_depth) | |
33 for line in input: | |
34 cols = line.split() | |
35 #expect input to look like this: | |
36 #1:10143872 C N C:44,N:0,1.0000000000,0.0000000000,0.0000000000,1 | |
37 data = cols[3] | |
38 #print data | |
39 data_list = data.split(',') | |
40 nref_val = data_list[1] | |
41 ref_p = data_list[2] | |
42 nref_depth = int(nref_val.split(":")[1]) | |
43 #print nref_depth, 'vs', max_d | |
44 if ref_p >= min_p and nref_depth <= max_d: | |
45 #print nref_depth, '<=', max_d | |
46 has_chr = re.match("chr",cols[0]) | |
47 if not has_chr: | |
48 output.write("chr") | |
49 output.write(cols[0]) | |
50 output.write("\n") | |
51 output_file_nosort = output.name | |
52 | |
53 output.close() | |
54 cmd = 'sort -S 2000M %s | join - %s > %s' | |
55 cmd = cmd % (output_file_nosort,options.input2,options.output1) | |
56 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir) | |
57 | |
58 returncode = proc.wait() | |
59 if os.path.getsize( options.output1 ) > 0: | |
60 sys.stdout.write( 'wrote filtered output' ) | |
61 else: | |
62 stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' ) | |
63 #clean up temp files | |
64 if os.path.exists( tmpDir ): | |
65 shutil.rmtree( tmpDir ) | |
66 | |
67 if __name__ == "__main__" : __main__() |