Mercurial > repos > ryanmorin > nextgen_variant_identification
diff SNV/filter_snvmix_somatic.py @ 0:74f5ea818cea
Uploaded
author | ryanmorin |
---|---|
date | Wed, 12 Oct 2011 19:50:38 -0400 |
parents | |
children | 361d6506850a |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNV/filter_snvmix_somatic.py Wed Oct 12 19:50:38 2011 -0400 @@ -0,0 +1,67 @@ +#!/usr/bin/env python + +""" +Identifies sites with strong evidence for no variant (to be used on SNVMix output from matched normal samples) + +usage: %prog [options] + -n, --input1=n: raw snvmix output file from normal + -t, --input2=t: annoated snvmix output from tumour + -o, --output1=o: sorted list of germline sites + -p, --posterior=p: threshold for posterior probability of AA + -d, --max_nonref_depth=d: maximum number of non-reference bases allowed at site + +""" + +import os, shutil, subprocess, sys, tempfile +from galaxy import eggs +import pkg_resources; pkg_resources.require( "bx-python" ) +from bx.cookbook import doc_optparse +import re + +def stop_err( msg ): + sys.stderr.write( '%s\n' % msg ) + sys.exit() + +def __main__(): + #Parse Command Line + options, args = doc_optparse.parse( __doc__ ) + input = open(options.input1, 'r') + tmpDir = tempfile.mkdtemp() + output = tempfile.NamedTemporaryFile(mode='w',delete=False) + min_p = options.posterior + max_d = int(options.max_nonref_depth) + for line in input: + cols = line.split() + #expect input to look like this: + #1:10143872 C N C:44,N:0,1.0000000000,0.0000000000,0.0000000000,1 + data = cols[3] + #print data + data_list = data.split(',') + nref_val = data_list[1] + ref_p = data_list[2] + nref_depth = int(nref_val.split(":")[1]) + #print nref_depth, 'vs', max_d + if ref_p >= min_p and nref_depth <= max_d: + #print nref_depth, '<=', max_d + has_chr = re.match("chr",cols[0]) + if not has_chr: + output.write("chr") + output.write(cols[0]) + output.write("\n") + output_file_nosort = output.name + + output.close() + cmd = 'sort -S 2000M %s | join - %s > %s' + cmd = cmd % (output_file_nosort,options.input2,options.output1) + proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir) + + returncode = proc.wait() + if os.path.getsize( options.output1 ) > 0: + sys.stdout.write( 'wrote filtered output' ) + else: + stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' ) + #clean up temp files + if os.path.exists( tmpDir ): + shutil.rmtree( tmpDir ) + +if __name__ == "__main__" : __main__()