Mercurial > repos > ryanmorin > nextgen_variant_identification
view SNV/filter_snvmix_somatic.py @ 7:351b3acadd17 default tip
Uploaded
author | ryanmorin |
---|---|
date | Tue, 18 Oct 2011 18:33:15 -0400 |
parents | 361d6506850a |
children |
line wrap: on
line source
#!/usr/bin/env python """ Identifies sites with strong evidence for no variant (to be used on SNVMix output from matched normal samples) usage: %prog [options] -n, --input1=n: raw snvmix output file from normal -t, --input2=t: annoated snvmix output from tumour -o, --output1=o: sorted list of germline sites -p, --posterior=p: threshold for posterior probability of AA -d, --max_nonref_depth=d: maximum number of non-reference bases allowed at site """ import os, shutil, subprocess, sys, tempfile from galaxy import eggs import pkg_resources; pkg_resources.require( "bx-python" ) from bx.cookbook import doc_optparse import re os.environ['LC_COLLATE'] = 'C' def stop_err( msg ): sys.stderr.write( '%s\n' % msg ) sys.exit() def __main__(): #Parse Command Line options, args = doc_optparse.parse( __doc__ ) input = open(options.input1, 'r') tmpDir = tempfile.mkdtemp() output = tempfile.NamedTemporaryFile(mode='w',delete=False) min_p = options.posterior max_d = int(options.max_nonref_depth) for line in input: cols = line.split() #expect input to look like this: #1:10143872 C N C:44,N:0,1.0000000000,0.0000000000,0.0000000000,1 data = cols[3] #print data data_list = data.split(',') nref_val = data_list[1] ref_p = data_list[2] nref_depth = int(nref_val.split(":")[1]) #print nref_depth, 'vs', max_d if ref_p >= min_p and nref_depth <= max_d: #print nref_depth, '<=', max_d has_chr = re.match("chr",cols[0]) if not has_chr: output.write("chr") output.write(cols[0]) output.write("\n") output_file_nosort = output.name output.close() cmd = 'sort -S 2000M %s | join - %s > %s' cmd = cmd % (output_file_nosort,options.input2,options.output1) proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir) returncode = proc.wait() if os.path.getsize( options.output1 ) > 0: sys.stdout.write( 'wrote filtered output' ) else: stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' ) #clean up temp files if os.path.exists( tmpDir ): shutil.rmtree( tmpDir ) if __name__ == "__main__" : __main__()