view SNV/filter_snvmix.py @ 5:a4975ec34575

Uploaded
author ryanmorin
date Mon, 17 Oct 2011 14:57:09 -0400
parents 74f5ea818cea
children
line wrap: on
line source

#!/usr/bin/env python

"""
Filters raw SNVmix output on posterior probability (keeps SNVs with sum of pAB and pBB > 0.99)
Also requires SNV to be supported by at least one 'centered' base call
Can also filter SNVs adjacent to indels (-i) and require SNVs supported by both strands (-S)

usage: %prog [options]
   -s, --input1=s: raw snvmix output file
   -o, --output1=o: filtered snvmix file
   -i, --max_indels=i: max number of indels in reads allowed
   -S, --dual_strand=S: require dual-strand coverage
   
"""

import os, shutil, subprocess, sys, tempfile
from galaxy import eggs
import pkg_resources; pkg_resources.require( "bx-python" )
from bx.cookbook import doc_optparse

def stop_err( msg ):
    sys.stderr.write( '%s\n' % msg )
    sys.exit()

def __main__():
    #Parse Command Line
    options, args = doc_optparse.parse( __doc__ )
    tmpDir = tempfile.mkdtemp()
    #prepare basic filter_snvmix command
    cmd = "filter_snvmix.pl "
    if options.dual_strand == 'yes':
        cmd = cmd + " -S"
    if options.max_indels:
        cmd = cmd + " -i " + options.max_indels + " "
    cmd = cmd + '< %s > %s'
    try:
        cmd = cmd % ( options.input1, options.output1)
        #run command
        print(cmd)
        tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name
        tmp_stderr = open( tmp, 'wb' )
        proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() )
        returncode = proc.wait()
        tmp_stderr.close()
        #did it succeed?
        # get stderr, allowing for case where it's very large
        tmp_stderr = open( tmp, 'rb' )
        stderr = ''
        buffsize = 1048576
        try:
            while True:
                stderr += tmp_stderr.read( buffsize )
                if not stderr or len( stderr ) % buffsize != 0:
                    break
        except OverflowError:
            pass
        tmp_stderr.close()
        if returncode != 0:
            raise Exception, stderr
    except Exception, e:
        stop_err( 'Error running filter_snvmix tool\n' + str( e ) )
    
    # check that there are results in the output file
    if os.path.getsize( options.output1 ) > 0:
        sys.stdout.write( 'wrote SNVMix output' )
    else:
        stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )

if __name__ == "__main__" : __main__()