view SNV/snp_filters.py @ 5:a4975ec34575

Uploaded
author ryanmorin
date Mon, 17 Oct 2011 14:57:09 -0400
parents 74f5ea818cea
children 361d6506850a
line wrap: on
line source

#!/usr/bin/env python

"""
Filters raw SNP calls against known SNPs and joins the novel calls with a codon lookup file.

usage: %prog [options]
   -i, --input=i: raw snp call file chr:pos
   -o, --output1=o: novel snp calls in file
   -c, --output2=c: filtered novel SNPs associated with codons
   -K, --known_snps=k: known SNPs for filtering (sorted chr:pos file)
   -C, --codon=C: codon lookup file (sorted chr:pos)
   
"""

#my $cmd7 = "sort -S 2000M -k 1 $snps | join -a 1 - $known | grep -v dbS | grep -v Vent | grep -v Yor | grep -v Wats | sort -S 2000 -k 1 > $out";
#my $cmd8 = "join $codon $snps\_novel.txt > $snps\_novel." . $base . "codon";

import os, shutil, subprocess, sys, tempfile
from galaxy import eggs
import pkg_resources; pkg_resources.require( "bx-python" )
from bx.cookbook import doc_optparse

def stop_err( msg ):
    """Report a fatal error and terminate the script.

    Writes ``msg`` followed by a newline to standard error, then exits with
    status 1 so that the calling process can detect the failure from the
    exit code as well as from the stderr output.

    Args:
        msg: Text of the error message to report.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write( '%s\n' % msg )
    # A bare sys.exit() would exit with status 0, i.e. report success.
    sys.exit( 1 )

def _run_shell_command( cmd, work_dir ):
    """Run ``cmd`` through the shell, raising if it exits with a non-zero status.

    The command's stderr is redirected to an anonymous temporary file created
    in ``work_dir`` (rather than a pipe) so that a large amount of error
    output cannot block the child process.

    Args:
        cmd: Complete shell command line to execute.
        work_dir: Directory used as the command's working directory and as
            the location of the temporary stderr file.

    Raises:
        Exception: if the command's exit status is non-zero; the exception
            message is the captured stderr text.
    """
    err_file = tempfile.TemporaryFile( dir=work_dir )
    try:
        proc = subprocess.Popen( args=cmd, shell=True, cwd=work_dir, stderr=err_file.fileno() )
        returncode = proc.wait()
        # The child wrote through the shared descriptor; rewind to read it back.
        err_file.seek( 0 )
        stderr = err_file.read()
    finally:
        err_file.close()
    # The file is opened in binary mode; decode where bytes and str differ.
    if not isinstance( stderr, str ):
        stderr = stderr.decode( 'utf-8', 'replace' )
    if returncode != 0:
        raise Exception( stderr )


def __main__():
    """Filter raw SNP calls against known SNPs, then join the result with codons.

    Steps:
      1. Parse the command line options described in the module docstring.
      2. Sort the input, join it with the known-SNP file, drop lines matching
         the known-SNP markers (dbS, Vent, Yor, Wats) and write the remainder
         to ``output1``.
      3. Join the codon lookup file with ``output1`` and write the result to
         ``output2``.

    Any failure (missing option, failing command, empty output file) is
    reported through ``stop_err``, which terminates the script.
    """
    # Local import keeps this block self-contained on both Python 2 and 3.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    #Parse Command Line
    options, args = doc_optparse.parse( __doc__ )
    required = ( 'input', 'known_snps', 'codon', 'output1', 'output2' )
    missing = [ name for name in required if not getattr( options, name, None ) ]
    if missing:
        stop_err( 'Error, required arguments not provided: ' + ', '.join( missing ) )

    tmpDir = tempfile.mkdtemp()
    try:
        #prepare basic filter_snvmix command
        # NOTE: the shell reports only the exit status of the last command in
        # the pipeline (the final sort), so earlier stages are not checked.
        filter_cmd = "sort -S 2G -k 1 %s | join -a 1 - %s | grep -v dbS | grep -v Vent | grep -v Yor | grep -v Wats | sort -S 2G -k 1 > %s"
        try:
            # Quote the paths so spaces or shell metacharacters cannot alter the command.
            filter_cmd = filter_cmd % ( quote( options.input ), quote( options.known_snps ), quote( options.output1 ) )
            _run_shell_command( filter_cmd, tmpDir )
        except Exception as e:
            stop_err( 'Error running filter command\n' + str( e ) )

        # check that there are results in the output file
        if os.path.getsize( options.output1 ) > 0:
            sys.stdout.write( 'wrote output1' )
        else:
            stop_err( 'The output file is empty. All SNVs might have been known or there may be an error with your input file or settings.' )

        codon_cmd = "join %s %s > %s"
        try:
            codon_cmd = codon_cmd % ( quote( options.codon ), quote( options.output1 ), quote( options.output2 ) )
            _run_shell_command( codon_cmd, tmpDir )
        except Exception as e:
            stop_err( 'Error running codon command\n' + str( e ) )

        # check that there are results in the codon output file (output2, not output1)
        if os.path.getsize( options.output2 ) > 0:
            sys.stdout.write( 'wrote output2' )
        else:
            stop_err( 'The output file is empty. All SNVs might have been intronic or intergenic or there may be an error with your input file or settings.' )
    finally:
        # stop_err exits via SystemExit, so this also runs on the error paths.
        shutil.rmtree( tmpDir, ignore_errors=True )


    
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    __main__()