Mercurial > repos > ryanmorin > nextgen_variant_identification
diff SNV/snvmix.py @ 0:74f5ea818cea
Uploaded
author | ryanmorin |
---|---|
date | Wed, 12 Oct 2011 19:50:38 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python

"""
Runs the SNVMix2 binary on a bam input file with various options.

usage: %prog [options]
    -i, --input1=i: bam file
    -o, --output1=o: Output SNVMix (raw)
    -d, --dbkey=d: dbkey of user-supplied file
    -x, --indexDir=x: index directory
    -t, --type=t: analysis type (e.g. mb|m|b|M|Mb|MB|SNVMix1)
    -q, --base=q: base qual threshold
    -Q, --map=Q: map qual threshold
    -l, --pos=l: position file
    -f, --full=f: Full mode (output scores for every position)
    -R, --keep_dups: Retain reads flagged as duplicates (not recommended!)
    -c, --keep_chastity: Retain reads that failed the chastity filter
"""

# NOTE: the module docstring above is handed to doc_optparse.parse() in
# __main__(), so its "usage:" section is program input rather than plain
# documentation.  Do not add notes to it; put them in comments instead.

import os, shutil, subprocess, sys, tempfile
from galaxy import eggs
import pkg_resources; pkg_resources.require( "bx-python" )
from bx.cookbook import doc_optparse


def stop_err( msg ):
    """Write msg (plus a newline) to stderr and exit with status 1."""
    sys.stderr.write( '%s\n' % msg )
    # A bare sys.exit() would report success (status 0) to the caller.
    sys.exit( 1 )


def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ):
    """
    Look up the reference sequence path registered for dbkey.

    Reads '<GALAXY_DATA_INDEX_DIR>/sam_fa_indices.loc' as tab-separated
    text.  Only lines starting with 'index' and having at least three
    fields are considered; the first one whose second field equals dbkey
    wins and its third field (stripped) is returned.

    Returns '' when no line matches.  Errors from opening the file are
    not caught here.
    """
    seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR
    handle = open( seqFile )
    try:
        for line in handle:
            line = line.rstrip( '\r\n' )
            # A line that starts with 'index' is necessarily non-empty
            # and not a '#' comment, so one test is enough.
            if not line.startswith( 'index' ):
                continue
            fields = line.split( '\t' )
            if len( fields ) >= 3 and fields[1] == dbkey:
                return fields[2].strip()
    finally:
        handle.close()
    return ''


def _flag_enabled( value ):
    """
    Return True when an option value means "switched on".

    Accepts the string "yes" as well as the boolean True.
    NOTE(review): --keep_dups / --keep_chastity are declared without
    '=value' in the usage text, so doc_optparse presumably stores True
    for them rather than a string -- confirm against bx.cookbook.
    """
    return value is True or value == "yes"


def _build_command( options, seqPath ):
    """
    Build the SNVMix2 argument list from the parsed options.

    Raises Exception when a position file was requested but does not
    exist.  An argument list (instead of a shell string) keeps paths
    containing spaces or shell metacharacters intact.
    """
    # str() mirrors the '%s' formatting a command string would apply.
    cmd = [ 'SNVMix2', '-p', 'b',
            '-i', str( options.input1 ),
            '-r', str( seqPath ),
            '-o', str( options.output1 ),
            '-q', str( options.base ),
            '-Q', str( options.map ),
            '-t', str( options.type ) ]

    # "none" (or an omitted option) means no position file was supplied.
    if options.pos and options.pos != "none":
        if not os.path.isfile( options.pos ):
            raise Exception( "position file doesn't exist" )
        cmd.extend( [ '-l', options.pos ] )
        # Full mode is only passed along together with a position file.
        if options.full == "yes":
            cmd.append( '-f' )

    if _flag_enabled( options.keep_chastity ):
        cmd.append( '-c' )
    if _flag_enabled( options.keep_dups ):
        cmd.append( '-R' )
    return cmd


def _read_stderr( path, buffsize=1048576 ):
    """
    Return the contents of the file at path, read in buffsize chunks.

    Reading stops at end of file (an empty chunk), so a file whose size
    is an exact multiple of buffsize terminates too.
    """
    chunks = []
    handle = open( path, 'rb' )
    try:
        try:
            while True:
                chunk = handle.read( buffsize )
                if not chunk:
                    break
                chunks.append( chunk )
        except OverflowError:
            # Deliberate best effort: keep whatever was read so far.
            pass
    finally:
        handle.close()
    return ''.join( chunks )


def _run_snvmix( cmd, work_dir ):
    """
    Run cmd (an argument list) with work_dir as working directory.

    stderr is redirected to a temporary file inside work_dir so that a
    very large error stream cannot fill a pipe.  Returns a tuple
    (exit status, captured stderr).
    """
    fd, stderr_path = tempfile.mkstemp( dir=work_dir )
    stderr_handle = os.fdopen( fd, 'wb' )
    try:
        proc = subprocess.Popen( args=cmd, shell=False, cwd=work_dir, stderr=stderr_handle.fileno() )
        returncode = proc.wait()
    finally:
        stderr_handle.close()
    return returncode, _read_stderr( stderr_path )


def __main__():
    """
    Parse the command line, run SNVMix2 and verify that it wrote output.

    Any failure while preparing or running the command is reported via
    stop_err().  The temporary working directory is always removed.
    """
    #Parse Command Line
    options, args = doc_optparse.parse( __doc__ )
    seqPath = check_seq_file( options.dbkey, options.indexDir )

    #make temp dir
    tmpDir = tempfile.mkdtemp()

    try:
        # have to nest try-except in try-finally to handle 2.4
        try:
            if not os.path.exists( "%s.fai" % seqPath ):
                raise Exception( "No sequences are available for '%s', request them by reporting this error." % options.dbkey )

            #perform SNVMix2 command
            returncode, stderr = _run_snvmix( _build_command( options, seqPath ), tmpDir )

            #did it succeed?
            if returncode != 0:
                raise Exception( stderr )
        except Exception:
            # sys.exc_info() instead of 'except ... as e' so the file
            # still parses on interpreters that predate that syntax.
            e = sys.exc_info()[1]
            stop_err( 'Error running SNVMix2 tool\n' + str( e ) )
    finally:
        #clean up temp files
        if os.path.exists( tmpDir ):
            shutil.rmtree( tmpDir )

    # check that there are results in the output file
    if os.path.getsize( options.output1 ) > 0:
        sys.stdout.write( 'wrote SNVMix output' )
    else:
        stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )


if __name__ == "__main__" : __main__()