Mercurial > repos > ryanmorin > nextgen_variant_identification
view SNV/snvmix.py @ 7:351b3acadd17 default tip
Uploaded
author | ryanmorin |
---|---|
date | Tue, 18 Oct 2011 18:33:15 -0400 |
parents | 74f5ea818cea |
children |
line wrap: on
line source
#!/usr/bin/env python """ Runs the SNVMix2 binary on a bam input file with various options. usage: %prog [options] -i, --input1=i: bam file -o, --output1=o: Output SNVMix (raw) -d, --dbkey=d: dbkey of user-supplied file -x, --indexDir=x: index directory -t, --type=t: analysis type (e.g. mb|m|b|M|Mb|MB|SNVMix1) -q, --base=q: base qual threshold -Q, --map=Q: map qual threshold -l, --pos=l: position file -f, --full=f: Full mode (output scores for every position) -R, --keep_dups: Retain reads flagged as duplicates (not recommended!) -c, --keep_chastity: Retain reads that failed the chastity filter """ import os, shutil, subprocess, sys, tempfile from galaxy import eggs import pkg_resources; pkg_resources.require( "bx-python" ) from bx.cookbook import doc_optparse def stop_err( msg ): sys.stderr.write( '%s\n' % msg ) sys.exit() def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ): seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR seqPath = '' for line in open( seqFile ): line = line.rstrip( '\r\n' ) if line and not line.startswith( '#' ) and line.startswith( 'index' ): fields = line.split( '\t' ) if len( fields ) < 3: continue if fields[1] == dbkey: seqPath = fields[2].strip() break return seqPath def __main__(): #Parse Command Line options, args = doc_optparse.parse( __doc__ ) seqPath = check_seq_file( options.dbkey, options.indexDir ) #make temp dir tmpDir = tempfile.mkdtemp() #prepare basic SNVMix2 command cmd = 'SNVMix2 -p b -i %s -r %s -o %s -q %s -Q %s -t %s' try: # have to nest try-except in try-finally to handle 2.4 try: if not os.path.exists( "%s.fai" % seqPath ): raise Exception, "No sequences are available for '%s', request them by reporting this error." % options.dbkey cmd = cmd % ( options.input1, seqPath, options.output1, options.base, options.map, options.type) if options.pos != "none": if os.path.isfile(options.pos): cmd = cmd + ' -l ' + options.pos if options.full == "yes": cmd = cmd + ' -f ' else: raise Exception, "position file doesn't exist" if options.keep_chastity == "yes": cmd = cmd + ' -c' if options.keep_dups == "yes": cmd = cmd + ' -R' #perform SNVMix2 command tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name tmp_stderr = open( tmp, 'wb' ) proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() ) returncode = proc.wait() tmp_stderr.close() #did it succeed? # get stderr, allowing for case where it's very large tmp_stderr = open( tmp, 'rb' ) stderr = '' buffsize = 1048576 try: while True: stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass tmp_stderr.close() if returncode != 0: raise Exception, stderr except Exception, e: stop_err( 'Error running SNVMix2 tool\n' + str( e ) ) finally: #clean up temp files if os.path.exists( tmpDir ): shutil.rmtree( tmpDir ) # check that there are results in the output file if os.path.getsize( options.output1 ) > 0: sys.stdout.write( 'wrote SNVMix output' ) else: stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' ) if __name__ == "__main__" : __main__()