diff SNV/snvmix.py @ 0:74f5ea818cea

Uploaded
author ryanmorin
date Wed, 12 Oct 2011 19:50:38 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SNV/snvmix.py	Wed Oct 12 19:50:38 2011 -0400
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+
+"""
+Runs the SNVMix2 binary on a bam input file with various options.
+
+usage: %prog [options]
+   -i, --input1=i: bam file
+   -o, --output1=o: Output SNVMix (raw)
+   -d, --dbkey=d: dbkey of user-supplied file
+   -x, --indexDir=x: index directory
+   -t, --type=t: analysis type (e.g. mb|m|b|M|Mb|MB|SNVMix1)
+   -q, --base=q: base qual threshold
+   -Q, --map=Q: map qual threshold
+   -l, --pos=l: position file
+   -f, --full=f: Full mode (output scores for every position)
+   -R, --keep_dups: Retain reads flagged as duplicates (not recommended!)
+   -c, --keep_chastity: Retain reads that failed the chastity filter
+"""
+
+import os, shutil, subprocess, sys, tempfile
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "bx-python" )
+from bx.cookbook import doc_optparse
+
+
+
+def stop_err( msg ):
+    sys.stderr.write( '%s\n' % msg )
+    sys.exit()
+
+def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ):
+    seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR
+    seqPath = ''
+    for line in open( seqFile ):
+        line = line.rstrip( '\r\n' )
+        if line and not line.startswith( '#' ) and line.startswith( 'index' ):
+            fields = line.split( '\t' )
+            if len( fields ) < 3:
+                continue
+            if fields[1] == dbkey:
+                seqPath = fields[2].strip()
+                break
+    return seqPath
+
+def __main__():
+    #Parse Command Line
+    options, args = doc_optparse.parse( __doc__ )
+    seqPath = check_seq_file( options.dbkey, options.indexDir )
+
+    #make temp dir
+    tmpDir = tempfile.mkdtemp()
+    
+    #prepare basic SNVMix2 command
+    cmd = 'SNVMix2 -p b -i %s -r %s -o %s -q %s -Q %s -t %s'
+    try:
+        # have to nest try-except in try-finally to handle 2.4
+        try:
+            if not os.path.exists( "%s.fai" % seqPath ):
+                raise Exception, "No sequences are available for '%s', request them by reporting this error." % options.dbkey
+            cmd = cmd % ( options.input1, seqPath, options.output1, options.base, options.map, options.type)
+            
+            if options.pos != "none":
+                if os.path.isfile(options.pos):
+                    cmd = cmd + ' -l ' + options.pos
+                    if options.full == "yes":
+                        cmd = cmd + ' -f '
+                else:
+                    raise Exception, "position file doesn't exist"
+
+            if options.keep_chastity == "yes":
+                cmd = cmd + ' -c'
+            if options.keep_dups == "yes":
+                cmd = cmd + ' -R'
+
+            #perform SNVMix2 command
+            tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name
+            tmp_stderr = open( tmp, 'wb' )
+
+            proc = subprocess.Popen( args=cmd, shell=True, cwd=tmpDir, stderr=tmp_stderr.fileno() )
+            returncode = proc.wait()
+            tmp_stderr.close()
+            #did it succeed?
+            # get stderr, allowing for case where it's very large
+            tmp_stderr = open( tmp, 'rb' )
+            stderr = ''
+            buffsize = 1048576
+            try:
+                while True:
+                    stderr += tmp_stderr.read( buffsize )
+                    if not stderr or len( stderr ) % buffsize != 0:
+                        break
+            except OverflowError:
+                pass
+            tmp_stderr.close()
+            if returncode != 0:
+                raise Exception, stderr
+        except Exception, e:
+            stop_err( 'Error running SNVMix2 tool\n' + str( e ) )
+    finally:
+        #clean up temp files
+        if os.path.exists( tmpDir ):
+            shutil.rmtree( tmpDir )
+    # check that there are results in the output file
+    if os.path.getsize( options.output1 ) > 0:
+        sys.stdout.write( 'wrote SNVMix output' )
+    else:
+        stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
+
+if __name__ == "__main__" : __main__()