| 0 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 """ | 
|  | 4 Creates a pileup file from a bam file and a reference. | 
|  | 5 | 
|  | 6 usage: %prog [options] | 
|  | 7    -p, --input1=p: bam file | 
|  | 8    -o, --output1=o: Output pileup | 
|  | 9    -R, --ref=R: Reference file type | 
|  | 10    -n, --ownFile=n: User-supplied fasta reference file | 
|  | 11    -d, --dbkey=d: dbkey of user-supplied file | 
|  | 12    -x, --indexDir=x: Index directory | 
|  | 13    -b, --bamIndex=b: BAM index file | 
|  | 14    -s, --lastCol=s: Print the mapping quality as the last column | 
|  | 15    -i, --indels=i: Only output lines containing indels | 
|  | 16    -M, --mapCap=M: Cap mapping quality | 
|  | 17    -c, --consensus=c: Call the consensus sequence using MAQ consensus model | 
|  | 18    -T, --theta=T: Theta parameter (error dependency coefficient) | 
|  | 19    -N, --hapNum=N: Number of haplotypes in sample | 
|  | 20    -r, --fraction=r: Expected fraction of differences between a pair of haplotypes | 
|  | 21    -I, --phredProb=I: Phred probability of an indel in sequencing/prep | 
|  | 22 | 
|  | 23 """ | 
|  | 24 | 
|  | 25 import os, shutil, subprocess, sys, tempfile | 
|  | 26 from galaxy import eggs | 
|  | 27 import pkg_resources; pkg_resources.require( "bx-python" ) | 
|  | 28 from bx.cookbook import doc_optparse | 
|  | 29 | 
def stop_err( msg ):
    """
    Report a fatal error and terminate the script.

    Writes msg followed by a newline to stderr, then exits with status 1 so that
    the failure is visible through the exit code as well as through stderr.
    Always raises SystemExit; never returns.
    """
    sys.stderr.write( '%s\n' % msg )
    # a bare sys.exit() would report success (status 0) despite the error
    sys.exit( 1 )
|  | 33 | 
def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ):
    """
    Look up the reference sequence path registered for dbkey.

    Reads '<GALAXY_DATA_INDEX_DIR>/sam_fa_indices.loc'. Only tab-separated lines
    that start with 'index' and have at least three fields are considered; for
    the first such line whose second field equals dbkey, the third field is
    returned with surrounding whitespace stripped.

    Returns '' when no line matches. An error raised by open() (missing or
    unreadable .loc file) propagates to the caller.
    """
    seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR
    seqPath = ''
    loc_file = open( seqFile )
    # try/finally (rather than 'with') closes the handle on every interpreter
    # version this script may run under
    try:
        for line in loc_file:
            line = line.rstrip( '\r\n' )
            # a line starting with 'index' is never blank nor a '#' comment,
            # so this single test covers all three conditions
            if not line.startswith( 'index' ):
                continue
            fields = line.split( '\t' )
            if len( fields ) >= 3 and fields[1] == dbkey:
                seqPath = fields[2].strip()
                break
    finally:
        loc_file.close()
    return seqPath
|  | 47 | 
def _read_all( handle, buffsize=1048576 ):
    """
    Return everything readable from handle, fetched buffsize units at a time.

    Reading stops at the first empty chunk (end of file), so the loop ends even
    when the total size is an exact multiple of buffsize. An OverflowError
    raised while reading is swallowed and whatever was collected so far is
    returned.
    """
    chunks = []
    try:
        while True:
            chunk = handle.read( buffsize )
            if not chunk:
                break
            chunks.append( chunk )
    except OverflowError:
        pass
    return ''.join( chunks )

def _run_command( cmd_args, cwd, stdout=None ):
    """
    Run cmd_args (an argument list, executed without a shell) inside cwd.

    stdout, when given, is an open file that receives the command's standard
    output. Standard error is captured in a self-deleting temporary file.
    Returns a ( returncode, stderr_text ) tuple. Errors from starting the
    process (e.g. executable not found) propagate to the caller.
    """
    err_file = tempfile.TemporaryFile( mode='w+' )
    try:
        proc = subprocess.Popen( args=cmd_args, cwd=cwd, stdout=stdout, stderr=err_file )
        returncode = proc.wait()
        err_file.seek( 0 )
        return returncode, _read_all( err_file )
    finally:
        err_file.close()

def _report_samtools_version():
    """
    Write the Samtools version line to stdout, or a fallback notice.

    Runs 'samtools' with no arguments and scans its combined stdout/stderr for
    the first line containing 'version' (case-insensitive). Any failure while
    doing so is treated as "version unknown" rather than aborting the tool.
    """
    version = None
    try:
        out_file = tempfile.TemporaryFile( mode='w+' )
        try:
            proc = subprocess.Popen( args=[ 'samtools' ], stdout=out_file, stderr=subprocess.STDOUT )
            proc.wait()
            out_file.seek( 0 )
            for line in out_file:
                if line.lower().find( 'version' ) >= 0:
                    version = line.strip()
                    break
        finally:
            out_file.close()
    except Exception:
        # best effort only: a missing version must not stop the conversion
        version = None
    if version:
        sys.stdout.write( 'Samtools %s\n' % version )
    else:
        sys.stdout.write( 'Could not determine Samtools version\n' )

def __main__():
    """
    Convert a BAM file to pileup format with 'samtools pileup'.

    Steps:
      1. Parse the command line described by the module docstring.
      2. Report the Samtools version on stdout.
      3. Symlink the BAM file and its index into a private temporary directory
         (the originals must stay where they are).
      4. Pick the reference: for --ref=indexed, the path found by
         check_seq_file() (its '.fai' must already exist); for --ref=history,
         a symlink to the user-supplied fasta that is indexed with
         'samtools faidx' inside the temporary directory.
      5. Run 'samtools pileup', writing its stdout to the --output1 file.
      6. Remove the temporary directory, whether or not the run succeeded.

    Any failure in steps 3-5 is reported through stop_err() with the prefix
    'Error running Samtools pileup tool'. An empty output file is reported
    through stop_err() as well.
    """
    #Parse Command Line
    options, args = doc_optparse.parse( __doc__ )
    # output version # of tool
    _report_samtools_version()
    tmpDir = tempfile.mkdtemp()
    try:
        # have to nest try-except in try-finally to handle 2.4
        try:
            #link bam and bam index to working directory (can't move because need to leave original)
            # tmpDir is freshly created and private, so fixed names cannot collide;
            # abspath keeps the links valid when relative paths are supplied
            bam_link = os.path.join( tmpDir, 'input.bam' )
            os.symlink( os.path.abspath( options.input1 ), bam_link )
            os.symlink( os.path.abspath( options.bamIndex ), '%s.bai' % bam_link )
            #get parameters for pileup command
            pileup_args = [ 'samtools', 'pileup' ]
            if options.lastCol == 'yes':
                pileup_args.append( '-s' )
            if options.indels == 'yes':
                pileup_args.append( '-i' )
            pileup_args.extend( [ '-M', str( options.mapCap ) ] )
            if options.consensus == 'yes':
                pileup_args.extend( [ '-c',
                                      '-T', str( options.theta ),
                                      '-N', str( options.hapNum ),
                                      '-r', str( options.fraction ),
                                      '-I', str( options.phredProb ) ] )
            #index reference if necessary
            if options.ref == 'indexed':
                seqPath = check_seq_file( options.dbkey, options.indexDir )
                if not os.path.exists( "%s.fai" % seqPath ):
                    raise Exception( "No sequences are available for '%s', request them by reporting this error." % options.dbkey )
                ref_path = seqPath
            elif options.ref == 'history':
                # faidx writes '<reference>.fai' next to the file it is given, so
                # index a link inside tmpDir instead of the user's own file
                ref_path = os.path.join( tmpDir, 'reference.fa' )
                os.symlink( os.path.abspath( options.ownFile ), ref_path )
                returncode, stderr = _run_command( [ 'samtools', 'faidx', ref_path ], tmpDir )
                #did index succeed?
                if returncode != 0:
                    raise Exception( 'Error creating index file\n' + stderr )
            else:
                # without this guard no reference would be set for the pileup call
                raise Exception( "Unknown reference file type '%s'" % options.ref )
            pileup_args.extend( [ '-f', ref_path, bam_link ] )
            #perform pileup command, sending its stdout straight to the output file
            out_file = open( options.output1, 'wb' )
            try:
                returncode, stderr = _run_command( pileup_args, tmpDir, stdout=out_file )
            finally:
                out_file.close()
            #did it succeed?
            if returncode != 0:
                raise Exception( stderr )
        except Exception:
            # sys.exc_info() keeps a single syntax valid on both old and new
            # interpreters (neither 'except X, e' nor 'except X as e' is)
            stop_err( 'Error running Samtools pileup tool\n' + str( sys.exc_info()[1] ) )
    finally:
        #clean up temp files
        if os.path.exists( tmpDir ):
            shutil.rmtree( tmpDir )
    # check that there are results in the output file
    if os.path.getsize( options.output1 ) > 0:
        sys.stdout.write( 'Converted BAM to pileup' )
    else:
        stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
|  | 162 | 
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    __main__()