Mercurial > repos > xuebing > sharplabtool
comparison tools/samtools/sam_pileup.py @ 0:9071e359b9a3
Uploaded
| author | xuebing |
|---|---|
| date | Fri, 09 Mar 2012 19:37:19 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:9071e359b9a3 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 Creates a pileup file from a bam file and a reference. | |
| 5 | |
| 6 usage: %prog [options] | |
| 7 -p, --input1=p: bam file | |
| 8 -o, --output1=o: Output pileup | |
| 9 -R, --ref=R: Reference file type | |
| 10 -n, --ownFile=n: User-supplied fasta reference file | |
| 11 -d, --dbkey=d: dbkey of user-supplied file | |
| 12 -x, --indexDir=x: Index directory | |
| 13 -b, --bamIndex=b: BAM index file | |
| 14 -s, --lastCol=s: Print the mapping quality as the last column | |
| 15 -i, --indels=i: Only output lines containing indels | |
| 16 -M, --mapCap=M: Cap mapping quality | |
| 17 -c, --consensus=c: Call the consensus sequence using MAQ consensus model | |
| 18 -T, --theta=T: Theta parameter (error dependency coefficient) | |
| 19 -N, --hapNum=N: Number of haplotypes in sample | |
| 20 -r, --fraction=r: Expected fraction of differences between a pair of haplotypes | |
| 21 -I, --phredProb=I: Phred probability of an indel in sequencing/prep | |
| 22 | |
| 23 """ | |
| 24 | |
| 25 import os, shutil, subprocess, sys, tempfile | |
| 26 from galaxy import eggs | |
| 27 import pkg_resources; pkg_resources.require( "bx-python" ) | |
| 28 from bx.cookbook import doc_optparse | |
| 29 | |
def stop_err( msg ):
    """Write *msg* (plus a newline) to stderr and terminate the script.

    The process exits with status 1 so that callers inspecting the exit
    code, and not only stderr, see the run as failed.  Raises SystemExit.
    """
    sys.stderr.write( '%s\n' % msg )
    sys.exit( 1 )
| 33 | |
def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ):
    """Look up the reference sequence path registered for *dbkey*.

    Reads ``<GALAXY_DATA_INDEX_DIR>/sam_fa_indices.loc`` and scans its
    tab-separated lines.  Only lines beginning with ``index`` and having at
    least three fields are considered; the first one whose second field
    equals *dbkey* wins.

    Returns the (whitespace-stripped) third field of the matching line, or
    an empty string when no line matches.  Errors from opening or reading
    the file propagate to the caller.
    """
    seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR
    seqPath = ''
    handle = open( seqFile )
    # try/finally (not ``with``) keeps this usable on the old interpreters
    # the rest of the script still caters for, while guaranteeing the
    # handle is closed.
    try:
        for line in handle:
            line = line.rstrip( '\r\n' )
            # A line starting with 'index' is necessarily non-empty and not
            # a '#' comment, so this single test is sufficient.
            if not line.startswith( 'index' ):
                continue
            fields = line.split( '\t' )
            if len( fields ) >= 3 and fields[1] == dbkey:
                seqPath = fields[2].strip()
                break
    finally:
        handle.close()
    return seqPath
| 47 | |
def _to_text( data ):
    """Return *data* as a native string, decoding it if it is a byte string."""
    if isinstance( data, str ):
        return data
    return data.decode( 'utf-8', 'replace' )

def _read_all( handle, buffsize=1048576 ):
    """Read *handle* to end-of-file in *buffsize* chunks and return the text.

    The loop stops on the first empty read.  (Testing the accumulated
    length against the buffer size, as was done previously, never
    terminates when the content is an exact non-zero multiple of
    *buffsize*.)
    """
    data = handle.read( buffsize )
    chunk = data
    try:
        while chunk:
            chunk = handle.read( buffsize )
            data += chunk
    except OverflowError:
        # Best effort for very large output: keep what was read so far.
        pass
    return _to_text( data )

def _run_command( args, cwd, stdout=None ):
    """Run the argument list *args* in *cwd*; return ( returncode, stderr text ).

    stderr is spooled through a temporary file in *cwd* rather than a pipe
    so that large amounts of diagnostic output cannot block the child.
    """
    stderr_file = tempfile.TemporaryFile( dir=cwd )
    try:
        proc = subprocess.Popen( args=args, cwd=cwd, stdout=stdout, stderr=stderr_file )
        returncode = proc.wait()
        stderr_file.seek( 0 )
        return returncode, _read_all( stderr_file )
    finally:
        stderr_file.close()

def _samtools_version():
    """Return the first line of bare ``samtools`` output mentioning 'version', or None."""
    # The temporary file is kept open and handed to the child directly, so
    # it is removed automatically on close instead of being left behind.
    output = tempfile.TemporaryFile()
    try:
        proc = subprocess.Popen( args=[ 'samtools' ], stdout=output, stderr=subprocess.STDOUT )
        proc.wait()
        output.seek( 0 )
        for line in output:
            line = _to_text( line )
            if 'version' in line.lower():
                return line.strip()
        return None
    finally:
        output.close()

def __main__():
    """Build a pileup file from a BAM file and a reference.

    Parses the command line described by the module docstring, reports the
    Samtools version on stdout, links the BAM file and its index into a
    private scratch directory, indexes a user-supplied reference when
    ``--ref=history``, and runs ``samtools pileup`` writing to
    ``--output1``.  Any failure is reported through stop_err(); the scratch
    directory is always removed.  Finally the output file is checked for
    being non-empty.
    """
    #Parse Command Line
    options, args = doc_optparse.parse( __doc__ )
    # output version # of tool
    try:
        version = _samtools_version()
    except Exception:
        # e.g. samtools is not on the PATH; the pileup step reports that properly
        version = None
    if version:
        sys.stdout.write( 'Samtools %s\n' % version )
    else:
        sys.stdout.write( 'Could not determine Samtools version\n' )
    #prepare file names; mkdtemp() gives a private directory, so fixed names are safe
    tmpDir = tempfile.mkdtemp()
    bam_link = os.path.join( tmpDir, 'input.bam' )
    bai_link = '%s.bai' % bam_link
    ref_link = os.path.join( tmpDir, 'reference.fa' )
    #get parameters for pileup command, as an argument list so no shell quoting is needed
    pileup_args = [ 'samtools', 'pileup' ]
    if options.lastCol == 'yes':
        pileup_args.append( '-s' )
    if options.indels == 'yes':
        pileup_args.append( '-i' )
    pileup_args.extend( [ '-M', str( options.mapCap ) ] )
    if options.consensus == 'yes':
        pileup_args.extend( [ '-c',
                              '-T', str( options.theta ),
                              '-N', str( options.hapNum ),
                              '-r', str( options.fraction ),
                              '-I', str( options.phredProb ) ] )
    try:
        # have to nest try-except in try-finally to handle 2.4
        try:
            #link bam and bam index to working directory (can't move because need to leave original)
            os.symlink( options.input1, bam_link )
            os.symlink( options.bamIndex, bai_link )
            #index reference if necessary and pick the reference to use
            if options.ref == 'indexed':
                seqPath = check_seq_file( options.dbkey, options.indexDir )
                if not os.path.exists( "%s.fai" % seqPath ):
                    raise Exception( "No sequences are available for '%s', request them by reporting this error." % options.dbkey )
                reference = seqPath
            elif options.ref == 'history':
                os.symlink( options.ownFile, ref_link )
                returncode, stderr = _run_command( [ 'samtools', 'faidx', ref_link ], tmpDir )
                #did index succeed?
                if returncode != 0:
                    raise Exception( 'Error creating index file\n' + stderr )
                reference = ref_link
            else:
                # Previously the unformatted command template was executed here.
                raise Exception( "Unknown reference source '%s'" % options.ref )
            #perform pileup command, sending its stdout straight to the output file
            output = open( options.output1, 'wb' )
            try:
                returncode, stderr = _run_command( pileup_args + [ '-f', reference, bam_link ], tmpDir, stdout=output )
            finally:
                output.close()
            #did it succeed?
            if returncode != 0:
                raise Exception( stderr )
        except Exception:
            # sys.exc_info() instead of an "except ... as"/"except ..., e"
            # binding keeps this line valid on every interpreter version.
            stop_err( 'Error running Samtools pileup tool\n' + str( sys.exc_info()[1] ) )
    finally:
        #clean up temp files
        if os.path.exists( tmpDir ):
            shutil.rmtree( tmpDir )
    # check that there are results in the output file
    if os.path.getsize( options.output1 ) > 0:
        sys.stdout.write( 'Converted BAM to pileup' )
    else:
        stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
| 162 | |
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
