comparison sam_pileup.py @ 1:e7d863c5c5d6

Update sam_pileup to use the fasta_indexes data table.
author Dave Bouvier <dave@bx.psu.edu>
date Wed, 11 Dec 2013 12:54:32 -0500
parents 95612c159681
children
comparison
equal deleted inserted replaced
0:95612c159681 1:e7d863c5c5d6
6 usage: %prog [options] 6 usage: %prog [options]
7 -p, --input1=p: bam file 7 -p, --input1=p: bam file
8 -o, --output1=o: Output pileup 8 -o, --output1=o: Output pileup
9 -R, --ref=R: Reference file type 9 -R, --ref=R: Reference file type
10 -n, --ownFile=n: User-supplied fasta reference file 10 -n, --ownFile=n: User-supplied fasta reference file
11 -d, --dbkey=d: dbkey of user-supplied file
12 -x, --indexDir=x: Index directory
13 -b, --bamIndex=b: BAM index file 11 -b, --bamIndex=b: BAM index file
12 -g, --index=g: Path of the indexed reference genome
14 -s, --lastCol=s: Print the mapping quality as the last column 13 -s, --lastCol=s: Print the mapping quality as the last column
15 -i, --indels=i: Only output lines containing indels 14 -i, --indels=i: Only output lines containing indels
16 -M, --mapCap=M: Cap mapping quality 15 -M, --mapCap=M: Cap mapping quality
17 -c, --consensus=c: Call the consensus sequence using MAQ consensus model 16 -c, --consensus=c: Call the consensus sequence using MAQ consensus model
18 -T, --theta=T: Theta parameter (error dependency coefficient) 17 -T, --theta=T: Theta parameter (error dependency coefficient)
29 28
30 def stop_err( msg ): 29 def stop_err( msg ):
31 sys.stderr.write( '%s\n' % msg ) 30 sys.stderr.write( '%s\n' % msg )
32 sys.exit() 31 sys.exit()
33 32
34 def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ):
35 seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR
36 seqPath = ''
37 for line in open( seqFile ):
38 line = line.rstrip( '\r\n' )
39 if line and not line.startswith( '#' ) and line.startswith( 'index' ):
40 fields = line.split( '\t' )
41 if len( fields ) < 3:
42 continue
43 if fields[1] == dbkey:
44 seqPath = fields[2].strip()
45 break
46 return seqPath
47
48 def __main__(): 33 def __main__():
49 #Parse Command Line 34 #Parse Command Line
50 options, args = doc_optparse.parse( __doc__ ) 35 options, args = doc_optparse.parse( __doc__ )
51 seqPath = check_seq_file( options.dbkey, options.indexDir )
52 # output version # of tool 36 # output version # of tool
53 try: 37 try:
54 tmp = tempfile.NamedTemporaryFile().name 38 tmp = tempfile.NamedTemporaryFile().name
55 tmp_stdout = open( tmp, 'wb' ) 39 tmp_stdout = open( tmp, 'wb' )
56 proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout ) 40 proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout )
75 tmpf0bam_name = '%s.bam' % tmpf0_name 59 tmpf0bam_name = '%s.bam' % tmpf0_name
76 tmpf0bambai_name = '%s.bam.bai' % tmpf0_name 60 tmpf0bambai_name = '%s.bam.bai' % tmpf0_name
77 tmpf1 = tempfile.NamedTemporaryFile( dir=tmpDir ) 61 tmpf1 = tempfile.NamedTemporaryFile( dir=tmpDir )
78 tmpf1_name = tmpf1.name 62 tmpf1_name = tmpf1.name
79 tmpf1.close() 63 tmpf1.close()
80 tmpf1fai_name = '%s.fai' % tmpf1_name
81 #link bam and bam index to working directory (can't move because need to leave original) 64 #link bam and bam index to working directory (can't move because need to leave original)
82 os.symlink( options.input1, tmpf0bam_name ) 65 os.symlink( options.input1, tmpf0bam_name )
83 os.symlink( options.bamIndex, tmpf0bambai_name ) 66 os.symlink( options.bamIndex, tmpf0bambai_name )
84 #get parameters for pileup command 67 #get parameters for pileup command
85 if options.lastCol == 'yes': 68 if options.lastCol == 'yes':
98 try: 81 try:
99 # have to nest try-except in try-finally to handle 2.4 82 # have to nest try-except in try-finally to handle 2.4
100 try: 83 try:
101 #index reference if necessary and prepare pileup command 84 #index reference if necessary and prepare pileup command
102 if options.ref == 'indexed': 85 if options.ref == 'indexed':
103 if not os.path.exists( "%s.fai" % seqPath ): 86 if not os.path.exists( "%s.fai" % options.index ):
104 raise Exception, "No sequences are available for '%s', request them by reporting this error." % options.dbkey 87 raise Exception, "Indexed genome %s not present, request it by reporting this error." % options.index
105 cmd = cmd % ( opts, seqPath, tmpf0bam_name, options.output1 ) 88 cmd = cmd % ( opts, options.index, tmpf0bam_name, options.output1 )
106 elif options.ref == 'history': 89 elif options.ref == 'history':
107 os.symlink( options.ownFile, tmpf1_name ) 90 os.symlink( options.ownFile, tmpf1_name )
108 cmdIndex = 'samtools faidx %s' % ( tmpf1_name ) 91 cmdIndex = 'samtools faidx %s' % ( tmpf1_name )
109 tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name 92 tmp = tempfile.NamedTemporaryFile( dir=tmpDir ).name
110 tmp_stderr = open( tmp, 'wb' ) 93 tmp_stderr = open( tmp, 'wb' )