view utilities/validateBam.py @ 10:7d10b55965c9 draft default tip

planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
author thondeboer
date Wed, 16 May 2018 17:02:51 -0400
parents 6e75a84e9338
children
line wrap: on
line source

#!/usr/bin/env python

import sys
import os
import gzip
from struct import unpack

BAM_EOF = ['1f', '8b', '08', '04', '00', '00', '00', '00', '00', 'ff', '06', '00', '42', '43', '02', '00', '1b', '00', '03', '00', '00', '00', '00', '00', '00', '00', '00', '00']

def getBytes(fmt,amt):
	if fmt == '<i' or fmt == '<I':
		mySize = 4
	elif fmt == '<c' or fmt == '<b' or fmt == '<B':
		mySize = 1
	else:
		print '\nError, unknown format:',fmt,'\n'
		exit(1)
	if amt == 1:
		fread = f.read(mySize)
		if not fread:
			return None
		return unpack(fmt,fread)[0]
	else:
		fread = f.read(mySize*amt)
		if not fread:
			return None
		return unpack(fmt,fread)

# check eof
IN_BAM = sys.argv[1]
f = open(IN_BAM,'rb')
f.seek(os.path.getsize(IN_BAM)-28)
EOF = [format(ord(n),'02x') for n in f.read()]
print 'EOF_MARKER:  ', ' '.join(EOF)
if EOF != BAM_EOF:
	print '\nWARNING: BAM EOF DOES NOT MATCH EXPECTED STRING.\n'
f.close()

# check other stuff
f = gzip.open(IN_BAM,'rb')

print 'MAGIC STRING:', f.read(4)
l_text = getBytes('<i',1)
print 'l_text:      ', l_text
print 'text:      \n', f.read(l_text)
n_ref = getBytes('<i',1)
print 'n_ref:       ', n_ref

for i in xrange(n_ref):
	l_name = getBytes('<i',1)
	print 'ref'+str(i)+' - l_name:', l_name
	print 'ref'+str(i)+' - name:  ', f.read(l_name)
	print 'ref'+str(i)+' - l_ref: ', getBytes('<i',1)

print '\nEXAMINING ALIGNMENT DATA:\n'
aln_N = 0
while True:
	aln_N += 1
	blockSize = getBytes('<i',1)
	if blockSize == None:
		break
	print '['+str(aln_N)+']:', 'blockSize:', blockSize
	print '-- refID:', getBytes('<i',1)
	print '-- pos:  ', getBytes('<i',1)
	bmqnl = getBytes('<I',1)
	binv  = (bmqnl>>16)&65535
	mapq  = (bmqnl>>8)&255
	lrn   = bmqnl&255
	print '-- bmqnl:', bmqnl, '(bin='+str(binv)+', mapq='+str(mapq)+', l_readname+1='+str(lrn)+')'
	flgnc = getBytes('<I',1)
	flag  = (flgnc>>16)&65535
	ncig  = flgnc&65535
	print '-- flgnc:', flgnc, '(flag='+str(flag)+', ncig='+str(ncig)+')'
	print '-- l_seq:', getBytes('<i',1)
	print '-- nxtID:', getBytes('<i',1)
	print '-- nxtPo:', getBytes('<i',1)
	print '-- tlen: ', getBytes('<i',1)
	print '-- rname:', str([f.read(lrn)])[1:-1]

	f.read(blockSize-32-lrn)
	#print [block]


f.close()