Mercurial > repos > devteam > blat_coverage_report
comparison blat_coverage_report.py @ 0:30f0948c649c draft default tip
Imported from capsule None
| author | devteam |
|---|---|
| date | Mon, 19 May 2014 12:34:01 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:30f0948c649c |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import os, sys | |
| 4 | |
| 5 assert sys.version_info[:2] >= ( 2, 4 ) | |
| 6 | |
| 7 def stop_err( msg ): | |
| 8 sys.stderr.write( "%s\n" % msg ) | |
| 9 sys.exit() | |
| 10 | |
| 11 def reverse_complement(s): | |
| 12 complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" , ".":"."} | |
| 13 reversed_s = [] | |
| 14 for i in s: | |
| 15 reversed_s.append(complement_dna[i]) | |
| 16 reversed_s.reverse() | |
| 17 return "".join(reversed_s) | |
| 18 | |
| 19 def __main__(): | |
| 20 nuc_index = {'a':0,'t':1,'c':2,'g':3} | |
| 21 diff_hash = {} # key = (chrom, index) | |
| 22 infile = sys.argv[1] | |
| 23 outfile = sys.argv[2] | |
| 24 invalid_lines = 0 | |
| 25 invalid_chars = 0 | |
| 26 data_id = '' | |
| 27 data_seq = '' | |
| 28 | |
| 29 for i, line in enumerate( open( infile ) ): | |
| 30 line = line.rstrip( '\r\n' ) | |
| 31 if not line or line.startswith( '#' ): | |
| 32 continue | |
| 33 fields = line.split() | |
| 34 if len(fields) != 23: # standard number of pslx columns | |
| 35 invalid_lines += 1 | |
| 36 continue | |
| 37 if not fields[0].isdigit(): | |
| 38 invalid_lines += 1 | |
| 39 continue | |
| 40 read_id = fields[9] | |
| 41 chrom = fields[13] | |
| 42 try: | |
| 43 block_count = int(fields[17]) | |
| 44 except: | |
| 45 invalid_lines += 1 | |
| 46 continue | |
| 47 block_size = fields[18].split(',') | |
| 48 read_start = fields[19].split(',') | |
| 49 chrom_start = fields[20].split(',') | |
| 50 read_seq = fields[21].split(',') | |
| 51 chrom_seq = fields[22].split(',') | |
| 52 | |
| 53 for j in range(block_count): | |
| 54 try: | |
| 55 this_block_size = int(block_size[j]) | |
| 56 this_read_start = int(read_start[j]) | |
| 57 this_chrom_start = int(chrom_start[j]) | |
| 58 except: | |
| 59 invalid_lines += 1 | |
| 60 break | |
| 61 this_read_seq = read_seq[j] | |
| 62 this_chrom_seq = chrom_seq[j] | |
| 63 | |
| 64 if not this_read_seq.isalpha(): | |
| 65 continue | |
| 66 if not this_chrom_seq.isalpha(): | |
| 67 continue | |
| 68 | |
| 69 # brut force to check coverage | |
| 70 for k in range(this_block_size): | |
| 71 cur_index = this_chrom_start+k | |
| 72 sub_a = this_read_seq[k:(k+1)].lower() | |
| 73 sub_b = this_chrom_seq[k:(k+1)].lower() | |
| 74 if not diff_hash.has_key((chrom, cur_index)): | |
| 75 try: | |
| 76 diff_hash[(chrom, cur_index)] = [0,0,0,0,sub_b.upper()] # a, t, c, g, ref. nuc. | |
| 77 except Exception, e: | |
| 78 stop_err( str( e ) ) | |
| 79 if sub_a in ['a','t','c','g']: | |
| 80 diff_hash[(chrom, cur_index)][nuc_index[(sub_a)]] += 1 | |
| 81 else: | |
| 82 invalid_chars += 1 | |
| 83 | |
| 84 outputfh = open(outfile, 'w') | |
| 85 outputfh.write( "##title\tlocation\tref.\tcov.\tA\tT\tC\tG\n" ) | |
| 86 keys = diff_hash.keys() | |
| 87 keys.sort() | |
| 88 for i in keys: | |
| 89 (chrom, location) = i | |
| 90 sum = diff_hash[ (i) ][ 0 ] + diff_hash[ ( i ) ][ 1 ] + diff_hash[ ( i ) ][ 2 ] + diff_hash[ ( i ) ][ 3 ] # did not include N's | |
| 91 if sum == 0: | |
| 92 continue | |
| 93 ratio_A = diff_hash[ ( i ) ][ 0 ] * 100.0 / sum | |
| 94 ratio_T = diff_hash[ ( i ) ][ 1 ] * 100.0 / sum | |
| 95 ratio_C = diff_hash[ ( i ) ][ 2 ] * 100.0 / sum | |
| 96 ratio_G = diff_hash[ ( i ) ][ 3 ] * 100.0 / sum | |
| 97 (title_head, title_tail) = os.path.split(chrom) | |
| 98 result = "%s\t%s\t%s\t%d\tA(%0.0f)\tT(%0.0f)\tC(%0.0f)\tG(%0.0f)\n" % ( title_tail, location, diff_hash[(i)][4], sum, ratio_A, ratio_T, ratio_C, ratio_G ) | |
| 99 outputfh.write(result) | |
| 100 outputfh.close() | |
| 101 | |
| 102 if invalid_lines: | |
| 103 print 'Skipped %d invalid lines. ' % ( invalid_lines ) | |
| 104 if invalid_chars: | |
| 105 print 'Skipped %d invalid characters in the alignment. ' % (invalid_chars) | |
| 106 | |
| 107 if __name__ == '__main__': __main__() |
