Commit message:
Imported from capsule None |
added:
blat_coverage_report.py blat_coverage_report.xml test-data/blat_coverage_report_test1.out test-data/blat_coverage_report_test1.txt |
b |
diff -r 000000000000 -r 30f0948c649c blat_coverage_report.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blat_coverage_report.py Mon May 19 12:34:01 2014 -0400 |
[ |
#!/usr/bin/env python
"""Report per-position nucleotide support from BLAT pslx alignments.

Usage: blat_coverage_report.py <input.pslx> <output.tabular>

Every aligned block of every pslx record is walked base by base.  For each
(chromosome, position) pair the script counts how many reads show A, T, C
or G, then writes one tab-separated row per covered position holding the
reference nucleotide, the coverage and the percentage of reads supporting
each base.
"""

import os
import sys

assert sys.version_info[:2] >= (2, 4)

# Standard number of pslx columns; the last two hold the comma-separated
# read and reference sequences of each aligned block.
PSLX_COLUMNS = 23

# Slot of each nucleotide inside a per-position counter list.
NUC_INDEX = {'a': 0, 't': 1, 'c': 2, 'g': 3}


def stop_err(msg):
    """Write *msg* to stderr and terminate with a non-zero exit status."""
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)


def reverse_complement(s):
    """Return the reverse complement of the DNA string *s*, keeping case.

    ``N``/``n`` and ``.`` map to themselves.  Any character other than
    A/C/G/T/N (either case) or ``.`` raises ``KeyError``.
    """
    complement_dna = {"A": "T", "T": "A", "C": "G", "G": "C",
                      "a": "t", "t": "a", "c": "g", "g": "c",
                      "N": "N", "n": "n", ".": "."}
    return "".join([complement_dna[base] for base in reversed(s)])


def _parse_blocks(fields):
    """Extract the aligned blocks of one 23-column pslx record.

    Returns a list of ``(block_size, chrom_start, read_seq, chrom_seq)``
    tuples, or ``None`` when the block count, a block size, a read start or
    a chromosome start is not an integer, or when a per-block list is
    shorter than the declared block count.
    """
    try:
        block_count = int(fields[17])
        block_sizes = fields[18].split(',')
        read_starts = fields[19].split(',')
        chrom_starts = fields[20].split(',')
        read_seqs = fields[21].split(',')
        chrom_seqs = fields[22].split(',')
        blocks = []
        for j in range(block_count):
            # Read starts are not needed for the tally, but a non-numeric
            # value still marks the record as malformed.
            int(read_starts[j])
            blocks.append((int(block_sizes[j]), int(chrom_starts[j]),
                           read_seqs[j], chrom_seqs[j]))
    except (ValueError, IndexError):
        return None
    return blocks


def _tally_blocks(chrom, blocks, coverage):
    """Add the bases of *blocks* to *coverage*; return the invalid-base count.

    *coverage* maps ``(chrom, position)`` to ``[a, t, c, g, ref_nucleotide]``
    and is updated in place.  Blocks whose read or reference sequence is not
    purely alphabetic are ignored.  Read characters other than a/t/c/g are
    counted as invalid and do not contribute to the coverage.
    """
    invalid_chars = 0
    for block_size, chrom_start, read_seq, chrom_seq in blocks:
        if not (read_seq.isalpha() and chrom_seq.isalpha()):
            continue
        # Brute force: visit every base of the block.
        for k in range(block_size):
            position = (chrom, chrom_start + k)
            read_base = read_seq[k:k + 1].lower()
            if position not in coverage:
                # a, t, c, g, reference nucleotide
                coverage[position] = [0, 0, 0, 0, chrom_seq[k:k + 1].upper()]
            if read_base in NUC_INDEX:
                coverage[position][NUC_INDEX[read_base]] += 1
            else:
                invalid_chars += 1
    return invalid_chars


def _write_report(coverage, outfile):
    """Write the sorted per-position table of *coverage* to *outfile*."""
    out_fh = open(outfile, 'w')
    # try/finally rather than "with" keeps the asserted Python 2.4 floor.
    try:
        out_fh.write("##title\tlocation\tref.\tcov.\tA\tT\tC\tG\n")
        for key in sorted(coverage):
            chrom, location = key
            counts = coverage[key]
            total = sum(counts[:4])  # N's are not included
            if total == 0:
                continue
            ratios = [count * 100.0 / total for count in counts[:4]]
            out_fh.write(
                "%s\t%s\t%s\t%d\tA(%0.0f)\tT(%0.0f)\tC(%0.0f)\tG(%0.0f)\n"
                % (os.path.basename(chrom), location, counts[4], total,
                   ratios[0], ratios[1], ratios[2], ratios[3]))
    finally:
        out_fh.close()


def __main__():
    """Read the pslx file named by argv[1] and write the report to argv[2].

    Empty lines and lines starting with ``#`` are ignored.  Records that do
    not have 23 columns, do not start with a number, or carry malformed
    block fields are skipped as a whole and counted; the number of skipped
    lines and of invalid read characters is printed to stdout when non-zero.
    """
    if len(sys.argv) < 3:
        stop_err("Usage: blat_coverage_report.py <input.pslx> <output.tabular>")
    infile = sys.argv[1]
    outfile = sys.argv[2]
    coverage = {}  # key = (chrom, index)
    invalid_lines = 0
    invalid_chars = 0

    in_fh = open(infile)
    try:
        for line in in_fh:
            line = line.rstrip('\r\n')
            if not line or line.startswith('#'):
                continue
            fields = line.split()
            blocks = None
            if len(fields) == PSLX_COLUMNS and fields[0].isdigit():
                blocks = _parse_blocks(fields)
            if blocks is None:
                invalid_lines += 1
                continue
            invalid_chars += _tally_blocks(fields[13], blocks, coverage)
    finally:
        in_fh.close()

    _write_report(coverage, outfile)

    if invalid_lines:
        print('Skipped %d invalid lines. ' % invalid_lines)
    if invalid_chars:
        print('Skipped %d invalid characters in the alignment. ' % invalid_chars)


if __name__ == '__main__':
    __main__()
b |
diff -r 000000000000 -r 30f0948c649c blat_coverage_report.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blat_coverage_report.xml Mon May 19 12:34:01 2014 -0400 |
b |
@@ -0,0 +1,75 @@ +<tool id="generate_coverage_report" name="Polymorphism of the Reads" version="1.0.0"> + <description>the percentage of reads supporting each nucleotide at each location</description> + <command interpreter="python">blat_coverage_report.py $input1 $output1</command> + <inputs> + <param name="input1" type="data" format="tabular" label="Alignment result"/> + </inputs> + <outputs> + <data name="output1" format="tabular"/> + </outputs> + <tests> + <test> + <param name="input1" value="blat_coverage_report_test1.txt" ftype="tabular" /> + <output name="output1" file="blat_coverage_report_test1.out" /> + </test> + </tests> + <help> + +.. class:: warningmark + +**IMPORTANT**. Only works for BLAT **standard** or **pslx** output formats (hint: to output pslx format, add **-out=pslx** in the command). + +----- + +**What it does** + + The tool will generate a table of 8 columns as follows: + +- 1st column: chromosome id. + +- 2nd column: chromosome location. + +- 3rd column: the nucleotide from reference genome at the chromosome location (2nd column). + +- 4th column: total coverage of the reads (number of reads that were mapped to the chromosome location). + +- 5th column: percentage of reads that support nucleotide **A** at this location. + +- 6th column: percentage of reads that support nucleotide **T** at this location. + +- 7th column: percentage of reads that support nucleotide **C** at this location. + +- 8th column: percentage of reads that support nucleotide **G** at this location. 
+ + +----- + +**Example** + +- The BLAT pslx results look like the following (tab separated with sequence at the end):: + + 30 0 0 0 0 0 0 0 + seq0 30 0 30 chr 4639675 4549207 4549237 1 30, 0, 4549207, cggacagcgccgccaccaacaaagccacca, cggacagcgccgccaccaacaaagccacca, + 30 0 0 0 0 0 0 0 + seq1 30 0 30 chr 4639675 614777 614807 1 30, 0, 614777, aaaacaccggatgctccggcgctggcagat, aaaacaccggatgctccggcgctggcagat, + 28 1 0 0 0 0 0 0 + seq2 30 0 29 chr 4639675 3289283 3289312 1 29, 0, 3289283, tttgcttttagtacaccggattcagaacc, tttgctttcagtacaccggattcagaacc, + 30 0 0 0 0 0 0 0 + seq4 30 0 30 chr 4639675 2665584 2665614 1 30, 0, 2665584, cacgctacgtgcgcccccgcccagaaggcg, cacgctacgtgcgcccccgcccagaaggcg, + + The 14th column is the chromosome id, and the 16th and 17th columns show the chromosome start and end locations to which the read was mapped. + +- The report shows the overall coverage of reads on each chromosome location (partial result):: + + +-------+----------+------+------+--------+------+--------+------+ + | title | location | ref. | cov. | A | T | C | G | + +-------+----------+------+------+--------+------+--------+------+ + | chr | 614777 | A | 1 | A(100) | T(0) | C(0) | G(0) | + | chr | 614778 | A | 1 | A(100) | T(0) | C(0) | G(0) | + | chr | 614779 | A | 1 | A(100) | T(0) | C(0) | G(0) | + +-------+----------+------+------+--------+------+--------+------+ + +----- + +**Reference** + + **BLAT**: Kent, W James, BLAT--the BLAST-like alignment tool. (2002) Genome Research:12(4) 656-664. + + </help> +</tool> |
b |
diff -r 000000000000 -r 30f0948c649c test-data/blat_coverage_report_test1.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blat_coverage_report_test1.out Mon May 19 12:34:01 2014 -0400 |
b |
b'@@ -0,0 +1,1908 @@\n+##title\tlocation\tref.\tcov.\tA\tT\tC\tG\n+chr\t159366\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159367\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159368\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159369\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t159370\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t159371\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159372\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159373\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159374\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t159375\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159376\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159377\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159378\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159379\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159380\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t159381\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t159382\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159383\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t159384\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t159385\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t159386\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159387\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t159388\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159389\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159390\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t159391\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t159392\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159393\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159394\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t159395\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256715\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256716\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256717\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256718\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256719\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256720\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256721\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256722\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t256723\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256724\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256725\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t256726\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+c
hr\t256727\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256728\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256729\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256730\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256731\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t256732\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256733\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256734\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256735\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256736\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256737\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256738\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256739\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256740\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256741\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t256742\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256743\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t256744\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421739\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421740\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421741\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t421742\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421743\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421744\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421745\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421746\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t421747\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t421748\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t421749\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421750\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421751\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t421752\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t421753\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421754\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421755\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421756\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t421757\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t421758\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t421759\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t421760\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421761\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421762\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t421763\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t421764\tG\t1\tA(0)
\tT(0)\tC(0)\tG(100)\n+chr\t421765\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421766\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421767\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421768\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t609915\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t609916\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t609917\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t609918\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t609919\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t609920\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t609921\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t609922\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t609923\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t609924\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t609925\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t609926\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t609927\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t609928\tT\t1\tA(0)\tT(100)\tC('..b'(100)\tT(0)\tC(0)\tG(0)\n+chr\t4549225\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4549226\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4549227\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4549228\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4549229\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4549230\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4549231\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4549232\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4549233\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4549234\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4549235\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4549236\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553565\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553566\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553567\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553568\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553569\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553570\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553571\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553572\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4553573\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553574\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553575\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553576\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+c
hr\t4553577\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553578\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4553579\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553580\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553581\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553582\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553583\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553584\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4553585\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553586\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553587\tA\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553588\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4553589\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553590\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553591\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553592\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553593\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553594\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4555547\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555548\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555549\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555550\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555551\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555552\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555553\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555554\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555555\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555556\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555557\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4555558\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555559\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555560\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555561\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555562\tG\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4555563\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555564\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4555565\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555566\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555567\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555568\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555569\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555570\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555571\tC\t1\tA(0)\tT
(0)\tC(100)\tG(0)\n+chr\t4555572\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4555573\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555574\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555575\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555576\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590702\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590703\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590704\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590705\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590706\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590707\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590708\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590709\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590710\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590711\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590712\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590713\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590714\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590715\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590716\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4590717\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590718\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590719\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590720\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590721\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590722\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590723\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4590724\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590725\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590726\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590727\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4590728\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590729\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590730\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590731\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n' |
b |
diff -r 000000000000 -r 30f0948c649c test-data/blat_coverage_report_test1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blat_coverage_report_test1.txt Mon May 19 12:34:01 2014 -0400 |
b |
b'@@ -0,0 +1,64 @@\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t0_0.666667\t30\t0\t30\tchr\t4639675\t4549207\t4549237\t1\t30,\t0,\t4549207,\tcggacagcgccgccaccaacaaagccacca,\tcggacagcgccgccaccaacaaagccacca,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t1_0.600000\t30\t0\t30\tchr\t4639675\t614777\t614807\t1\t30,\t0,\t614777,\taaaacaccggatgctccggcgctggcagat,\taaaacaccggatgctccggcgctggcagat,\n+28\t1\t0\t0\t0\t0\t0\t0\t+\t2_0.400000\t30\t0\t29\tchr\t4639675\t3289283\t3289312\t1\t29,\t0,\t3289283,\ttttgcttttagtacaccggattcagaacc,\ttttgctttcagtacaccggattcagaacc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t4_0.766667\t30\t0\t30\tchr\t4639675\t2665584\t2665614\t1\t30,\t0,\t2665584,\tcacgctacgtgcgcccccgcccagaaggcg,\tcacgctacgtgcgcccccgcccagaaggcg,\n+20\t0\t0\t0\t0\t0\t0\t0\t+\t5_0.533333\t30\t4\t24\tchr\t4639675\t3375780\t3375800\t1\t20,\t4,\t3375780,\taaaacccgccgaagcgggtt,\taaaacccgccgaagcgggtt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t6_0.533333\t30\t0\t30\tchr\t4639675\t965481\t965511\t1\t30,\t0,\t965481,\ttaagccgttactggcagcaagtgcaggcaa,\ttaagccgttactggcagcaagtgcaggcaa,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t7_0.400000\t30\t0\t30\tchr\t4639675\t1781311\t1781341\t1\t30,\t0,\t1781311,\ttgaatttaccgttatctatcttgcctgcct,\ttgaatttaccgttatctatcttgcctgcct,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t9_0.400000\t30\t0\t30\tchr\t4639675\t4400977\t4401007\t1\t30,\t0,\t4400977,\tgcgttttgctaaacttctgccggaatataa,\tgcgctttgctaaacttctgccggaatataa,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t11_0.533333\t30\t0\t30\tchr\t4639675\t3923551\t3923581\t1\t30,\t0,\t3923551,\tcaaaagcagagtctgttgacccatacgcgc,\tcaaaagcagagtctgttgacccatacgcgc,\n+28\t2\t0\t0\t0\t0\t0\t0\t+\t12_0.700000\t30\t0\t30\tchr\t4639675\t3304453\t3304483\t1\t30,\t0,\t3304453,\tgcggacgatcttcacggtcgccacgcggac,\tgcggacgatcttcacggtcaccacgcgggc,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t13_0.533333\t30\t0\t30\tchr\t4639675\t4555547\t4555577\t1\t30,\t0,\t4555547,\tttcttgttggatggcatactccggcagcca,\tttcttgttggatggcgtactccggcagcca,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t14_0.666667\t30\t0\t30\tchr\t4639675\t3811672\t3811702\t1\t30,\t0,\t3811672,\taccccg
atatcgtcgcaggcgttgccgcac,\taccccgatatcgtcgcaggcgttgccgcac,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t16_0.566667\t30\t0\t30\tchr\t4639675\t1916143\t1916173\t1\t30,\t0,\t1916143,\tatgtcctgatcgagcggcgttttaccgacc,\tatgtcctgatcgagcggcgttttaccgatc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t18_0.566667\t30\t0\t30\tchr\t4639675\t1860631\t1860661\t1\t30,\t0,\t1860631,\tgtggtctcaagcccaaaggaagagtgaggc,\tgtggtctcaagcccaaaggaagagtgaggc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t19_0.566667\t30\t0\t30\tchr\t4639675\t1018466\t1018496\t1\t30,\t0,\t1018466,\tgttgtaagcgtcagaaccgatgcggtcggt,\tgttgtaagcgtcagaaccgatgcggtcggt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t22_0.566667\t30\t0\t30\tchr\t4639675\t2006716\t2006746\t1\t30,\t0,\t2006716,\tcggtattcctcagttctcgctgcatgcctg,\tcggtattcctcagttctcgctgcatgcctg,\n+30\t0\t0\t0\t0\t0\t0\t0\t-\t23_0.600000\t30\t0\t30\tchr\t4639675\t4516584\t4516614\t1\t30,\t0,\t4516584,\tcgctactgaaggcgtggtgcgtaacggcaa,\tcgctactgaaggcgtggtgcgtaacggcaa,\n+30\t0\t0\t0\t0\t0\t0\t0\t-\t24_0.733333\t30\t0\t30\tchr\t4639675\t1535123\t1535153\t1\t30,\t0,\t1535123,\tgaacagcagcagcgacgtggtgcgcccgcg,\tgaacagcagcagcgacgtggtgcgcccgcg,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t27_0.600000\t30\t0\t30\tchr\t4639675\t3442475\t3442505\t1\t30,\t0,\t3442475,\tttaccgaggccagaaccgataccacgaccc,\tttaccgaggccagaaccgataccacgaccc,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t29_0.700000\t30\t0\t30\tchr\t4639675\t2807120\t2807150\t1\t30,\t0,\t2807120,\tctcgtccggcgggcggttttgccgacaagg,\tctcgtccggcgggcggttttgccgataagg,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t34_0.533333\t30\t0\t30\tchr\t4639675\t421739\t421769\t1\t30,\t0,\t421739,\ttgatgttaaatgcatggcacctgccggtgc,\ttgatgttaaatgcatggcacctgccggtgc,\n+30\t0\t0\t0\t0\t0\t0\t0\t-\t35_0.466667\t30\t0\t30\tchr\t4639675\t1334399\t1334429\t1\t30,\t0,\t1334399,\tggttaacctcgaatatctcggcaaagcagt,\tggttaacctcgaatatctcggcaaagcagt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t36_0.433333\t30\t0\t30\tchr\t4639675\t2906502\t2906532\t1\t30,\t0,\t2906502,\ttgttggcaacatggcgagcgtaatcaatta,\ttgttggcaacatggcgagcgtaatcaatta,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t38_0.533333
\t30\t0\t30\tchr\t4639675\t3438288\t3438318\t1\t30,\t0,\t3438288,\tgcagcaggatcggatcgaactctggtttct,\tgcagcaggatcggatcgaactctggtttct,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t39_0.400000\t30\t0\t30\tchr\t4639675\t2893756\t2893786\t1\t30,\t0,\t2893756,\ttaatgcggcattctcctgatttattgtcac,\ttattgcggcattctcctgatttattgtcac,\n+29\t0\t0\t0\t0\t0\t0\t0\t+\t41_0.566667\t30\t1\t30\tchr\t4639675\t708504\t708533\t1\t29,\t1,\t708504,\tgctgacggtcagcagggatacttcctgca,\tgctgacggtcagcagggatacttcctgca,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t43_0.466667\t30\t0\t30\tchr\t4639675\t3483475\t3483505\t1\t30,\t0,\t3483475,\tgctttttccagcatcaacgccactgaacaa,\tgctttttc'..b'\tchr\t4639675\t2724848\t2724878\t1\t30,\t0,\t2724848,\ttcacgctcgcagtcaagctggcttatgcca,\ttcacgctcgcagtcaagctggcttatgcca,\n+30\t0\t0\t0\t0\t0\t0\t0\t-\t63_0.566667\t30\t0\t30\tchr\t4639675\t4210393\t4210423\t1\t30,\t0,\t4210393,\ttggcataagccagcttgactgcgagcgtga,\ttggcataagccagcttgactgcgagcgtga,\n+30\t0\t0\t0\t0\t0\t0\t0\t-\t63_0.566667\t30\t0\t30\tchr\t4639675\t3944054\t3944084\t1\t30,\t0,\t3944054,\ttggcataagccagcttgactgcgagcgtga,\ttggcataagccagcttgactgcgagcgtga,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t64_0.500000\t30\t0\t30\tchr\t4639675\t4553565\t4553595\t1\t30,\t0,\t4553565,\tgagatgacggttgcagagtcatgcgtttga,\tgagatgacggttgcagagtcatacgtttga,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t65_0.533333\t30\t0\t30\tchr\t4639675\t3154209\t3154239\t1\t30,\t0,\t3154209,\ttctggatcacgcgcaaacactggctatcgt,\ttctggatcacgcgcaaacactggctatcgt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t66_0.466667\t30\t0\t30\tchr\t4639675\t3330950\t3330980\t1\t30,\t0,\t3330950,\ttctgctttagcaaacagagtgtggtcacga,\ttctgctttagcaaacagagtgtggtcacga,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t68_0.566667\t30\t0\t30\tchr\t4639675\t699412\t699442\t1\t30,\t0,\t699412,\tgaaggatagttggtcagcaacaccagcggc,\tgaaggatagttggtcagcaacaccagcggc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t69_0.733333\t30\t0\t30\tchr\t4639675\t3218070\t3218100\t1\t30,\t0,\t3218070,\tggcgcgctgtcggccacggcgaaatcgacc,\tggcgcgctgtcggccacggcgaaatcgacc,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t72_0.533333\t3
0\t0\t30\tchr\t4639675\t2811982\t2812012\t1\t30,\t0,\t2811982,\taactggaagggcttgggatgacacaacagc,\taactggaagggcttgggatgacgcaacagc,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t73_0.500000\t30\t0\t30\tchr\t4639675\t2480740\t2480770\t1\t30,\t0,\t2480740,\ttttaagcgccaaccaggcttctttggttgc,\ttttaagcgccaaccaggcttctttagttgc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t74_0.433333\t30\t0\t30\tchr\t4639675\t1904984\t1905014\t1\t30,\t0,\t1904984,\tacccttctttcgccatatcaaactgatgtc,\tacccttctttcgccatatcaaactgatgtc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t78_0.566667\t30\t0\t30\tchr\t4639675\t4535164\t4535194\t1\t30,\t0,\t4535164,\tcggagtatccgttccccaacgacaagcatc,\tcggagtatccgttccccaacgacaagcatc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t79_0.500000\t30\t0\t30\tchr\t4639675\t3598974\t3599004\t1\t30,\t0,\t3598974,\taataccgggaagagacaacggggtctcttt,\taataccgggaagagacaacggggtctcttt,\n+30\t0\t0\t0\t0\t0\t0\t0\t-\t80_0.500000\t30\t0\t30\tchr\t4639675\t1333814\t1333844\t1\t30,\t0,\t1333814,\tgagaatcagggcttcgcaaccctgtcatta,\tgagaatcagggcttcgcaaccctgtcatta,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t82_0.500000\t30\t0\t30\tchr\t4639675\t962544\t962574\t1\t30,\t0,\t962544,\tactgggttgctctgaacaagaaaggcgcta,\tactgggttgctctgaacaagaaaggcgcta,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t83_0.533333\t30\t0\t30\tchr\t4639675\t4543338\t4543368\t1\t30,\t0,\t4543338,\tcgccagggacgtatcgcgtcgatatctatt,\tcgccagggacgtatcgcgtcgatatctatt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t86_0.566667\t30\t0\t30\tchr\t4639675\t159366\t159396\t1\t30,\t0,\t159366,\tggcaacgcaggcgcatgattctgcttggaa,\tggcaacgcaggcgcatgattctgcttggaa,\n+29\t0\t0\t1\t0\t0\t0\t0\t+\t87_0.433333\t30\t0\t30\tchr\t4639675\t2137315\t2137345\t1\t30,\t0,\t2137315,\tcgctggatgaaaancgtgaatatcacacca,\tcgctggatgaaaaacgtgaatatcacacca,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t88_0.533333\t30\t0\t30\tchr\t4639675\t2263609\t2263639\t1\t30,\t0,\t2263609,\tcgtaccgggctgaaagtagaagagcgtttc,\tcgtaccgggctgaaagtagaagagcgtttc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t90_0.466667\t30\t0\t30\tchr\t4639675\t609915\t609945\t1\t30,\t0,\t609915,\tatcaccgtttcgctaaccggtacgtttaac,\tatcaccgtt
tcgctaaccggtacgtttaac,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t91_0.566667\t30\t0\t30\tchr\t4639675\t2506977\t2507007\t1\t30,\t0,\t2506977,\tttcgcccggcaagcttacccaacgcttatc,\tttcgcctggcaagcttacccaacgcttatc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t92_0.533333\t30\t0\t30\tchr\t4639675\t4109771\t4109801\t1\t30,\t0,\t4109771,\tttttccccgccgttagtagcgactgcagtt,\tttttccccgccgttagtagcgactgcagtt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t93_0.400000\t30\t0\t30\tchr\t4639675\t2250545\t2250575\t1\t30,\t0,\t2250545,\tttatttgcccgatgagtcagtttattgcag,\tttatttgcccgatgagtcagtttattgcag,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t94_0.466667\t30\t0\t30\tchr\t4639675\t1274949\t1274979\t1\t30,\t0,\t1274949,\tccacggtgatatctggtgccatactgataa,\tccacggtgatatctggtgccatactgataa,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t95_0.466667\t30\t0\t30\tchr\t4639675\t3279675\t3279705\t1\t30,\t0,\t3279675,\ttcccccgtaaggcctttctttttctttcgt,\ttcccccgtaaggcctttctttttctttcgt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t97_0.566667\t30\t0\t30\tchr\t4639675\t2779110\t2779140\t1\t30,\t0,\t2779110,\ttgcgccgccggattgttgctcaacatgctt,\ttgcgccgccggattgttgctcaacatgctt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t98_0.366667\t30\t0\t30\tchr\t4639675\t3729759\t3729789\t1\t30,\t0,\t3729759,\taacgcgctaaccgccaataataacaaaatt,\taacgcgctaaccgccaataataacaaaatt,\n' |