Repository 'blat_coverage_report'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/blat_coverage_report

Changeset 0:30f0948c649c (2014-05-19)
Commit message:
Imported from capsule None
added:
blat_coverage_report.py
blat_coverage_report.xml
test-data/blat_coverage_report_test1.out
test-data/blat_coverage_report_test1.txt
b
diff -r 000000000000 -r 30f0948c649c blat_coverage_report.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/blat_coverage_report.py Mon May 19 12:34:01 2014 -0400
[
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+
+import os, sys
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err( msg ):
+    sys.stderr.write( "%s\n" % msg )
+    sys.exit()
+
+def reverse_complement(s):
+    complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" , ".":"."}
+    reversed_s = []
+    for i in s:
+        reversed_s.append(complement_dna[i])
+    reversed_s.reverse()
+    return "".join(reversed_s)
+
+def __main__():
+    nuc_index = {'a':0,'t':1,'c':2,'g':3}
+    diff_hash = {}    # key = (chrom, index)
+    infile = sys.argv[1]
+    outfile = sys.argv[2]
+    invalid_lines = 0
+    invalid_chars = 0
+    data_id = ''
+    data_seq = ''
+
+    for i, line in enumerate( open( infile ) ):
+        line = line.rstrip( '\r\n' )
+        if not line or line.startswith( '#' ):
+            continue
+        fields = line.split()
+        if len(fields) != 23:    # standard number of pslx columns
+            invalid_lines += 1
+            continue
+        if not fields[0].isdigit():
+            invalid_lines += 1
+            continue
+        read_id = fields[9]
+        chrom = fields[13]
+        try:
+            block_count = int(fields[17])
+        except:
+            invalid_lines += 1
+            continue
+        block_size = fields[18].split(',')
+        read_start = fields[19].split(',')
+        chrom_start = fields[20].split(',')
+        read_seq = fields[21].split(',')
+        chrom_seq = fields[22].split(',')
+
+        for j in range(block_count):
+            try:
+                this_block_size = int(block_size[j])
+                this_read_start = int(read_start[j])
+                this_chrom_start = int(chrom_start[j])
+            except:
+                invalid_lines += 1
+                break
+            this_read_seq = read_seq[j]
+            this_chrom_seq = chrom_seq[j]
+            
+            if not this_read_seq.isalpha():
+                continue
+            if not this_chrom_seq.isalpha():
+                continue
+            
+            # brute force to check coverage
+            for k in range(this_block_size):
+                cur_index = this_chrom_start+k
+                sub_a = this_read_seq[k:(k+1)].lower()
+                sub_b = this_chrom_seq[k:(k+1)].lower()
+                if not diff_hash.has_key((chrom, cur_index)):
+                    try:
+                        diff_hash[(chrom, cur_index)] = [0,0,0,0,sub_b.upper()]    # a, t, c, g, ref. nuc.
+                    except Exception, e:
+                        stop_err( str( e ) )
+                if sub_a in ['a','t','c','g']:
+                    diff_hash[(chrom, cur_index)][nuc_index[(sub_a)]] += 1
+                else:
+                    invalid_chars += 1
+                        
+    outputfh = open(outfile, 'w')
+    outputfh.write( "##title\tlocation\tref.\tcov.\tA\tT\tC\tG\n" )
+    keys = diff_hash.keys()
+    keys.sort()
+    for i in keys:
+        (chrom, location) = i
+        sum = diff_hash[ (i) ][ 0 ] + diff_hash[ ( i ) ][ 1 ] + diff_hash[ ( i ) ][ 2 ] + diff_hash[ ( i ) ][ 3 ]    # did not include N's
+        if sum == 0:
+            continue
+        ratio_A = diff_hash[ ( i ) ][ 0 ] * 100.0 / sum
+        ratio_T = diff_hash[ ( i ) ][ 1 ] * 100.0 / sum
+        ratio_C = diff_hash[ ( i ) ][ 2 ] * 100.0 / sum
+        ratio_G = diff_hash[ ( i ) ][ 3 ] * 100.0 / sum
+        (title_head, title_tail) = os.path.split(chrom)
+        result = "%s\t%s\t%s\t%d\tA(%0.0f)\tT(%0.0f)\tC(%0.0f)\tG(%0.0f)\n" % ( title_tail, location, diff_hash[(i)][4], sum, ratio_A, ratio_T, ratio_C, ratio_G ) 
+        outputfh.write(result)
+    outputfh.close()
+
+    if invalid_lines:
+        print 'Skipped %d invalid lines. ' % ( invalid_lines )
+    if invalid_chars:
+        print 'Skipped %d invalid characters in the alignment. ' % (invalid_chars)
+        
+if __name__ == '__main__': __main__()
\ No newline at end of file
b
diff -r 000000000000 -r 30f0948c649c blat_coverage_report.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/blat_coverage_report.xml Mon May 19 12:34:01 2014 -0400
b
@@ -0,0 +1,75 @@
+<tool id="generate_coverage_report" name="Polymorphism of the Reads" version="1.0.0">
+ <description>the percentage of reads supporting each nucleotide at each location</description>
+ <command interpreter="python">blat_coverage_report.py $input1 $output1</command>
+ <inputs>
+ <param name="input1" type="data" format="tabular" label="Alignment result"/>
+ </inputs>
+ <outputs>
+ <data name="output1" format="tabular"/>
+ </outputs> 
+ <tests>
+ <test>
+ <param name="input1" value="blat_coverage_report_test1.txt" ftype="tabular" />
+ <output name="output1" file="blat_coverage_report_test1.out" />
+ </test>
+ </tests>
+ <help>
+
+.. class:: warningmark
+
+**IMPORTANT**. Only works for BLAT **pslx** output, i.e. the **standard** psl columns followed by the read and reference sequences, 23 columns in total (hint: to output pslx format, add **-out=pslx** to the BLAT command).
+
+-----
+
+**What it does**

+ The tool will generate a table of 8 columns as follows:

+- 1st column: chromosome id.
+
+- 2nd column: chromosome location.
+
+- 3rd column: the nucleotide from reference genome at the chromosome location (2nd column).
+
+- 4th column: total coverage of the reads (number of reads that were mapped to the chromosome location with an A, T, C or G at that position; other characters such as N are not counted).
+
+- 5th column: percentage of reads that support nucleotide **A** at this location.
+
+- 6th column: percentage of reads that support nucleotide **T** at this location.
+
+- 7th column: percentage of reads that support nucleotide **C** at this location.
+
+- 8th column: percentage of reads that support nucleotide **G** at this location.


+-----
+
+**Example**
+
+- The BLAT pslx results look like the following (tab separated with sequence at the end)::
+
+ 30 0 0 0 0 0 0 0 + seq0 30 0 30 chr 4639675 4549207 4549237 1 30, 0, 4549207, cggacagcgccgccaccaacaaagccacca, cggacagcgccgccaccaacaaagccacca,
+ 30 0 0 0 0 0 0 0 + seq1 30 0 30 chr 4639675 614777 614807 1 30, 0, 614777, aaaacaccggatgctccggcgctggcagat, aaaacaccggatgctccggcgctggcagat,
+ 28 1 0 0 0 0 0 0 + seq2 30 0 29 chr 4639675 3289283 3289312 1 29, 0, 3289283, tttgcttttagtacaccggattcagaacc, tttgctttcagtacaccggattcagaacc,
+ 30 0 0 0 0 0 0 0 + seq4 30 0 30 chr 4639675 2665584 2665614 1 30, 0, 2665584, cacgctacgtgcgcccccgcccagaaggcg, cacgctacgtgcgcccccgcccagaaggcg,
+
+ The 14th column is the chromosome id, and the 16th and 17th columns show the chromosome start and end locations to which the read was mapped.
+
+- The report shows the overall coverage of reads at each chromosome location (partial result)::

+   +-------+----------+------+------+--------+------+--------+------+
+   | title | location | ref. | cov. |   A    |  T   |   C    |  G   |
+   +-------+----------+------+------+--------+------+--------+------+
+   |   chr |   614777 |  A   |  1   | A(100) | T(0) |   C(0) | G(0) |
+   |   chr |   614778 |  A   |  1   | A(100) | T(0) |   C(0) | G(0) |
+   |   chr |   614779 |  A   |  1   | A(100) | T(0) |   C(0) | G(0) |
+   +-------+----------+------+------+--------+------+--------+------+
+
+-----
+
+**Reference**

+ **BLAT**: Kent, W James, BLAT--the BLAST-like alignment tool. (2002) Genome Research:12(4) 656-664.
+
+ </help>
+</tool>
b
diff -r 000000000000 -r 30f0948c649c test-data/blat_coverage_report_test1.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blat_coverage_report_test1.out Mon May 19 12:34:01 2014 -0400
b
b'@@ -0,0 +1,1908 @@\n+##title\tlocation\tref.\tcov.\tA\tT\tC\tG\n+chr\t159366\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159367\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159368\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159369\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t159370\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t159371\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159372\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159373\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159374\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t159375\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159376\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159377\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159378\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159379\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159380\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t159381\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t159382\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159383\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t159384\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t159385\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t159386\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159387\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t159388\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159389\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t159390\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t159391\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t159392\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159393\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t159394\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t159395\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256715\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256716\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256717\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256718\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256719\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256720\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256721\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256722\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t256723\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256724\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256725\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t256726\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+c
hr\t256727\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256728\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256729\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256730\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256731\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t256732\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256733\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256734\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256735\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256736\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256737\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256738\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256739\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t256740\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t256741\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t256742\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t256743\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t256744\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421739\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421740\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421741\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t421742\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421743\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421744\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421745\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421746\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t421747\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t421748\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t421749\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421750\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421751\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t421752\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t421753\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421754\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421755\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421756\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t421757\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t421758\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t421759\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t421760\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421761\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421762\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t421763\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t421764\tG\t1\tA(0)
\tT(0)\tC(0)\tG(100)\n+chr\t421765\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421766\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t421767\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t421768\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t609915\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t609916\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t609917\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t609918\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t609919\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t609920\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t609921\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t609922\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t609923\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t609924\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t609925\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t609926\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t609927\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t609928\tT\t1\tA(0)\tT(100)\tC('..b'(100)\tT(0)\tC(0)\tG(0)\n+chr\t4549225\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4549226\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4549227\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4549228\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4549229\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4549230\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4549231\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4549232\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4549233\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4549234\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4549235\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4549236\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553565\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553566\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553567\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553568\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553569\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553570\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553571\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553572\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4553573\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553574\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553575\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553576\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+c
hr\t4553577\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553578\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4553579\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553580\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553581\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553582\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553583\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553584\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4553585\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4553586\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553587\tA\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553588\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4553589\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553590\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553591\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553592\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4553593\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4553594\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4555547\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555548\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555549\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555550\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555551\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555552\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555553\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555554\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555555\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555556\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555557\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4555558\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555559\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555560\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555561\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555562\tG\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4555563\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555564\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4555565\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555566\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4555567\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555568\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555569\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555570\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555571\tC\t1\tA(0)\tT
(0)\tC(100)\tG(0)\n+chr\t4555572\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4555573\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4555574\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555575\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4555576\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590702\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590703\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590704\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590705\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590706\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590707\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590708\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590709\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590710\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590711\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590712\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590713\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590714\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590715\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590716\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4590717\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590718\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590719\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590720\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590721\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590722\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590723\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4590724\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590725\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n+chr\t4590726\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590727\tC\t1\tA(0)\tT(0)\tC(100)\tG(0)\n+chr\t4590728\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590729\tA\t1\tA(100)\tT(0)\tC(0)\tG(0)\n+chr\t4590730\tT\t1\tA(0)\tT(100)\tC(0)\tG(0)\n+chr\t4590731\tG\t1\tA(0)\tT(0)\tC(0)\tG(100)\n'
b
diff -r 000000000000 -r 30f0948c649c test-data/blat_coverage_report_test1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blat_coverage_report_test1.txt Mon May 19 12:34:01 2014 -0400
b
b'@@ -0,0 +1,64 @@\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t0_0.666667\t30\t0\t30\tchr\t4639675\t4549207\t4549237\t1\t30,\t0,\t4549207,\tcggacagcgccgccaccaacaaagccacca,\tcggacagcgccgccaccaacaaagccacca,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t1_0.600000\t30\t0\t30\tchr\t4639675\t614777\t614807\t1\t30,\t0,\t614777,\taaaacaccggatgctccggcgctggcagat,\taaaacaccggatgctccggcgctggcagat,\n+28\t1\t0\t0\t0\t0\t0\t0\t+\t2_0.400000\t30\t0\t29\tchr\t4639675\t3289283\t3289312\t1\t29,\t0,\t3289283,\ttttgcttttagtacaccggattcagaacc,\ttttgctttcagtacaccggattcagaacc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t4_0.766667\t30\t0\t30\tchr\t4639675\t2665584\t2665614\t1\t30,\t0,\t2665584,\tcacgctacgtgcgcccccgcccagaaggcg,\tcacgctacgtgcgcccccgcccagaaggcg,\n+20\t0\t0\t0\t0\t0\t0\t0\t+\t5_0.533333\t30\t4\t24\tchr\t4639675\t3375780\t3375800\t1\t20,\t4,\t3375780,\taaaacccgccgaagcgggtt,\taaaacccgccgaagcgggtt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t6_0.533333\t30\t0\t30\tchr\t4639675\t965481\t965511\t1\t30,\t0,\t965481,\ttaagccgttactggcagcaagtgcaggcaa,\ttaagccgttactggcagcaagtgcaggcaa,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t7_0.400000\t30\t0\t30\tchr\t4639675\t1781311\t1781341\t1\t30,\t0,\t1781311,\ttgaatttaccgttatctatcttgcctgcct,\ttgaatttaccgttatctatcttgcctgcct,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t9_0.400000\t30\t0\t30\tchr\t4639675\t4400977\t4401007\t1\t30,\t0,\t4400977,\tgcgttttgctaaacttctgccggaatataa,\tgcgctttgctaaacttctgccggaatataa,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t11_0.533333\t30\t0\t30\tchr\t4639675\t3923551\t3923581\t1\t30,\t0,\t3923551,\tcaaaagcagagtctgttgacccatacgcgc,\tcaaaagcagagtctgttgacccatacgcgc,\n+28\t2\t0\t0\t0\t0\t0\t0\t+\t12_0.700000\t30\t0\t30\tchr\t4639675\t3304453\t3304483\t1\t30,\t0,\t3304453,\tgcggacgatcttcacggtcgccacgcggac,\tgcggacgatcttcacggtcaccacgcgggc,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t13_0.533333\t30\t0\t30\tchr\t4639675\t4555547\t4555577\t1\t30,\t0,\t4555547,\tttcttgttggatggcatactccggcagcca,\tttcttgttggatggcgtactccggcagcca,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t14_0.666667\t30\t0\t30\tchr\t4639675\t3811672\t3811702\t1\t30,\t0,\t3811672,\taccccg
atatcgtcgcaggcgttgccgcac,\taccccgatatcgtcgcaggcgttgccgcac,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t16_0.566667\t30\t0\t30\tchr\t4639675\t1916143\t1916173\t1\t30,\t0,\t1916143,\tatgtcctgatcgagcggcgttttaccgacc,\tatgtcctgatcgagcggcgttttaccgatc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t18_0.566667\t30\t0\t30\tchr\t4639675\t1860631\t1860661\t1\t30,\t0,\t1860631,\tgtggtctcaagcccaaaggaagagtgaggc,\tgtggtctcaagcccaaaggaagagtgaggc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t19_0.566667\t30\t0\t30\tchr\t4639675\t1018466\t1018496\t1\t30,\t0,\t1018466,\tgttgtaagcgtcagaaccgatgcggtcggt,\tgttgtaagcgtcagaaccgatgcggtcggt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t22_0.566667\t30\t0\t30\tchr\t4639675\t2006716\t2006746\t1\t30,\t0,\t2006716,\tcggtattcctcagttctcgctgcatgcctg,\tcggtattcctcagttctcgctgcatgcctg,\n+30\t0\t0\t0\t0\t0\t0\t0\t-\t23_0.600000\t30\t0\t30\tchr\t4639675\t4516584\t4516614\t1\t30,\t0,\t4516584,\tcgctactgaaggcgtggtgcgtaacggcaa,\tcgctactgaaggcgtggtgcgtaacggcaa,\n+30\t0\t0\t0\t0\t0\t0\t0\t-\t24_0.733333\t30\t0\t30\tchr\t4639675\t1535123\t1535153\t1\t30,\t0,\t1535123,\tgaacagcagcagcgacgtggtgcgcccgcg,\tgaacagcagcagcgacgtggtgcgcccgcg,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t27_0.600000\t30\t0\t30\tchr\t4639675\t3442475\t3442505\t1\t30,\t0,\t3442475,\tttaccgaggccagaaccgataccacgaccc,\tttaccgaggccagaaccgataccacgaccc,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t29_0.700000\t30\t0\t30\tchr\t4639675\t2807120\t2807150\t1\t30,\t0,\t2807120,\tctcgtccggcgggcggttttgccgacaagg,\tctcgtccggcgggcggttttgccgataagg,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t34_0.533333\t30\t0\t30\tchr\t4639675\t421739\t421769\t1\t30,\t0,\t421739,\ttgatgttaaatgcatggcacctgccggtgc,\ttgatgttaaatgcatggcacctgccggtgc,\n+30\t0\t0\t0\t0\t0\t0\t0\t-\t35_0.466667\t30\t0\t30\tchr\t4639675\t1334399\t1334429\t1\t30,\t0,\t1334399,\tggttaacctcgaatatctcggcaaagcagt,\tggttaacctcgaatatctcggcaaagcagt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t36_0.433333\t30\t0\t30\tchr\t4639675\t2906502\t2906532\t1\t30,\t0,\t2906502,\ttgttggcaacatggcgagcgtaatcaatta,\ttgttggcaacatggcgagcgtaatcaatta,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t38_0.533333
\t30\t0\t30\tchr\t4639675\t3438288\t3438318\t1\t30,\t0,\t3438288,\tgcagcaggatcggatcgaactctggtttct,\tgcagcaggatcggatcgaactctggtttct,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t39_0.400000\t30\t0\t30\tchr\t4639675\t2893756\t2893786\t1\t30,\t0,\t2893756,\ttaatgcggcattctcctgatttattgtcac,\ttattgcggcattctcctgatttattgtcac,\n+29\t0\t0\t0\t0\t0\t0\t0\t+\t41_0.566667\t30\t1\t30\tchr\t4639675\t708504\t708533\t1\t29,\t1,\t708504,\tgctgacggtcagcagggatacttcctgca,\tgctgacggtcagcagggatacttcctgca,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t43_0.466667\t30\t0\t30\tchr\t4639675\t3483475\t3483505\t1\t30,\t0,\t3483475,\tgctttttccagcatcaacgccactgaacaa,\tgctttttc'..b'\tchr\t4639675\t2724848\t2724878\t1\t30,\t0,\t2724848,\ttcacgctcgcagtcaagctggcttatgcca,\ttcacgctcgcagtcaagctggcttatgcca,\n+30\t0\t0\t0\t0\t0\t0\t0\t-\t63_0.566667\t30\t0\t30\tchr\t4639675\t4210393\t4210423\t1\t30,\t0,\t4210393,\ttggcataagccagcttgactgcgagcgtga,\ttggcataagccagcttgactgcgagcgtga,\n+30\t0\t0\t0\t0\t0\t0\t0\t-\t63_0.566667\t30\t0\t30\tchr\t4639675\t3944054\t3944084\t1\t30,\t0,\t3944054,\ttggcataagccagcttgactgcgagcgtga,\ttggcataagccagcttgactgcgagcgtga,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t64_0.500000\t30\t0\t30\tchr\t4639675\t4553565\t4553595\t1\t30,\t0,\t4553565,\tgagatgacggttgcagagtcatgcgtttga,\tgagatgacggttgcagagtcatacgtttga,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t65_0.533333\t30\t0\t30\tchr\t4639675\t3154209\t3154239\t1\t30,\t0,\t3154209,\ttctggatcacgcgcaaacactggctatcgt,\ttctggatcacgcgcaaacactggctatcgt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t66_0.466667\t30\t0\t30\tchr\t4639675\t3330950\t3330980\t1\t30,\t0,\t3330950,\ttctgctttagcaaacagagtgtggtcacga,\ttctgctttagcaaacagagtgtggtcacga,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t68_0.566667\t30\t0\t30\tchr\t4639675\t699412\t699442\t1\t30,\t0,\t699412,\tgaaggatagttggtcagcaacaccagcggc,\tgaaggatagttggtcagcaacaccagcggc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t69_0.733333\t30\t0\t30\tchr\t4639675\t3218070\t3218100\t1\t30,\t0,\t3218070,\tggcgcgctgtcggccacggcgaaatcgacc,\tggcgcgctgtcggccacggcgaaatcgacc,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t72_0.533333\t3
0\t0\t30\tchr\t4639675\t2811982\t2812012\t1\t30,\t0,\t2811982,\taactggaagggcttgggatgacacaacagc,\taactggaagggcttgggatgacgcaacagc,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t73_0.500000\t30\t0\t30\tchr\t4639675\t2480740\t2480770\t1\t30,\t0,\t2480740,\ttttaagcgccaaccaggcttctttggttgc,\ttttaagcgccaaccaggcttctttagttgc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t74_0.433333\t30\t0\t30\tchr\t4639675\t1904984\t1905014\t1\t30,\t0,\t1904984,\tacccttctttcgccatatcaaactgatgtc,\tacccttctttcgccatatcaaactgatgtc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t78_0.566667\t30\t0\t30\tchr\t4639675\t4535164\t4535194\t1\t30,\t0,\t4535164,\tcggagtatccgttccccaacgacaagcatc,\tcggagtatccgttccccaacgacaagcatc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t79_0.500000\t30\t0\t30\tchr\t4639675\t3598974\t3599004\t1\t30,\t0,\t3598974,\taataccgggaagagacaacggggtctcttt,\taataccgggaagagacaacggggtctcttt,\n+30\t0\t0\t0\t0\t0\t0\t0\t-\t80_0.500000\t30\t0\t30\tchr\t4639675\t1333814\t1333844\t1\t30,\t0,\t1333814,\tgagaatcagggcttcgcaaccctgtcatta,\tgagaatcagggcttcgcaaccctgtcatta,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t82_0.500000\t30\t0\t30\tchr\t4639675\t962544\t962574\t1\t30,\t0,\t962544,\tactgggttgctctgaacaagaaaggcgcta,\tactgggttgctctgaacaagaaaggcgcta,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t83_0.533333\t30\t0\t30\tchr\t4639675\t4543338\t4543368\t1\t30,\t0,\t4543338,\tcgccagggacgtatcgcgtcgatatctatt,\tcgccagggacgtatcgcgtcgatatctatt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t86_0.566667\t30\t0\t30\tchr\t4639675\t159366\t159396\t1\t30,\t0,\t159366,\tggcaacgcaggcgcatgattctgcttggaa,\tggcaacgcaggcgcatgattctgcttggaa,\n+29\t0\t0\t1\t0\t0\t0\t0\t+\t87_0.433333\t30\t0\t30\tchr\t4639675\t2137315\t2137345\t1\t30,\t0,\t2137315,\tcgctggatgaaaancgtgaatatcacacca,\tcgctggatgaaaaacgtgaatatcacacca,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t88_0.533333\t30\t0\t30\tchr\t4639675\t2263609\t2263639\t1\t30,\t0,\t2263609,\tcgtaccgggctgaaagtagaagagcgtttc,\tcgtaccgggctgaaagtagaagagcgtttc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t90_0.466667\t30\t0\t30\tchr\t4639675\t609915\t609945\t1\t30,\t0,\t609915,\tatcaccgtttcgctaaccggtacgtttaac,\tatcaccgtt
tcgctaaccggtacgtttaac,\n+29\t1\t0\t0\t0\t0\t0\t0\t+\t91_0.566667\t30\t0\t30\tchr\t4639675\t2506977\t2507007\t1\t30,\t0,\t2506977,\tttcgcccggcaagcttacccaacgcttatc,\tttcgcctggcaagcttacccaacgcttatc,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t92_0.533333\t30\t0\t30\tchr\t4639675\t4109771\t4109801\t1\t30,\t0,\t4109771,\tttttccccgccgttagtagcgactgcagtt,\tttttccccgccgttagtagcgactgcagtt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t93_0.400000\t30\t0\t30\tchr\t4639675\t2250545\t2250575\t1\t30,\t0,\t2250545,\tttatttgcccgatgagtcagtttattgcag,\tttatttgcccgatgagtcagtttattgcag,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t94_0.466667\t30\t0\t30\tchr\t4639675\t1274949\t1274979\t1\t30,\t0,\t1274949,\tccacggtgatatctggtgccatactgataa,\tccacggtgatatctggtgccatactgataa,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t95_0.466667\t30\t0\t30\tchr\t4639675\t3279675\t3279705\t1\t30,\t0,\t3279675,\ttcccccgtaaggcctttctttttctttcgt,\ttcccccgtaaggcctttctttttctttcgt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t97_0.566667\t30\t0\t30\tchr\t4639675\t2779110\t2779140\t1\t30,\t0,\t2779110,\ttgcgccgccggattgttgctcaacatgctt,\ttgcgccgccggattgttgctcaacatgctt,\n+30\t0\t0\t0\t0\t0\t0\t0\t+\t98_0.366667\t30\t0\t30\tchr\t4639675\t3729759\t3729789\t1\t30,\t0,\t3729759,\taacgcgctaaccgccaataataacaaaatt,\taacgcgctaaccgccaataataacaaaatt,\n'