changeset 0:807e3e50845a draft default tip

Imported from capsule None
author devteam
date Mon, 19 May 2014 12:33:35 -0400
parents
children
files blat_mapping.py blat_mapping.xml blat_mapping_example.png test-data/blat_mapping_test1.out test-data/blat_mapping_test1.txt
diffstat 5 files changed, 142 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blat_mapping.py	Mon May 19 12:33:35 2014 -0400
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+
+import os, sys
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def reverse_complement(s):
+    complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" , ".":"."}
+    reversed_s = []
+    for i in s:
+        reversed_s.append(complement_dna[i])
+    reversed_s.reverse()
+    return "".join(reversed_s)
+    
+def __main__():
+    nuc_index = {'a':0,'t':1,'c':2,'g':3,'n':4}
+    coverage = {}        # key = (chrom, index)
+    invalid_lines = 0
+    invalid_chrom = 0
+    infile = sys.argv[1]
+    outfile = sys.argv[2]
+
+    for i, line in enumerate( open( infile ) ):
+        line = line.rstrip('\r\n')
+        if not line or line.startswith('#'):
+            continue
+        fields = line.split()
+        if len(fields) < 21:                # standard number of pslx columns
+            invalid_lines += 1
+            continue 
+        if not fields[0].isdigit():
+            invalid_lines += 1
+            continue
+        chrom = fields[13]
+        if not chrom.startswith( 'chr' ):
+            invalid_lines += 1
+            invalid_chrom += 1
+            continue
+        try:
+            block_count = int(fields[17])
+        except:
+            invalid_lines += 1
+            continue
+        block_size = fields[18].split(',')
+        chrom_start = fields[20].split(',')
+
+        for j in range( block_count ):
+            try:
+                this_block_size = int(block_size[j])
+                this_chrom_start = int(chrom_start[j])
+            except:
+                invalid_lines += 1
+                break
+            # brut force coverage                
+            for k in range( this_block_size ):
+                cur_index = this_chrom_start + k
+                if coverage.has_key( ( chrom, cur_index ) ):
+                    coverage[(chrom, cur_index)] += 1
+                else:
+                    coverage[(chrom, cur_index)] = 1
+                
+    # generate a index file
+    outputfh = open(outfile, 'w')
+    keys = coverage.keys()
+    keys.sort()
+    previous_chrom = ''
+    for i in keys:
+        (chrom, location) = i
+        sum = coverage[(i)]
+        if chrom != previous_chrom:
+            outputfh.write( 'variableStep chrom=%s\n' % ( chrom ) )
+            previous_chrom = chrom
+        outputfh.write( "%s\t%s\n" % ( location, sum ) )
+    outputfh.close()
+    
+    if invalid_lines:
+        invalid_msg = "Skipped %d invalid lines" % invalid_lines
+        if invalid_chrom:
+            invalid_msg += ", including %d lines with chrom id errors which must begin with 'chr' to map correctly to the UCSC Genome Browser. "
+        
+if __name__ == '__main__': __main__()
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blat_mapping.xml	Mon May 19 12:33:35 2014 -0400
@@ -0,0 +1,42 @@
+<tool id="blat2wig" name="Coverage of the Reads" version="1.0.0">
+  <description>in wiggle format</description>
+  <command interpreter="python">blat_mapping.py $input1 $output1</command>
+  <inputs>	
+    <param name="input1" type="data" format="tabular" label="Alignment result"/>
+  </inputs>
+  <outputs>
+    <data name="output1" format="wig"/>
+  </outputs> 
+  <tests>
+    <test>
+      <param name="input1" value="blat_mapping_test1.txt" ftype="tabular" />
+      <output name="output1" file="blat_mapping_test1.out" />
+    </test>
+  </tests>
+  <help>
+
+.. class:: warningmark
+
+ To generate acceptable files, please use alignment program **BLAT** with option **-out=pslx**. 
+
+.. class:: warningmark
+
+ Please edit the database information by click on the pencil icon next to your dataset. Select the corresponding genome build.
+
+-----
+	
+**What it does**
+ 
+ This tool takes **BLAT pslx** output and returns a wig-like file showing the number of reads (coverage) mapped at each chromosome location. Use **Graph/Display Data --> Build custom track** tool to show the coverage mapping in UCSC Genome Browser.
+
+-----
+
+**Example**
+
+ Showing reads coverage on human chromosome 22 (partial result) in UCSC Genome Browser Custom Track:
+ 
+ .. image:: blat_mapping_example.png
+ 	:width: 600
+ 	
+  </help>
+</tool>
Binary file blat_mapping_example.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blat_mapping_test1.out	Mon May 19 12:33:35 2014 -0400
@@ -0,0 +1,17 @@
+variableStep chrom=chrI
+9704438	1
+9704439	1
+9704440	1
+9704441	1
+9704442	1
+9704443	1
+9704444	1
+9704445	1
+9704446	1
+9704447	1
+variableStep chrom=chrII
+9704440	1
+9704441	1
+9704442	1
+9704443	1
+9704444	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/blat_mapping_test1.txt	Mon May 19 12:33:35 2014 -0400
@@ -0,0 +1,2 @@
+10 1 0 0 0 0 0 0 + seq1 10 1 10 chrI 15080483 9704438 9704448 1 10, 1, 9704438, ggtttttttt, ggtttttatt,
+5 1 0 0 0 0 0 0 + seq2 5 1 5 chrII 15080483 9704440 9704445 1 5, 1, 9704440, ttttt, tttta,