Mercurial > repos > devteam > blat_mapping
diff blat_mapping.py @ 0:807e3e50845a draft default tip
Imported from capsule None
author | devteam |
---|---|
date | Mon, 19 May 2014 12:33:35 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blat_mapping.py Mon May 19 12:33:35 2014 -0400 @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +import os, sys + +assert sys.version_info[:2] >= ( 2, 4 ) + +def reverse_complement(s): + complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" , ".":"."} + reversed_s = [] + for i in s: + reversed_s.append(complement_dna[i]) + reversed_s.reverse() + return "".join(reversed_s) + +def __main__(): + nuc_index = {'a':0,'t':1,'c':2,'g':3,'n':4} + coverage = {} # key = (chrom, index) + invalid_lines = 0 + invalid_chrom = 0 + infile = sys.argv[1] + outfile = sys.argv[2] + + for i, line in enumerate( open( infile ) ): + line = line.rstrip('\r\n') + if not line or line.startswith('#'): + continue + fields = line.split() + if len(fields) < 21: # standard number of pslx columns + invalid_lines += 1 + continue + if not fields[0].isdigit(): + invalid_lines += 1 + continue + chrom = fields[13] + if not chrom.startswith( 'chr' ): + invalid_lines += 1 + invalid_chrom += 1 + continue + try: + block_count = int(fields[17]) + except: + invalid_lines += 1 + continue + block_size = fields[18].split(',') + chrom_start = fields[20].split(',') + + for j in range( block_count ): + try: + this_block_size = int(block_size[j]) + this_chrom_start = int(chrom_start[j]) + except: + invalid_lines += 1 + break + # brut force coverage + for k in range( this_block_size ): + cur_index = this_chrom_start + k + if coverage.has_key( ( chrom, cur_index ) ): + coverage[(chrom, cur_index)] += 1 + else: + coverage[(chrom, cur_index)] = 1 + + # generate a index file + outputfh = open(outfile, 'w') + keys = coverage.keys() + keys.sort() + previous_chrom = '' + for i in keys: + (chrom, location) = i + sum = coverage[(i)] + if chrom != previous_chrom: + outputfh.write( 'variableStep chrom=%s\n' % ( chrom ) ) + previous_chrom = chrom + outputfh.write( "%s\t%s\n" % ( location, sum ) ) + outputfh.close() + + if invalid_lines: + invalid_msg = "Skipped %d invalid lines" % invalid_lines + if invalid_chrom: + invalid_msg += ", including %d lines with chrom id errors which must begin with 'chr' to map correctly to the UCSC Genome Browser. " + +if __name__ == '__main__': __main__() \ No newline at end of file