Mercurial > repos > devteam > featurecounter
comparison featureCounter.py @ 0:ac6218e2b686 draft default tip
Imported from capsule None
| author | devteam |
|---|---|
| date | Tue, 01 Apr 2014 10:51:34 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ac6218e2b686 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 #Guruprasad Ananda | |
| 3 """ | |
| 4 Calculate count and coverage of one query on another, and append the Coverage and counts to | |
| 5 the last four columns as bases covered, percent coverage, number of completely present features, number of partially present/overlapping features. | |
| 6 | |
| 7 usage: %prog bed_file_1 bed_file_2 out_file | |
| 8 -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file | |
| 9 -2, --cols2=N,N,N,N: Columns for chr, start, end, strand in second file | |
| 10 """ | |
| 11 import sys, fileinput | |
| 12 from bx.intervals.io import * | |
| 13 from bx.cookbook import doc_optparse | |
| 14 from bx.intervals.operations import quicksect | |
| 15 from galaxy.tools.util.galaxyops import * | |
| 16 | |
| 17 assert sys.version_info[:2] >= ( 2, 4 ) | |
| 18 | |
| 19 def stop_err(msg): | |
| 20 sys.stderr.write(msg) | |
| 21 sys.exit() | |
| 22 | |
| 23 def counter(node, start, end): | |
| 24 global full, partial | |
| 25 if node.start <= start and node.maxend > start: | |
| 26 if node.end >= end or (node.start == start and end > node.end > start): | |
| 27 full += 1 | |
| 28 elif end > node.end > start: | |
| 29 partial += 1 | |
| 30 if node.left and node.left.maxend > start: | |
| 31 counter(node.left, start, end) | |
| 32 if node.right: | |
| 33 counter(node.right, start, end) | |
| 34 elif start < node.start < end: | |
| 35 if node.end <= end: | |
| 36 full += 1 | |
| 37 else: | |
| 38 partial += 1 | |
| 39 if node.left and node.left.maxend > start: | |
| 40 counter(node.left, start, end) | |
| 41 if node.right: | |
| 42 counter(node.right, start, end) | |
| 43 else: | |
| 44 if node.left: | |
| 45 counter(node.left, start, end) | |
| 46 | |
| 47 def count_coverage( readers, comments=True ): | |
| 48 primary = readers[0] | |
| 49 secondary = readers[1] | |
| 50 secondary_copy = readers[2] | |
| 51 | |
| 52 rightTree = quicksect.IntervalTree() | |
| 53 for item in secondary: | |
| 54 if type( item ) is GenomicInterval: | |
| 55 rightTree.insert( item, secondary.linenum, item.fields ) | |
| 56 | |
| 57 bitsets = secondary_copy.binned_bitsets() | |
| 58 | |
| 59 global full, partial | |
| 60 | |
| 61 for interval in primary: | |
| 62 if type( interval ) is Header: | |
| 63 yield interval | |
| 64 if type( interval ) is Comment and comments: | |
| 65 yield interval | |
| 66 elif type( interval ) == GenomicInterval: | |
| 67 chrom = interval.chrom | |
| 68 start = int(interval.start) | |
| 69 end = int(interval.end) | |
| 70 full = 0 | |
| 71 partial = 0 | |
| 72 if chrom not in bitsets: | |
| 73 bases_covered = 0 | |
| 74 percent = 0.0 | |
| 75 full = 0 | |
| 76 partial = 0 | |
| 77 else: | |
| 78 bases_covered = bitsets[ chrom ].count_range( start, end-start ) | |
| 79 if (end - start) == 0: | |
| 80 percent = 0 | |
| 81 else: | |
| 82 percent = float(bases_covered) / float(end - start) | |
| 83 if bases_covered: | |
| 84 root = rightTree.chroms[chrom] #root node for the chrom tree | |
| 85 counter(root, start, end) | |
| 86 interval.fields.append(str(bases_covered)) | |
| 87 interval.fields.append(str(percent)) | |
| 88 interval.fields.append(str(full)) | |
| 89 interval.fields.append(str(partial)) | |
| 90 yield interval | |
| 91 | |
| 92 | |
| 93 def main(): | |
| 94 options, args = doc_optparse.parse( __doc__ ) | |
| 95 | |
| 96 try: | |
| 97 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 ) | |
| 98 chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 ) | |
| 99 in1_fname, in2_fname, out_fname = args | |
| 100 except: | |
| 101 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) | |
| 102 | |
| 103 g1 = NiceReaderWrapper( fileinput.FileInput( in1_fname ), | |
| 104 chrom_col=chr_col_1, | |
| 105 start_col=start_col_1, | |
| 106 end_col=end_col_1, | |
| 107 strand_col=strand_col_1, | |
| 108 fix_strand=True ) | |
| 109 g2 = NiceReaderWrapper( fileinput.FileInput( in2_fname ), | |
| 110 chrom_col=chr_col_2, | |
| 111 start_col=start_col_2, | |
| 112 end_col=end_col_2, | |
| 113 strand_col=strand_col_2, | |
| 114 fix_strand=True ) | |
| 115 g2_copy = NiceReaderWrapper( fileinput.FileInput( in2_fname ), | |
| 116 chrom_col=chr_col_2, | |
| 117 start_col=start_col_2, | |
| 118 end_col=end_col_2, | |
| 119 strand_col=strand_col_2, | |
| 120 fix_strand=True ) | |
| 121 | |
| 122 | |
| 123 out_file = open( out_fname, "w" ) | |
| 124 | |
| 125 try: | |
| 126 for line in count_coverage([g1, g2, g2_copy]): | |
| 127 if type( line ) is GenomicInterval: | |
| 128 out_file.write( "%s\n" % "\t".join( line.fields ) ) | |
| 129 else: | |
| 130 out_file.write( "%s\n" % line ) | |
| 131 except ParseError, exc: | |
| 132 out_file.close() | |
| 133 fail( str( exc ) ) | |
| 134 | |
| 135 out_file.close() | |
| 136 | |
| 137 if g1.skipped > 0: | |
| 138 print skipped( g1, filedesc=" of 1st dataset" ) | |
| 139 if g2.skipped > 0: | |
| 140 print skipped( g2, filedesc=" of 2nd dataset" ) | |
| 141 elif g2_copy.skipped > 0: | |
| 142 print skipped( g2_copy, filedesc=" of 2nd dataset" ) | |
| 143 | |
| 144 | |
| 145 if __name__ == "__main__": | |
| 146 main() |
