Mercurial > repos > iuc > cwpair2
comparison cwpair2_util.py @ 2:abc464ca7260 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cwpair2 commit 0e7c1b37cf73425c6637b4e196fdeb290e042bc1
author | iuc |
---|---|
date | Tue, 26 Jul 2016 06:06:17 -0400 |
parents | 8600bfe7ed52 |
children | 436dc65bd902 |
comparison
equal
deleted
inserted
replaced
1:d4db13c9dd7f | 2:abc464ca7260 |
---|---|
1 import bisect | 1 import bisect |
2 import csv | 2 import csv |
3 import os | 3 import os |
4 import sys | 4 import sys |
5 import traceback | 5 import traceback |
6 | |
6 import matplotlib | 7 import matplotlib |
7 matplotlib.use('Agg') | 8 matplotlib.use('Agg') |
8 from matplotlib import pyplot | 9 from matplotlib import pyplot # noqa: E402 |
9 | 10 |
10 # Data outputs | 11 # Data outputs |
11 DETAILS = 'D' | 12 DETAILS = 'D' |
12 MATCHED_PAIRS = 'MP' | 13 MATCHED_PAIRS = 'MP' |
13 ORPHANS = 'O' | 14 ORPHANS = 'O' |
45 | 46 |
46 def get_bin(self, x): | 47 def get_bin(self, x): |
47 """ | 48 """ |
48 Returns the bin in which a data point falls | 49 Returns the bin in which a data point falls |
49 """ | 50 """ |
50 return self.start + (x-self.start) // self.binsize * self.binsize + self.binsize/2.0 | 51 return self.start + (x - self.start) // self.binsize * self.binsize + self.binsize / 2.0 |
51 | 52 |
52 def add(self, x): | 53 def add(self, x): |
53 x = self.get_bin(x) | 54 x = self.get_bin(x) |
54 self.dist[x] = self.dist.get(x, 0) + 1 | 55 self.dist[x] = self.dist.get(x, 0) + 1 |
55 | 56 |
73 sys.stderr.write(msg) | 74 sys.stderr.write(msg) |
74 sys.exit(1) | 75 sys.exit(1) |
75 | 76 |
76 | 77 |
77 def distance(peak1, peak2): | 78 def distance(peak1, peak2): |
78 return (peak2[1]+peak2[2])/2 - (peak1[1]+peak1[2])/2 | 79 return (peak2[1] + peak2[2]) / 2 - (peak1[1] + peak1[2]) / 2 |
79 | 80 |
80 | 81 |
81 def gff_row(cname, start, end, score, source, type='.', strand='.', phase='.', attrs={}): | 82 def gff_row(cname, start, end, score, source, type='.', strand='.', phase='.', attrs={}): |
82 return (cname, source, type, start, end, score, strand, phase, gff_attrs(attrs)) | 83 return (cname, source, type, start, end, score, strand, phase, gff_attrs(attrs)) |
83 | 84 |
112 for peaks in chromosomes.values(): | 113 for peaks in chromosomes.values(): |
113 for peak in peaks: | 114 for peak in peaks: |
114 values.append(peak[3]) | 115 values.append(peak[3]) |
115 values.sort() | 116 values.sort() |
116 # Get 95% value | 117 # Get 95% value |
117 return values[int(len(values)*0.95)] | 118 return values[int(len(values) * 0.95)] |
118 | 119 |
119 | 120 |
120 def filter(chromosomes, threshold=0.05): | 121 def filter(chromosomes, threshold=0.05): |
121 """ | 122 """ |
122 Filters the peaks to those above a threshold. Threshold < 1.0 is interpreted | 123 Filters the peaks to those above a threshold. Threshold < 1.0 is interpreted |
147 dist.add(distance(peak, cpeak)) | 148 dist.add(distance(peak, cpeak)) |
148 return dist | 149 return dist |
149 | 150 |
150 | 151 |
151 def make_keys(crick): | 152 def make_keys(crick): |
152 return [(data[1] + data[2])//2 for data in crick] | 153 return [(data[1] + data[2]) // 2 for data in crick] |
153 | 154 |
154 | 155 |
155 def get_window(crick, peak, up_distance, down_distance, keys=None): | 156 def get_window(crick, peak, up_distance, down_distance, keys=None): |
156 """ | 157 """ |
157 Returns a window of all crick peaks within a distance of a watson peak. | 158 Returns a window of all crick peaks within a distance of a watson peak. |
188 | 189 |
189 | 190 |
190 def match_mode(window, peak, mode): | 191 def match_mode(window, peak, mode): |
191 if not window: | 192 if not window: |
192 return None | 193 return None |
193 return min(window, key=lambda cpeak: abs(distance(peak, cpeak)-mode)) | 194 return min(window, key=lambda cpeak: abs(distance(peak, cpeak) - mode)) |
194 | 195 |
195 METHODS = {'mode': match_mode, 'closest': match_closest, 'largest': match_largest} | 196 METHODS = {'mode': match_mode, 'closest': match_closest, 'largest': match_largest} |
196 | 197 |
197 | 198 |
198 def frequency_plot(freqs, fname, labels=[], title=''): | 199 def frequency_plot(freqs, fname, labels=[], title=''): |
349 cname, | 350 cname, |
350 match[1], | 351 match[1], |
351 match[2], | 352 match[2], |
352 match[3], '-', | 353 match[3], '-', |
353 midpoint, | 354 midpoint, |
354 peak[3]+match[3], | 355 peak[3] + match[3], |
355 d)) | 356 d)) |
356 i = bisect.bisect_left(keys, (match[1]+match[2])/2) | 357 i = bisect.bisect_left(keys, (match[1] + match[2]) / 2) |
357 del crick[i] | 358 del crick[i] |
358 del keys[i] | 359 del keys[i] |
359 else: | 360 else: |
360 if output_orphans: | 361 if output_orphans: |
361 orphan_output.writerow((cname, peak[0], peak[1], peak[2], peak[3])) | 362 orphan_output.writerow((cname, peak[0], peak[1], peak[2], peak[3])) |