comparison cwpair2_util.py @ 2:abc464ca7260 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cwpair2 commit 0e7c1b37cf73425c6637b4e196fdeb290e042bc1
author iuc
date Tue, 26 Jul 2016 06:06:17 -0400
parents 8600bfe7ed52
children 436dc65bd902
comparison
equal deleted inserted replaced
1:d4db13c9dd7f 2:abc464ca7260
1 import bisect 1 import bisect
2 import csv 2 import csv
3 import os 3 import os
4 import sys 4 import sys
5 import traceback 5 import traceback
6
6 import matplotlib 7 import matplotlib
7 matplotlib.use('Agg') 8 matplotlib.use('Agg')
8 from matplotlib import pyplot 9 from matplotlib import pyplot # noqa: E402
9 10
10 # Data outputs 11 # Data outputs
11 DETAILS = 'D' 12 DETAILS = 'D'
12 MATCHED_PAIRS = 'MP' 13 MATCHED_PAIRS = 'MP'
13 ORPHANS = 'O' 14 ORPHANS = 'O'
45 46
46 def get_bin(self, x): 47 def get_bin(self, x):
47 """ 48 """
48 Returns the bin in which a data point falls 49 Returns the bin in which a data point falls
49 """ 50 """
50 return self.start + (x-self.start) // self.binsize * self.binsize + self.binsize/2.0 51 return self.start + (x - self.start) // self.binsize * self.binsize + self.binsize / 2.0
51 52
52 def add(self, x): 53 def add(self, x):
53 x = self.get_bin(x) 54 x = self.get_bin(x)
54 self.dist[x] = self.dist.get(x, 0) + 1 55 self.dist[x] = self.dist.get(x, 0) + 1
55 56
73 sys.stderr.write(msg) 74 sys.stderr.write(msg)
74 sys.exit(1) 75 sys.exit(1)
75 76
76 77
77 def distance(peak1, peak2): 78 def distance(peak1, peak2):
78 return (peak2[1]+peak2[2])/2 - (peak1[1]+peak1[2])/2 79 return (peak2[1] + peak2[2]) / 2 - (peak1[1] + peak1[2]) / 2
79 80
80 81
81 def gff_row(cname, start, end, score, source, type='.', strand='.', phase='.', attrs={}): 82 def gff_row(cname, start, end, score, source, type='.', strand='.', phase='.', attrs={}):
82 return (cname, source, type, start, end, score, strand, phase, gff_attrs(attrs)) 83 return (cname, source, type, start, end, score, strand, phase, gff_attrs(attrs))
83 84
112 for peaks in chromosomes.values(): 113 for peaks in chromosomes.values():
113 for peak in peaks: 114 for peak in peaks:
114 values.append(peak[3]) 115 values.append(peak[3])
115 values.sort() 116 values.sort()
116 # Get 95% value 117 # Get 95% value
117 return values[int(len(values)*0.95)] 118 return values[int(len(values) * 0.95)]
118 119
119 120
120 def filter(chromosomes, threshold=0.05): 121 def filter(chromosomes, threshold=0.05):
121 """ 122 """
122 Filters the peaks to those above a threshold. Threshold < 1.0 is interpreted 123 Filters the peaks to those above a threshold. Threshold < 1.0 is interpreted
147 dist.add(distance(peak, cpeak)) 148 dist.add(distance(peak, cpeak))
148 return dist 149 return dist
149 150
150 151
151 def make_keys(crick): 152 def make_keys(crick):
152 return [(data[1] + data[2])//2 for data in crick] 153 return [(data[1] + data[2]) // 2 for data in crick]
153 154
154 155
155 def get_window(crick, peak, up_distance, down_distance, keys=None): 156 def get_window(crick, peak, up_distance, down_distance, keys=None):
156 """ 157 """
157 Returns a window of all crick peaks within a distance of a watson peak. 158 Returns a window of all crick peaks within a distance of a watson peak.
188 189
189 190
190 def match_mode(window, peak, mode): 191 def match_mode(window, peak, mode):
191 if not window: 192 if not window:
192 return None 193 return None
193 return min(window, key=lambda cpeak: abs(distance(peak, cpeak)-mode)) 194 return min(window, key=lambda cpeak: abs(distance(peak, cpeak) - mode))
194 195
195 METHODS = {'mode': match_mode, 'closest': match_closest, 'largest': match_largest} 196 METHODS = {'mode': match_mode, 'closest': match_closest, 'largest': match_largest}
196 197
197 198
198 def frequency_plot(freqs, fname, labels=[], title=''): 199 def frequency_plot(freqs, fname, labels=[], title=''):
349 cname, 350 cname,
350 match[1], 351 match[1],
351 match[2], 352 match[2],
352 match[3], '-', 353 match[3], '-',
353 midpoint, 354 midpoint,
354 peak[3]+match[3], 355 peak[3] + match[3],
355 d)) 356 d))
356 i = bisect.bisect_left(keys, (match[1]+match[2])/2) 357 i = bisect.bisect_left(keys, (match[1] + match[2]) / 2)
357 del crick[i] 358 del crick[i]
358 del keys[i] 359 del keys[i]
359 else: 360 else:
360 if output_orphans: 361 if output_orphans:
361 orphan_output.writerow((cname, peak[0], peak[1], peak[2], peak[3])) 362 orphan_output.writerow((cname, peak[0], peak[1], peak[2], peak[3]))