Mercurial > repos > iuc > jbrowse
comparison jbrowse.py @ 1:497c6bb3b717 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit 0887009a23d176b21536c9fd8a18c4fecc417d4f
author | iuc |
---|---|
date | Thu, 18 Jun 2015 12:10:51 -0400 |
parents | 2c9e5136b416 |
children | 7342f467507b |
comparison
equal
deleted
inserted
replaced
0:2c9e5136b416 | 1:497c6bb3b717 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 from string import Template | |
2 import os | 3 import os |
3 import shutil | |
4 import argparse | 4 import argparse |
5 import subprocess | 5 import subprocess |
6 import hashlib | 6 import hashlib |
7 import tempfile | |
8 import json | |
9 import yaml | |
10 import logging | |
11 logging.basicConfig() | |
12 log = logging.getLogger(__name__) | |
13 | |
14 COLOR_FUNCTION_TEMPLATE = Template(""" | |
15 function(feature, variableName, glyphObject, track) { | |
16 var score = ${score}; | |
17 ${opacity} | |
18 return 'rgba(${red}, ${green}, ${blue}, ' + opacity + ')'; | |
19 } | |
20 """) | |
21 | |
22 BLAST_OPACITY_MATH = """ | |
23 var opacity = 0; | |
24 if(score == 0.0) { | |
25 opacity = 1; | |
26 } else{ | |
27 opacity = (20 - Math.log10(score)) / 180; | |
28 } | |
29 """ | |
30 | |
31 BREWER_COLOUR_IDX = 0 | |
32 BREWER_COLOUR_SCHEMES = [ | |
33 (228, 26, 28), | |
34 (55, 126, 184), | |
35 (77, 175, 74), | |
36 (152, 78, 163), | |
37 (255, 127, 0), | |
38 ] | |
39 | |
40 | |
41 # score comes from feature._parent.get('score') or feature.get('score') | |
42 # Opacity math | |
7 | 43 |
8 TN_TABLE = { | 44 TN_TABLE = { |
9 'gff3': '--gff', | 45 'gff3': '--gff', |
10 'gff': '--gff', | 46 'gff': '--gff', |
11 'bed': '--bed', | 47 'bed': '--bed', |
12 # 'genbank': '--gbk', | 48 # 'genbank': '--gbk', |
13 } | 49 } |
14 | 50 |
15 | 51 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__)) |
16 def process_genome(jbrowse_dir, genome): | 52 |
17 subprocess.check_output(['perl', 'bin/prepare-refseqs.pl', '--fasta', genome], cwd=jbrowse_dir) | 53 |
18 | 54 class JbrowseConnector(object): |
19 | 55 |
20 def process_annotations(jbrowse_dir, annot_file, annot_label, annot_format, | 56 def __init__(self, jbrowse, jbrowse_dir, outdir, genome): |
21 **kwargs): | 57 self.jbrowse = jbrowse |
22 key = hashlib.md5(annot_file).hexdigest() | 58 self.jbrowse_dir = jbrowse_dir |
23 | 59 self.outdir = outdir |
24 cmd = ['perl', 'bin/flatfile-to-json.pl', TN_TABLE.get(annot_format, 'gff'), | 60 self.genome_path = genome |
25 annot_file, '--trackLabel', key, '--key', annot_label] | 61 self.brewer_colour_idx = 0 |
26 subprocess.check_output(cmd, cwd=jbrowse_dir) | 62 |
63 self.clone_jbrowse(self.jbrowse, self.outdir) | |
64 self.process_genome() | |
65 | |
66 def subprocess_check_call(self, command): | |
67 log.debug('cd %s && %s', self.jbrowse_dir, ' '.join(command)) | |
68 subprocess.check_call(command, cwd=self.jbrowse_dir) | |
69 | |
70 def _get_colours(self): | |
71 r, g, b = BREWER_COLOUR_SCHEMES[self.brewer_colour_idx] | |
72 self.brewer_colour_idx += 1 | |
73 return r, g, b | |
74 | |
75 def process_genome(self): | |
76 self.subprocess_check_call(['perl', 'bin/prepare-refseqs.pl', | |
77 '--fasta', self.genome_path]) | |
78 | |
79 def _add_json(self, json_data): | |
80 if len(json_data.keys()) == 0: | |
81 return | |
82 | |
83 tmp = tempfile.NamedTemporaryFile(delete=False) | |
84 tmp.write(json.dumps(json_data)) | |
85 tmp.close() | |
86 cmd = ['perl', 'bin/add-track-json.pl', tmp.name, | |
87 os.path.join('data', 'trackList.json')] | |
88 self.subprocess_check_call(cmd) | |
89 os.unlink(tmp.name) | |
90 | |
91 def add_blastxml(self, data, key, **kwargs): | |
92 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) | |
93 cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'), | |
94 '--trim_end', '--min_gap', str(kwargs['min_gap']), data] | |
95 subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_unrebased) | |
96 gff3_unrebased.close() | |
97 | |
98 gff3_rebased = tempfile.NamedTemporaryFile(delete=False) | |
99 cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')] | |
100 if kwargs['protein']: | |
101 cmd.append('--protein2dna') | |
102 cmd.extend([kwargs['parent'], gff3_unrebased.name]) | |
103 subprocess.check_call(cmd, cwd=self.jbrowse_dir, stdout=gff3_rebased) | |
104 gff3_rebased.close() | |
105 | |
106 label = hashlib.md5(data).hexdigest() | |
107 | |
108 red, green, blue = self._get_colours() | |
109 color_function = COLOR_FUNCTION_TEMPLATE.substitute({ | |
110 'score': "feature._parent.get('score')", | |
111 'opacity': BLAST_OPACITY_MATH, | |
112 'red': red, | |
113 'green': green, | |
114 'blue': blue, | |
115 }) | |
116 | |
117 clientConfig = { | |
118 'label': 'description', | |
119 'color': color_function.replace('\n', ''), | |
120 'description': 'Hit_titles', | |
121 } | |
122 config = {'glyph': 'JBrowse/View/FeatureGlyph/Segments'} | |
123 if 'category' in kwargs: | |
124 config['category'] = kwargs['category'] | |
125 | |
126 cmd = ['perl', 'bin/flatfile-to-json.pl', | |
127 '--gff', gff3_rebased.name, | |
128 '--trackLabel', label, | |
129 '--key', key, | |
130 '--clientConfig', json.dumps(clientConfig), | |
131 '--config', json.dumps(config), | |
132 '--trackType', 'JBrowse/View/Track/CanvasFeatures' | |
133 ] | |
134 | |
135 self.subprocess_check_call(cmd) | |
136 os.unlink(gff3_rebased.name) | |
137 os.unlink(gff3_unrebased.name) | |
138 | |
139 def _min_max_gff(self, gff_file): | |
140 min_val = None | |
141 max_val = None | |
142 with open(gff_file, 'r') as handle: | |
143 for line in handle: | |
144 try: | |
145 value = float(line.split('\t')[5]) | |
146 min_val = min(value, (min_val or value)) | |
147 max_val = max(value, (max_val or value)) | |
148 | |
149 if value < min_val: | |
150 min_val = value | |
151 | |
152 if value > max_val: | |
153 max_val = value | |
154 except Exception: | |
155 pass | |
156 return min_val, max_val | |
157 | |
158 def add_bigwig(self, data, key, **kwargs): | |
159 label = hashlib.md5(data).hexdigest() | |
160 dest = os.path.join('data', 'raw', os.path.basename(data)) | |
161 cmd = ['ln', '-s', data, dest] | |
162 self.subprocess_check_call(cmd) | |
163 | |
164 track_data = { | |
165 "label": label, | |
166 "urlTemplate": os.path.join('..', dest), | |
167 "bicolor_pivot": "zero", | |
168 "storeClass": "JBrowse/Store/SeqFeature/BigWig", | |
169 "type": "JBrowse/View/Track/Wiggle/Density", | |
170 "key": key, | |
171 } | |
172 track_data.update(kwargs) | |
173 | |
174 if 'min' not in track_data and 'max' not in track_data \ | |
175 and 'autoscale' not in track_data: | |
176 track_data['autoscale'] = 'local' | |
177 | |
178 self._add_json(track_data) | |
179 | |
180 def add_bam(self, data, key, **kwargs): | |
181 label = hashlib.md5(data).hexdigest() | |
182 dest = os.path.join('data', 'raw', os.path.basename(data)) | |
183 # ln? | |
184 cmd = ['ln', '-s', data, dest] | |
185 self.subprocess_check_call(cmd) | |
186 | |
187 bai_source = kwargs['bam_index'] | |
188 cmd = ['ln', '-s', bai_source, dest + '.bai'] | |
189 self.subprocess_check_call(cmd) | |
190 | |
191 track_data = { | |
192 "urlTemplate": os.path.join('..', dest), | |
193 "key": key, | |
194 "label": label, | |
195 "type": "JBrowse/View/Track/Alignments2", | |
196 "storeClass": "JBrowse/Store/SeqFeature/BAM", | |
197 } | |
198 if 'category' in kwargs: | |
199 track_data['category'] = kwargs['category'] | |
200 self._add_json(track_data) | |
201 | |
202 if kwargs.get('auto_snp', False): | |
203 track_data = { | |
204 "storeClass": "JBrowse/Store/SeqFeature/BAM", | |
205 "urlTemplate": os.path.join('..', dest), | |
206 "type": "JBrowse/View/Track/SNPCoverage", | |
207 "key": key + " - SNPs/Coverage", | |
208 "label": label + "_autosnp", | |
209 } | |
210 if 'category' in kwargs: | |
211 track_data['category'] = kwargs['category'] | |
212 self._add_json(track_data) | |
213 | |
214 def add_vcf(self, data, key, **kwargs): | |
215 label = hashlib.md5(data).hexdigest() | |
216 dest = os.path.join('data', 'raw', os.path.basename(data)) | |
217 # ln? | |
218 cmd = ['ln', '-s', data, dest] | |
219 self.subprocess_check_call(cmd) | |
220 cmd = ['bgzip', dest] | |
221 self.subprocess_check_call(cmd) | |
222 cmd = ['tabix', '-p', 'vcf', dest + '.gz'] | |
223 self.subprocess_check_call(cmd) | |
224 | |
225 track_data = { | |
226 "key": key, | |
227 "label": label, | |
228 "urlTemplate": os.path.join('..', dest + '.gz'), | |
229 "type": "JBrowse/View/Track/HTMLVariants", | |
230 "storeClass": "JBrowse/Store/SeqFeature/VCFTabix", | |
231 } | |
232 track_data.update(kwargs) | |
233 self._add_json(track_data) | |
234 | |
235 def add_features(self, data, key, format, **kwargs): | |
236 label = hashlib.md5(data).hexdigest() | |
237 cmd = ['perl', 'bin/flatfile-to-json.pl', | |
238 TN_TABLE.get(format, 'gff'), data, | |
239 '--trackLabel', label, | |
240 '--key', key] | |
241 | |
242 config = {} | |
243 if 'category' in kwargs: | |
244 config['category'] = kwargs['category'] | |
245 | |
246 if kwargs.get('match', False): | |
247 clientConfig = { | |
248 'label': 'description', | |
249 'description': 'Hit_titles', | |
250 } | |
251 | |
252 # Get min/max and build a scoring function since JBrowse doesn't | |
253 min_val, max_val = self._min_max_gff(data) | |
254 | |
255 if min_val is not None and max_val is not None: | |
256 MIN_MAX_OPACITY_MATH = Template(""" | |
257 var opacity = (score - ${min}) * (1/(${max} - ${min})); | |
258 """).substitute({ | |
259 'max': max_val, | |
260 'min': min_val, | |
261 }) | |
262 | |
263 red, green, blue = self._get_colours() | |
264 color_function = COLOR_FUNCTION_TEMPLATE.substitute({ | |
265 'score': "feature.get('score')", | |
266 'opacity': MIN_MAX_OPACITY_MATH, | |
267 'red': red, | |
268 'green': green, | |
269 'blue': blue, | |
270 }) | |
271 | |
272 clientConfig['color'] = color_function.replace('\n', '') | |
273 | |
274 config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments' | |
275 | |
276 cmd += ['--clientConfig', json.dumps(clientConfig), | |
277 '--trackType', 'JBrowse/View/Track/CanvasFeatures' | |
278 ] | |
279 | |
280 cmd.extend(['--config', json.dumps(config)]) | |
281 | |
282 self.subprocess_check_call(cmd) | |
283 | |
284 def process_annotations(self, data, key, format, **kwargs): | |
285 if format in ('gff', 'gff3', 'bed'): | |
286 self.add_features(data, key, format, **kwargs) | |
287 elif format == 'bigwig': | |
288 self.add_bigwig(data, key, **kwargs) | |
289 elif format == 'bam': | |
290 self.add_bam(data, key, **kwargs) | |
291 elif format == 'blastxml': | |
292 self.add_blastxml(data, key, **kwargs) | |
293 elif format == 'vcf': | |
294 self.add_vcf(data, key, **kwargs) | |
295 | |
296 def clone_jbrowse(self, jbrowse_dir, destination): | |
297 # JBrowse seems to have included some bad symlinks, cp ignores bad symlinks | |
298 # unlike copytree | |
299 cmd = ['mkdir', '-p', destination] | |
300 subprocess.check_call(cmd) | |
301 cmd = ['cp', '-r', jbrowse_dir, destination] | |
302 subprocess.check_call(cmd) | |
303 cmd = ['mkdir', '-p', os.path.join(destination, 'JBrowse-1.11.6', | |
304 'data', 'raw')] | |
305 subprocess.check_call(cmd) | |
306 | |
307 # http://unix.stackexchange.com/a/38691/22785 | |
308 # JBrowse releases come with some broken symlinks | |
309 cmd = ['find', destination, '-type', 'l', '-xtype', 'l', '-exec', 'rm', "'{}'", '+'] | |
310 subprocess.check_call(cmd) | |
27 | 311 |
28 | 312 |
29 if __name__ == '__main__': | 313 if __name__ == '__main__': |
30 parser = argparse.ArgumentParser(description="", epilog="") | 314 parser = argparse.ArgumentParser(description="", epilog="") |
31 parser.add_argument('genome', type=file, help='Input genome file') | 315 parser.add_argument('genome', type=file, help='Input genome file') |
32 | 316 parser.add_argument('yaml', type=file, help='Track Configuration') |
33 parser.add_argument('--gff3', type=file, nargs='*', help='GFF3/BED/GBK File') | |
34 parser.add_argument('--gff3_format', choices=['gff3', 'gff', 'bed', 'gbk'], nargs='*', help='GFF3/BED/GBK Format') | |
35 parser.add_argument('--gff3_label', type=str, nargs='*', help='GFF3/BED/GBK Label') | |
36 | 317 |
37 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release') | 318 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release') |
38 parser.add_argument('--outdir', help='Output directory', default='out') | 319 parser.add_argument('--outdir', help='Output directory', default='out') |
39 args = parser.parse_args() | 320 args = parser.parse_args() |
40 | 321 |
41 jbrowse_dir = os.path.join(args.outdir, 'JBrowse-1.11.6') | 322 jc = JbrowseConnector( |
42 shutil.copytree(args.jbrowse, jbrowse_dir) | 323 jbrowse=args.jbrowse, |
43 | 324 jbrowse_dir=os.path.join(args.outdir, 'JBrowse-1.11.6'), |
44 process_genome(jbrowse_dir, os.path.realpath(args.genome.name)) | 325 outdir=args.outdir, |
45 | 326 genome=os.path.realpath(args.genome.name), |
46 for gff3, gff3_format, gff3_label in zip(args.gff3, args.gff3_format, args.gff3_label): | 327 ) |
47 gff3_path = os.path.realpath(gff3.name) | 328 |
48 process_annotations(jbrowse_dir, gff3_path, gff3_label, gff3_format) | 329 track_data = yaml.load(args.yaml) |
330 for track in track_data: | |
331 path = os.path.realpath(track['file']) | |
332 extra = track.get('options', {}) | |
333 if '__unused__' in extra: | |
334 del extra['__unused__'] | |
335 | |
336 for possible_partial_path in ('bam_index', 'parent'): | |
337 if possible_partial_path in extra: | |
338 extra[possible_partial_path] = os.path.realpath( | |
339 extra[possible_partial_path]) | |
340 extra['category'] = track.get('category', 'Default') | |
341 | |
342 jc.process_annotations(path, track['label'], track['ext'], **extra) | |
49 | 343 |
50 print """ | 344 print """ |
51 <html> | 345 <html> |
52 <body> | 346 <body> |
53 <script type="text/javascript"> | 347 <script type="text/javascript"> |
54 window.location=JBrowse-1.11.6/index.html | 348 window.location=JBrowse-1.11.6/index.html |
55 </script> | 349 </script> |
56 <a href="JBrowse-1.11.6/index.html">Go to JBrowse</a> | 350 <a href="JBrowse-1.11.6/index.html">Go to JBrowse</a> |
351 <p>Please note that JBrowse functions best on production Galaxy | |
352 instances. The paste server used in development instances has issues | |
353 serving the volumes of data regularly involved in JBrowse</p> | |
57 </body> | 354 </body> |
58 </html> | 355 </html> |
59 """ | 356 """ |