Mercurial > repos > iuc > jbrowse
comparison jbrowse.py @ 25:1cfc579079a6 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse commit b6f9a87b6091cc881a49e0b6acfadc5e7786967f
author | iuc |
---|---|
date | Tue, 29 Jan 2019 05:34:16 -0500 |
parents | fa30df9b79c2 |
children | 61ce21e36cb5 |
comparison
equal
deleted
inserted
replaced
24:fa30df9b79c2 | 25:1cfc579079a6 |
---|---|
261 trackConfig['style']['color'] = color_function.replace('\n', '') | 261 trackConfig['style']['color'] = color_function.replace('\n', '') |
262 return trackConfig | 262 return trackConfig |
263 | 263 |
264 | 264 |
265 def etree_to_dict(t): | 265 def etree_to_dict(t): |
266 if t is None: | |
267 return {} | |
268 | |
266 d = {t.tag: {} if t.attrib else None} | 269 d = {t.tag: {} if t.attrib else None} |
267 children = list(t) | 270 children = list(t) |
268 if children: | 271 if children: |
269 dd = defaultdict(list) | 272 dd = defaultdict(list) |
270 for dc in map(etree_to_dict, children): | 273 for dc in map(etree_to_dict, children): |
379 | 382 |
380 def subprocess_check_call(self, command): | 383 def subprocess_check_call(self, command): |
381 log.debug('cd %s && %s', self.outdir, ' '.join(command)) | 384 log.debug('cd %s && %s', self.outdir, ' '.join(command)) |
382 subprocess.check_call(command, cwd=self.outdir) | 385 subprocess.check_call(command, cwd=self.outdir) |
383 | 386 |
387 def subprocess_popen(self, command): | |
388 log.debug('cd %s && %s', self.outdir, command) | |
389 p = subprocess.Popen(command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
390 output, err = p.communicate() | |
391 retcode = p.returncode | |
392 if retcode != 0: | |
393 log.error('cd %s && %s', self.outdir, command) | |
394 log.error(output) | |
395 log.error(err) | |
396 raise RuntimeError("Command failed with exit code %s" % (retcode)) | |
397 | |
384 def _jbrowse_bin(self, command): | 398 def _jbrowse_bin(self, command): |
385 return os.path.realpath(os.path.join(self.jbrowse, 'bin', command)) | 399 return os.path.realpath(os.path.join(self.jbrowse, 'bin', command)) |
386 | 400 |
387 def process_genomes(self): | 401 def process_genomes(self): |
388 for genome_node in self.genome_paths: | 402 for genome_node in self.genome_paths: |
389 # TODO: Waiting on https://github.com/GMOD/jbrowse/pull/884 | 403 # We only expect one input genome per run. This for loop is just |
404 # easier to write than the alternative / catches any possible | |
405 # issues. | |
406 | |
407 # Copy the file in workdir, prepare-refseqs.pl will copy it to jbrowse's data dir | |
408 local_genome = os.path.realpath('./genome.fasta') | |
409 shutil.copy(genome_node['path'], local_genome) | |
410 | |
411 cmd = ['samtools', 'faidx', local_genome] | |
412 self.subprocess_check_call(cmd) | |
413 | |
390 self.subprocess_check_call([ | 414 self.subprocess_check_call([ |
391 'perl', self._jbrowse_bin('prepare-refseqs.pl'), | 415 'perl', self._jbrowse_bin('prepare-refseqs.pl'), |
392 '--fasta', genome_node['path']]) | 416 '--trackConfig', json.dumps({'metadata': genome_node['meta']}), |
417 '--indexed_fasta', os.path.realpath(local_genome)]) | |
418 | |
419 os.unlink(local_genome) | |
420 os.unlink(local_genome + '.fai') | |
393 | 421 |
394 def generate_names(self): | 422 def generate_names(self): |
395 # Generate names | 423 # Generate names |
396 args = [ | 424 args = [ |
397 'perl', self._jbrowse_bin('generate-names.pl'), | 425 'perl', self._jbrowse_bin('generate-names.pl'), |
418 def _add_track_json(self, json_data): | 446 def _add_track_json(self, json_data): |
419 if len(json_data) == 0: | 447 if len(json_data) == 0: |
420 return | 448 return |
421 | 449 |
422 tmp = tempfile.NamedTemporaryFile(delete=False) | 450 tmp = tempfile.NamedTemporaryFile(delete=False) |
423 tmp.write(json.dumps(json_data)) | 451 json.dump(json_data, tmp) |
424 tmp.close() | 452 tmp.close() |
425 cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name, | 453 cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name, |
426 os.path.join('data', 'trackList.json')] | 454 os.path.join('data', 'trackList.json')] |
427 self.subprocess_check_call(cmd) | 455 self.subprocess_check_call(cmd) |
428 os.unlink(tmp.name) | 456 os.unlink(tmp.name) |
429 | 457 |
430 def _blastxml_to_gff3(self, xml, min_gap=10): | 458 def _blastxml_to_gff3(self, xml, min_gap=10): |
431 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) | 459 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) |
432 cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'), | 460 cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'), |
433 '--trim', '--trim_end', '--min_gap', str(min_gap), xml] | 461 '--trim', '--trim_end', '--include_seq', '--min_gap', str(min_gap), xml] |
434 log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_unrebased.name) | 462 log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_unrebased.name) |
435 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased) | 463 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased) |
436 gff3_unrebased.close() | 464 gff3_unrebased.close() |
437 return gff3_unrebased.name | 465 return gff3_unrebased.name |
438 | 466 |
451 | 479 |
452 # Replace original gff3 file | 480 # Replace original gff3 file |
453 shutil.copy(gff3_rebased.name, gff3) | 481 shutil.copy(gff3_rebased.name, gff3) |
454 os.unlink(gff3_rebased.name) | 482 os.unlink(gff3_rebased.name) |
455 | 483 |
456 config = { | 484 dest = os.path.join(self.outdir, 'data', 'raw', trackData['label'] + '.gff') |
457 'glyph': 'JBrowse/View/FeatureGlyph/Segments', | 485 |
458 "category": trackData['category'], | 486 self._sort_gff(gff3, dest) |
459 } | 487 |
460 | 488 url = os.path.join('raw', trackData['label'] + '.gff.gz') |
461 clientConfig = trackData['style'] | 489 trackData.update({ |
462 | 490 "urlTemplate": url, |
463 cmd = ['perl', self._jbrowse_bin('flatfile-to-json.pl'), | 491 "storeClass": "JBrowse/Store/SeqFeature/GFF3Tabix", |
464 '--gff', gff3, | 492 }) |
465 '--trackLabel', trackData['label'], | 493 |
466 '--key', trackData['key'], | 494 trackData['glyph'] = 'JBrowse/View/FeatureGlyph/Segments' |
467 '--clientConfig', json.dumps(clientConfig), | 495 |
468 '--config', json.dumps(config), | 496 trackData['trackType'] = 'BlastView/View/Track/CanvasFeatures' |
469 '--trackType', 'BlastView/View/Track/CanvasFeatures' | 497 trackData['type'] = 'BlastView/View/Track/CanvasFeatures' |
470 ] | 498 |
471 | 499 self._add_track_json(trackData) |
472 # className in --clientConfig is ignored, it needs to be set with --className | 500 |
473 if 'className' in trackData['style']: | |
474 cmd += ['--className', trackData['style']['className']] | |
475 | |
476 self.subprocess_check_call(cmd) | |
477 os.unlink(gff3) | 501 os.unlink(gff3) |
478 | 502 |
479 if blastOpts.get('index', 'false') == 'true': | 503 if blastOpts.get('index', 'false') == 'true': |
480 self.tracksToIndex.append("%s" % trackData['label']) | 504 self.tracksToIndex.append("%s" % trackData['label']) |
481 | 505 |
502 | 526 |
503 trackData['scale'] = wiggleOpts['scale'] | 527 trackData['scale'] = wiggleOpts['scale'] |
504 | 528 |
505 self._add_track_json(trackData) | 529 self._add_track_json(trackData) |
506 | 530 |
531 def add_bigwig_multiple(self, data, trackData, wiggleOpts, **kwargs): | |
532 | |
533 urls = [] | |
534 for idx, bw in enumerate(data): | |
535 dest = os.path.join('data', 'raw', trackData['label'] + '_' + str(idx) + '.bw') | |
536 cmd = ['ln', '-s', bw[1], dest] | |
537 self.subprocess_check_call(cmd) | |
538 | |
539 urls.append({"url": os.path.join('raw', trackData['label'] + '_' + str(idx) + '.bw'), "name": str(idx + 1) + ' - ' + bw[0]}) | |
540 | |
541 trackData.update({ | |
542 "urlTemplates": urls, | |
543 "showTooltips": "true", | |
544 "storeClass": "MultiBigWig/Store/SeqFeature/MultiBigWig", | |
545 "type": "MultiBigWig/View/Track/MultiWiggle/MultiDensity", | |
546 }) | |
547 if 'XYPlot' in wiggleOpts['type']: | |
548 trackData['type'] = "MultiBigWig/View/Track/MultiWiggle/MultiXYPlot" | |
549 | |
550 trackData['variance_band'] = True if wiggleOpts['variance_band'] == 'true' else False | |
551 | |
552 if 'min' in wiggleOpts and 'max' in wiggleOpts: | |
553 trackData['min_score'] = wiggleOpts['min'] | |
554 trackData['max_score'] = wiggleOpts['max'] | |
555 else: | |
556 trackData['autoscale'] = wiggleOpts.get('autoscale', 'local') | |
557 | |
558 trackData['scale'] = wiggleOpts['scale'] | |
559 | |
560 self._add_track_json(trackData) | |
561 | |
507 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): | 562 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): |
508 dest = os.path.join('data', 'raw', trackData['label'] + '.bam') | 563 dest = os.path.join('data', 'raw', trackData['label'] + '.bam') |
509 cmd = ['ln', '-s', os.path.realpath(data), dest] | 564 cmd = ['ln', '-s', os.path.realpath(data), dest] |
510 self.subprocess_check_call(cmd) | 565 self.subprocess_check_call(cmd) |
511 | 566 |
515 url = os.path.join('raw', trackData['label'] + '.bam') | 570 url = os.path.join('raw', trackData['label'] + '.bam') |
516 trackData.update({ | 571 trackData.update({ |
517 "urlTemplate": url, | 572 "urlTemplate": url, |
518 "type": "JBrowse/View/Track/Alignments2", | 573 "type": "JBrowse/View/Track/Alignments2", |
519 "storeClass": "JBrowse/Store/SeqFeature/BAM", | 574 "storeClass": "JBrowse/Store/SeqFeature/BAM", |
575 "chunkSizeLimit": bamOpts.get('chunkSizeLimit', '5000000') | |
520 }) | 576 }) |
521 | 577 |
522 # Apollo will only switch to the (prettier) 'bam-read' className if it's not set explicitly in the track config | 578 # Apollo will only switch to the (prettier) 'bam-read' className if it's not set explicitly in the track config |
523 # So remove the default 'feature' value for these bam tracks | 579 # So remove the default 'feature' value for these bam tracks |
524 if 'className' in trackData['style'] and trackData['style']['className'] == 'feature': | 580 if 'className' in trackData['style'] and trackData['style']['className'] == 'feature': |
530 trackData2 = copy.copy(trackData) | 586 trackData2 = copy.copy(trackData) |
531 trackData2.update({ | 587 trackData2.update({ |
532 "type": "JBrowse/View/Track/SNPCoverage", | 588 "type": "JBrowse/View/Track/SNPCoverage", |
533 "key": trackData['key'] + " - SNPs/Coverage", | 589 "key": trackData['key'] + " - SNPs/Coverage", |
534 "label": trackData['label'] + "_autosnp", | 590 "label": trackData['label'] + "_autosnp", |
591 "chunkSizeLimit": bamOpts.get('chunkSizeLimit', '5000000') | |
535 }) | 592 }) |
536 self._add_track_json(trackData2) | 593 self._add_track_json(trackData2) |
537 | 594 |
538 def add_vcf(self, data, trackData, vcfOpts={}, **kwargs): | 595 def add_vcf(self, data, trackData, vcfOpts={}, **kwargs): |
539 dest = os.path.join('data', 'raw', trackData['label'] + '.vcf') | 596 dest = os.path.join('data', 'raw', trackData['label'] + '.vcf') |
543 cmd = ['bgzip', dest] | 600 cmd = ['bgzip', dest] |
544 self.subprocess_check_call(cmd) | 601 self.subprocess_check_call(cmd) |
545 cmd = ['tabix', '-p', 'vcf', dest + '.gz'] | 602 cmd = ['tabix', '-p', 'vcf', dest + '.gz'] |
546 self.subprocess_check_call(cmd) | 603 self.subprocess_check_call(cmd) |
547 | 604 |
548 url = os.path.join('raw', trackData['label'] + '.vcf') | 605 url = os.path.join('raw', trackData['label'] + '.vcf.gz') |
549 trackData.update({ | 606 trackData.update({ |
550 "urlTemplate": url, | 607 "urlTemplate": url, |
551 "type": "JBrowse/View/Track/HTMLVariants", | 608 "type": "JBrowse/View/Track/HTMLVariants", |
552 "storeClass": "JBrowse/Store/SeqFeature/VCFTabix", | 609 "storeClass": "JBrowse/Store/SeqFeature/VCFTabix", |
553 }) | 610 }) |
554 self._add_track_json(trackData) | 611 self._add_track_json(trackData) |
555 | 612 |
556 def add_features(self, data, format, trackData, gffOpts, metadata=None, **kwargs): | 613 def _sort_gff(self, data, dest): |
557 cmd = [ | 614 |
558 'perl', self._jbrowse_bin('flatfile-to-json.pl'), | 615 if not os.path.exists(dest): |
559 self.TN_TABLE.get(format, 'gff'), | 616 # Only index if not already done |
560 data, | 617 cmd = "grep ^\"#\" '%s' > '%s'" % (data, dest) |
561 '--trackLabel', trackData['label'], | 618 self.subprocess_popen(cmd) |
562 '--key', trackData['key'] | 619 |
563 ] | 620 cmd = "grep -v ^\"#\" '%s' | grep -v \"^$\" | grep \"\t\" | sort -k1,1 -k4,4n >> '%s'" % (data, dest) |
564 | 621 self.subprocess_popen(cmd) |
565 # className in --clientConfig is ignored, it needs to be set with --className | 622 |
566 if 'className' in trackData['style']: | 623 cmd = ['bgzip', '-f', dest] |
567 cmd += ['--className', trackData['style']['className']] | 624 self.subprocess_popen(' '.join(cmd)) |
568 | 625 cmd = ['tabix', '-f', '-p', 'gff', dest + '.gz'] |
569 config = copy.copy(trackData) | 626 self.subprocess_popen(' '.join(cmd)) |
570 clientConfig = trackData['style'] | 627 |
571 del config['style'] | 628 def add_features(self, data, format, trackData, gffOpts, **kwargs): |
629 | |
630 dest = os.path.join(self.outdir, 'data', 'raw', trackData['label'] + '.gff') | |
631 | |
632 self._sort_gff(data, dest) | |
633 | |
634 url = os.path.join('raw', trackData['label'] + '.gff.gz') | |
635 trackData.update({ | |
636 "urlTemplate": url, | |
637 "storeClass": "JBrowse/Store/SeqFeature/GFF3Tabix", | |
638 }) | |
572 | 639 |
573 if 'match' in gffOpts: | 640 if 'match' in gffOpts: |
574 config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments' | 641 trackData['glyph'] = 'JBrowse/View/FeatureGlyph/Segments' |
575 if bool(gffOpts['match']): | |
576 # Can be empty for CanvasFeatures = will take all by default | |
577 cmd += ['--type', gffOpts['match']] | |
578 | |
579 cmd += ['--clientConfig', json.dumps(clientConfig), | |
580 ] | |
581 | 642 |
582 trackType = 'JBrowse/View/Track/CanvasFeatures' | 643 trackType = 'JBrowse/View/Track/CanvasFeatures' |
583 if 'trackType' in gffOpts: | 644 if 'trackType' in gffOpts: |
584 trackType = gffOpts['trackType'] | 645 trackType = gffOpts['trackType'] |
646 trackData['trackType'] = trackType | |
585 | 647 |
586 if trackType == 'JBrowse/View/Track/CanvasFeatures': | 648 if trackType == 'JBrowse/View/Track/CanvasFeatures': |
587 if 'transcriptType' in gffOpts and gffOpts['transcriptType']: | 649 if 'transcriptType' in gffOpts and gffOpts['transcriptType']: |
588 config['transcriptType'] = gffOpts['transcriptType'] | 650 trackData['transcriptType'] = gffOpts['transcriptType'] |
589 if 'subParts' in gffOpts and gffOpts['subParts']: | 651 if 'subParts' in gffOpts and gffOpts['subParts']: |
590 config['subParts'] = gffOpts['subParts'] | 652 trackData['subParts'] = gffOpts['subParts'] |
591 if 'impliedUTRs' in gffOpts and gffOpts['impliedUTRs']: | 653 if 'impliedUTRs' in gffOpts and gffOpts['impliedUTRs']: |
592 config['impliedUTRs'] = gffOpts['impliedUTRs'] | 654 trackData['impliedUTRs'] = gffOpts['impliedUTRs'] |
593 elif trackType == 'JBrowse/View/Track/HTMLFeatures': | 655 |
594 if 'transcriptType' in gffOpts and gffOpts['transcriptType']: | 656 self._add_track_json(trackData) |
595 cmd += ['--type', gffOpts['transcriptType']] | |
596 | |
597 cmd += [ | |
598 '--trackType', gffOpts['trackType'] | |
599 ] | |
600 | |
601 if metadata: | |
602 config.update({'metadata': metadata}) | |
603 cmd.extend(['--config', json.dumps(config)]) | |
604 | |
605 self.subprocess_check_call(cmd) | |
606 | 657 |
607 if gffOpts.get('index', 'false') == 'true': | 658 if gffOpts.get('index', 'false') == 'true': |
608 self.tracksToIndex.append("%s" % trackData['label']) | 659 self.tracksToIndex.append("%s" % trackData['label']) |
609 | 660 |
610 def add_rest(self, url, trackData): | 661 def add_rest(self, url, trackData): |
612 "label": trackData['label'], | 663 "label": trackData['label'], |
613 "key": trackData['key'], | 664 "key": trackData['key'], |
614 "category": trackData['category'], | 665 "category": trackData['category'], |
615 "type": "JBrowse/View/Track/HTMLFeatures", | 666 "type": "JBrowse/View/Track/HTMLFeatures", |
616 "storeClass": "JBrowse/Store/SeqFeature/REST", | 667 "storeClass": "JBrowse/Store/SeqFeature/REST", |
617 "baseUrl": url, | 668 "baseUrl": url |
618 "query": { | 669 } |
619 "organism": "tyrannosaurus" | 670 self._add_track_json(data) |
620 } | 671 |
672 def add_sparql(self, url, query, trackData): | |
673 data = { | |
674 "label": trackData['label'], | |
675 "key": trackData['key'], | |
676 "category": trackData['category'], | |
677 "type": "JBrowse/View/Track/CanvasFeatures", | |
678 "storeClass": "JBrowse/Store/SeqFeature/SPARQL", | |
679 "urlTemplate": url, | |
680 "queryTemplate": query | |
621 } | 681 } |
622 self._add_track_json(data) | 682 self._add_track_json(data) |
623 | 683 |
624 def process_annotations(self, track): | 684 def process_annotations(self, track): |
625 category = track['category'].replace('__pd__date__pd__', TODAY) | 685 category = track['category'].replace('__pd__date__pd__', TODAY) |
643 '[': '__ob__', | 703 '[': '__ob__', |
644 ']': '__cb__', | 704 ']': '__cb__', |
645 '{': '__oc__', | 705 '{': '__oc__', |
646 '}': '__cc__', | 706 '}': '__cc__', |
647 '@': '__at__', | 707 '@': '__at__', |
648 '#': '__pd__' | 708 '#': '__pd__', |
709 "": '__cn__' | |
649 } | 710 } |
650 | 711 |
651 for i, (dataset_path, dataset_ext, track_human_label, extra_metadata) in enumerate(track['trackfiles']): | 712 for i, (dataset_path, dataset_ext, track_human_label, extra_metadata) in enumerate(track['trackfiles']): |
652 # Unsanitize labels (element_identifiers are always sanitized by Galaxy) | 713 # Unsanitize labels (element_identifiers are always sanitized by Galaxy) |
653 for key, value in mapped_chars.items(): | 714 for key, value in mapped_chars.items(): |
654 track_human_label = track_human_label.replace(value, key) | 715 track_human_label = track_human_label.replace(value, key) |
655 | 716 |
656 log.info('Processing %s / %s', category, track_human_label) | 717 log.info('Processing %s / %s', category, track_human_label) |
657 outputTrackConfig['key'] = track_human_label | 718 outputTrackConfig['key'] = track_human_label |
658 # We add extra data to hash for the case of REST + SPARQL. | 719 # We add extra data to hash for the case of REST + SPARQL. |
659 try: | 720 if 'conf' in track and 'options' in track['conf'] and 'url' in track['conf']['options']: |
660 rest_url = track['conf']['options']['url'] | 721 rest_url = track['conf']['options']['url'] |
661 except KeyError: | 722 else: |
662 rest_url = '' | 723 rest_url = '' |
663 | 724 |
664 # I chose to use track['category'] instead of 'category' here. This | 725 # I chose to use track['category'] instead of 'category' here. This |
665 # is intentional. This way re-running the tool on a different date | 726 # is intentional. This way re-running the tool on a different date |
666 # will not generate different hashes and make comparison of outputs | 727 # will not generate different hashes and make comparison of outputs |
667 # much simpler. | 728 # much simpler. |
668 hashData = [dataset_path, track_human_label, track['category'], rest_url] | 729 hashData = [str(dataset_path), track_human_label, track['category'], rest_url] |
669 hashData = '|'.join(hashData).encode('utf-8') | 730 hashData = '|'.join(hashData).encode('utf-8') |
670 outputTrackConfig['label'] = hashlib.md5(hashData).hexdigest() + '_%s' % i | 731 outputTrackConfig['label'] = hashlib.md5(hashData).hexdigest() + '_%s' % i |
732 outputTrackConfig['metadata'] = extra_metadata | |
671 | 733 |
672 # Colour parsing is complex due to different track types having | 734 # Colour parsing is complex due to different track types having |
673 # different colour options. | 735 # different colour options. |
674 colourOptions = self.cs.parse_colours(track['conf']['options'], track['format'], gff3=dataset_path) | 736 colourOptions = self.cs.parse_colours(track['conf']['options'], track['format'], gff3=dataset_path) |
675 # This used to be done with a dict.update() call, however that wiped out any previous style settings... | 737 # This used to be done with a dict.update() call, however that wiped out any previous style settings... |
686 | 748 |
687 # import pprint; pprint.pprint(track) | 749 # import pprint; pprint.pprint(track) |
688 # import sys; sys.exit() | 750 # import sys; sys.exit() |
689 if dataset_ext in ('gff', 'gff3', 'bed'): | 751 if dataset_ext in ('gff', 'gff3', 'bed'): |
690 self.add_features(dataset_path, dataset_ext, outputTrackConfig, | 752 self.add_features(dataset_path, dataset_ext, outputTrackConfig, |
691 track['conf']['options']['gff'], metadata=extra_metadata) | 753 track['conf']['options']['gff']) |
692 elif dataset_ext == 'bigwig': | 754 elif dataset_ext == 'bigwig': |
693 self.add_bigwig(dataset_path, outputTrackConfig, | 755 self.add_bigwig(dataset_path, outputTrackConfig, |
694 track['conf']['options']['wiggle'], metadata=extra_metadata) | 756 track['conf']['options']['wiggle']) |
757 elif dataset_ext == 'bigwig_multiple': | |
758 self.add_bigwig_multiple(dataset_path, outputTrackConfig, | |
759 track['conf']['options']['wiggle']) | |
695 elif dataset_ext == 'bam': | 760 elif dataset_ext == 'bam': |
696 real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index'] | 761 real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index'] |
697 if not isinstance(real_indexes, list): | 762 if not isinstance(real_indexes, list): |
698 # <bam_indices> | 763 # <bam_indices> |
699 # <bam_index>/path/to/a.bam.bai</bam_index> | 764 # <bam_index>/path/to/a.bam.bai</bam_index> |
704 # becomes a list. Fun! | 769 # becomes a list. Fun! |
705 real_indexes = [real_indexes] | 770 real_indexes = [real_indexes] |
706 | 771 |
707 self.add_bam(dataset_path, outputTrackConfig, | 772 self.add_bam(dataset_path, outputTrackConfig, |
708 track['conf']['options']['pileup'], | 773 track['conf']['options']['pileup'], |
709 bam_index=real_indexes[i], metadata=extra_metadata) | 774 bam_index=real_indexes[i]) |
710 elif dataset_ext == 'blastxml': | 775 elif dataset_ext == 'blastxml': |
711 self.add_blastxml(dataset_path, outputTrackConfig, track['conf']['options']['blast'], metadata=extra_metadata) | 776 self.add_blastxml(dataset_path, outputTrackConfig, track['conf']['options']['blast']) |
712 elif dataset_ext == 'vcf': | 777 elif dataset_ext == 'vcf': |
713 self.add_vcf(dataset_path, outputTrackConfig, metadata=extra_metadata) | 778 self.add_vcf(dataset_path, outputTrackConfig) |
714 elif dataset_ext == 'rest': | 779 elif dataset_ext == 'rest': |
715 self.add_rest(track['conf']['options']['url'], outputTrackConfig, metadata=extra_metadata) | 780 self.add_rest(track['conf']['options']['rest']['url'], outputTrackConfig) |
781 elif dataset_ext == 'sparql': | |
782 sparql_query = track['conf']['options']['sparql']['query'] | |
783 for key, value in mapped_chars.items(): | |
784 sparql_query = sparql_query.replace(value, key) | |
785 self.add_sparql(track['conf']['options']['sparql']['url'], sparql_query, outputTrackConfig) | |
716 else: | 786 else: |
717 log.warn('Do not know how to handle %s', dataset_ext) | 787 log.warn('Do not know how to handle %s', dataset_ext) |
718 | 788 |
719 # Return non-human label for use in other fields | 789 # Return non-human label for use in other fields |
720 yield outputTrackConfig['label'] | 790 yield outputTrackConfig['label'] |
748 viz_data.update(generalData) | 818 viz_data.update(generalData) |
749 self._add_json(viz_data) | 819 self._add_json(viz_data) |
750 | 820 |
751 if 'GCContent' in data['plugins_python']: | 821 if 'GCContent' in data['plugins_python']: |
752 self._add_track_json({ | 822 self._add_track_json({ |
753 "storeClass": "JBrowse/Store/SeqFeature/SequenceChunks", | 823 "storeClass": "JBrowse/Store/SeqFeature/IndexedFasta", |
754 "type": "GCContent/View/Track/GCContentXY", | 824 "type": "GCContent/View/Track/GCContentXY", |
755 "label": "GCContentXY", | 825 "label": "GC Content", |
756 "urlTemplate": "seq/{refseq_dirpath}/{refseq}-", | 826 "key": "GCContentXY", |
757 "bicolor_pivot": 0.5 | 827 "urlTemplate": "seq/genome.fasta", |
828 "bicolor_pivot": 0.5, | |
829 "category": "GC Content", | |
830 "metadata": { | |
831 "tool_tool": '<a target="_blank" href="https://github.com/elsiklab/gccontent/commit/030180e75a19fad79478d43a67c566ec6">elsiklab/gccontent</a>', | |
832 "tool_tool_version": "5c8b0582ecebf9edf684c76af8075fb3d30ec3fa", | |
833 "dataset_edam_format": "", | |
834 "dataset_size": "", | |
835 "history_display_name": "", | |
836 "history_user_email": "", | |
837 "metadata_dbkey": "", | |
838 } | |
839 # TODO: Expose params for everyone. | |
840 }) | |
841 self._add_track_json({ | |
842 "storeClass": "JBrowse/Store/SeqFeature/IndexedFasta", | |
843 "type": "GCContent/View/Track/GCContentXY", | |
844 "label": "GC skew", | |
845 "key": "GCSkew", | |
846 "urlTemplate": "seq/genome.fasta", | |
847 "gcMode": "skew", | |
848 "min_score": -1, | |
849 "bicolor_pivot": 0, | |
850 "category": "GC Content", | |
851 "metadata": { | |
852 "tool_tool": '<a target="_blank" href="https://github.com/elsiklab/gccontent/commit/030180e75a19fad79478d43a67c566ec6">elsiklab/gccontent</a>', | |
853 "tool_tool_version": "5c8b0582ecebf9edf684c76af8075fb3d30ec3fa", | |
854 "dataset_edam_format": "", | |
855 "dataset_size": "", | |
856 "history_display_name": "", | |
857 "history_user_email": "", | |
858 "metadata_dbkey": "", | |
859 } | |
758 # TODO: Expose params for everyone. | 860 # TODO: Expose params for everyone. |
759 }) | 861 }) |
760 | 862 |
761 if 'ComboTrackSelector' in data['plugins_python']: | 863 if 'ComboTrackSelector' in data['plugins_python']: |
762 with open(os.path.join(self.outdir, 'data', 'trackList.json'), 'r') as handle: | 864 with open(os.path.join(self.outdir, 'data', 'trackList.json'), 'r') as handle: |
783 "history_user_email", | 885 "history_user_email", |
784 "metadata_dbkey", | 886 "metadata_dbkey", |
785 ], | 887 ], |
786 "type": "Faceted", | 888 "type": "Faceted", |
787 "title": ["Galaxy Metadata"], | 889 "title": ["Galaxy Metadata"], |
890 "icon": "https://galaxyproject.org/images/logos/galaxy-icon-square.png", | |
788 "escapeHTMLInData": False | 891 "escapeHTMLInData": False |
789 }, | 892 }, |
790 "trackMetadata": { | 893 "trackMetadata": { |
791 "indexFacets": [ | 894 "indexFacets": [ |
792 "category", | 895 "category", |
831 parser.add_argument('xml', type=argparse.FileType('r'), help='Track Configuration') | 934 parser.add_argument('xml', type=argparse.FileType('r'), help='Track Configuration') |
832 | 935 |
833 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release') | 936 parser.add_argument('--jbrowse', help='Folder containing a jbrowse release') |
834 parser.add_argument('--outdir', help='Output directory', default='out') | 937 parser.add_argument('--outdir', help='Output directory', default='out') |
835 parser.add_argument('--standalone', help='Standalone mode includes a copy of JBrowse', action='store_true') | 938 parser.add_argument('--standalone', help='Standalone mode includes a copy of JBrowse', action='store_true') |
836 parser.add_argument('--version', '-V', action='version', version="%(prog)s 0.7.0") | 939 parser.add_argument('--version', '-V', action='version', version="%(prog)s 0.8.0") |
837 args = parser.parse_args() | 940 args = parser.parse_args() |
838 | 941 |
839 tree = ET.parse(args.xml.name) | 942 tree = ET.parse(args.xml.name) |
840 root = tree.getroot() | 943 root = tree.getroot() |
944 | |
945 # This should be done ASAP | |
946 GALAXY_INFRASTRUCTURE_URL = root.find('metadata/galaxyUrl').text | |
947 # Sometimes this comes as `localhost` without a protocol | |
948 if not GALAXY_INFRASTRUCTURE_URL.startswith('http'): | |
949 # so we'll prepend `http://` and hope for the best. Requests *should* | |
950 # be GET and not POST so it should redirect OK | |
951 GALAXY_INFRASTRUCTURE_URL = 'http://' + GALAXY_INFRASTRUCTURE_URL | |
841 | 952 |
842 jc = JbrowseConnector( | 953 jc = JbrowseConnector( |
843 jbrowse=args.jbrowse, | 954 jbrowse=args.jbrowse, |
844 outdir=args.outdir, | 955 outdir=args.outdir, |
845 genomes=[ | 956 genomes=[ |
869 'show_nav': root.find('metadata/general/show_nav').text, | 980 'show_nav': root.find('metadata/general/show_nav').text, |
870 'show_overview': root.find('metadata/general/show_overview').text, | 981 'show_overview': root.find('metadata/general/show_overview').text, |
871 'show_menu': root.find('metadata/general/show_menu').text, | 982 'show_menu': root.find('metadata/general/show_menu').text, |
872 'hideGenomeOptions': root.find('metadata/general/hideGenomeOptions').text, | 983 'hideGenomeOptions': root.find('metadata/general/hideGenomeOptions').text, |
873 }, | 984 }, |
874 'plugins': [{ | 985 'plugins': [], |
875 'location': 'https://cdn.jsdelivr.net/gh/TAMU-CPT/blastview@97572a21b7f011c2b4d9a0b5af40e292d694cbef/', | 986 'plugins_python': [], |
876 'name': 'BlastView' | |
877 }], | |
878 'plugins_python': ['BlastView'], | |
879 } | 987 } |
880 | 988 |
881 plugins = root.find('plugins').attrib | 989 plugins = root.find('plugins').attrib |
882 if plugins['GCContent'] == 'True': | 990 if plugins['GCContent'] == 'True': |
883 extra_data['plugins_python'].append('GCContent') | 991 extra_data['plugins_python'].append('GCContent') |
884 extra_data['plugins'].append({ | 992 extra_data['plugins'].append({ |
885 'location': 'https://cdn.jsdelivr.net/gh/elsiklab/gccontent@5c8b0582ecebf9edf684c76af8075fb3d30ec3fa/', | 993 'location': 'https://cdn.jsdelivr.net/gh/elsiklab/gccontent@5c8b0582ecebf9edf684c76af8075fb3d30ec3fa/', |
886 'name': 'GCContent' | 994 'name': 'GCContent' |
887 }) | 995 }) |
888 | 996 |
889 if plugins['Bookmarks'] == 'True': | 997 # Not needed in 1.16.1: it's built in the conda package now, and this plugin doesn't need to be enabled anywhere |
890 extra_data['plugins'].append({ | 998 # if plugins['Bookmarks'] == 'True': |
891 'location': 'https://cdn.jsdelivr.net/gh/TAMU-CPT/bookmarks-jbrowse@5242694120274c86e1ccd5cb0e5e943e78f82393/', | 999 # extra_data['plugins'].append({ |
892 'name': 'Bookmarks' | 1000 # 'location': 'https://cdn.jsdelivr.net/gh/TAMU-CPT/bookmarks-jbrowse@5242694120274c86e1ccd5cb0e5e943e78f82393/', |
893 }) | 1001 # 'name': 'Bookmarks' |
894 | 1002 # }) |
1003 | |
1004 # Not needed in 1.16.1: it's built in the conda package now, and this plugin doesn't need to be enabled anywhere | |
895 if plugins['ComboTrackSelector'] == 'True': | 1005 if plugins['ComboTrackSelector'] == 'True': |
896 extra_data['plugins_python'].append('ComboTrackSelector') | 1006 extra_data['plugins_python'].append('ComboTrackSelector') |
897 extra_data['plugins'].append({ | 1007 # Not needed in 1.16.1: it's built in the conda package now, and this plugin doesn't need to be enabled anywhere |
898 'location': 'https://cdn.jsdelivr.net/gh/Arabidopsis-Information-Portal/ComboTrackSelector@52403928d5ccbe2e3a86b0fa5eb8e61c0f2e2f57', | 1008 # extra_data['plugins'].append({ |
899 'icon': 'https://galaxyproject.org/images/logos/galaxy-icon-square.png', | 1009 # 'location': 'https://cdn.jsdelivr.net/gh/Arabidopsis-Information-Portal/ComboTrackSelector@52403928d5ccbe2e3a86b0fa5eb8e61c0f2e2f57/', |
900 'name': 'ComboTrackSelector' | 1010 # 'icon': 'https://galaxyproject.org/images/logos/galaxy-icon-square.png', |
901 }) | 1011 # 'name': 'ComboTrackSelector' |
1012 # }) | |
902 | 1013 |
903 if plugins['theme'] == 'Minimalist': | 1014 if plugins['theme'] == 'Minimalist': |
904 extra_data['plugins'].append({ | 1015 extra_data['plugins'].append({ |
905 'location': 'https://cdn.jsdelivr.net/gh/erasche/jbrowse-minimalist-theme@d698718442da306cf87f033c72ddb745f3077775/', | 1016 'location': 'https://cdn.jsdelivr.net/gh/erasche/jbrowse-minimalist-theme@d698718442da306cf87f033c72ddb745f3077775/', |
906 'name': 'MinimalistTheme' | 1017 'name': 'MinimalistTheme' |
909 extra_data['plugins'].append({ | 1020 extra_data['plugins'].append({ |
910 'location': 'https://cdn.jsdelivr.net/gh/erasche/jbrowse-dark-theme@689eceb7e33bbc1b9b15518d45a5a79b2e5d0a26/', | 1021 'location': 'https://cdn.jsdelivr.net/gh/erasche/jbrowse-dark-theme@689eceb7e33bbc1b9b15518d45a5a79b2e5d0a26/', |
911 'name': 'DarkTheme' | 1022 'name': 'DarkTheme' |
912 }) | 1023 }) |
913 | 1024 |
914 GALAXY_INFRASTRUCTURE_URL = root.find('metadata/galaxyUrl').text | 1025 if plugins['BlastView'] == 'True': |
915 # Sometimes this comes as `localhost` without a protocol | 1026 extra_data['plugins_python'].append('BlastView') |
916 if not GALAXY_INFRASTRUCTURE_URL.startswith('http'): | 1027 extra_data['plugins'].append({ |
917 # so we'll prepend `http://` and hope for the best. Requests *should* | 1028 'location': 'https://cdn.jsdelivr.net/gh/TAMU-CPT/blastview@97572a21b7f011c2b4d9a0b5af40e292d694cbef/', |
918 # be GET and not POST so it should redirect OK | 1029 'name': 'BlastView' |
919 GALAXY_INFRASTRUCTURE_URL = 'http://' + GALAXY_INFRASTRUCTURE_URL | 1030 }) |
920 | 1031 |
921 for track in root.findall('tracks/track'): | 1032 for track in root.findall('tracks/track'): |
922 track_conf = {} | 1033 track_conf = {} |
923 track_conf['trackfiles'] = [] | 1034 track_conf['trackfiles'] = [] |
924 | 1035 |
925 for x in track.findall('files/trackFile'): | 1036 is_multi_bigwig = False |
1037 try: | |
1038 if track.find('options/wiggle/multibigwig') and (track.find('options/wiggle/multibigwig').text == 'True'): | |
1039 is_multi_bigwig = True | |
1040 multi_bigwig_paths = [] | |
1041 except KeyError: | |
1042 pass | |
1043 | |
1044 trackfiles = track.findall('files/trackFile') | |
1045 if trackfiles: | |
1046 for x in track.findall('files/trackFile'): | |
1047 if is_multi_bigwig: | |
1048 multi_bigwig_paths.append((x.attrib['label'], os.path.realpath(x.attrib['path']))) | |
1049 else: | |
1050 if trackfiles: | |
1051 metadata = metadata_from_node(x.find('metadata')) | |
1052 | |
1053 track_conf['trackfiles'].append(( | |
1054 os.path.realpath(x.attrib['path']), | |
1055 x.attrib['ext'], | |
1056 x.attrib['label'], | |
1057 metadata | |
1058 )) | |
1059 else: | |
1060 # For tracks without files (rest, sparql) | |
1061 track_conf['trackfiles'].append(( | |
1062 '', # N/A, no path for rest or sparql | |
1063 track.attrib['format'], | |
1064 track.find('options/label').text, | |
1065 {} | |
1066 )) | |
1067 | |
1068 if is_multi_bigwig: | |
926 metadata = metadata_from_node(x.find('metadata')) | 1069 metadata = metadata_from_node(x.find('metadata')) |
927 | 1070 |
928 track_conf['trackfiles'].append(( | 1071 track_conf['trackfiles'].append(( |
929 os.path.realpath(x.attrib['path']), | 1072 multi_bigwig_paths, # Passing an array of paths to represent as one track |
930 x.attrib['ext'], | 1073 'bigwig_multiple', |
931 x.attrib['label'], | 1074 'MultiBigWig', # Giving an hardcoded name for now |
932 metadata | 1075 {} # No metadata for multiple bigwig |
933 )) | 1076 )) |
934 | 1077 |
935 track_conf['category'] = track.attrib['cat'] | 1078 track_conf['category'] = track.attrib['cat'] |
936 track_conf['format'] = track.attrib['format'] | 1079 track_conf['format'] = track.attrib['format'] |
937 try: | 1080 try: |
938 # Only pertains to gff3 + blastxml. TODO? | 1081 # Only pertains to gff3 + blastxml. TODO? |
939 track_conf['style'] = {t.tag: t.text for t in track.find('options/style')} | 1082 track_conf['style'] = {t.tag: t.text for t in track.find('options/style')} |
940 except TypeError as te: | 1083 except TypeError: |
941 track_conf['style'] = {} | 1084 track_conf['style'] = {} |
942 pass | 1085 pass |
943 track_conf['conf'] = etree_to_dict(track.find('options')) | 1086 track_conf['conf'] = etree_to_dict(track.find('options')) |
944 keys = jc.process_annotations(track_conf) | 1087 keys = jc.process_annotations(track_conf) |
945 | 1088 |