# HG changeset patch # User fubar # Date 1706168381 0 # Node ID cce8dacb240f63d145be81926828b8e9fd772db9 # Parent 7c2e28e144f32d17e2df3af7cb5dc822a41f1a93 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 1a20cf06627d429a211427753f223467188dbe7f-dirty diff -r 7c2e28e144f3 -r cce8dacb240f Galaxy-History-jbrowse2samples.tar.gz Binary file Galaxy-History-jbrowse2samples.tar.gz has changed diff -r 7c2e28e144f3 -r cce8dacb240f abjbrowse2.py --- a/abjbrowse2.py Mon Jan 22 12:05:09 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1272 +0,0 @@ -#!/usr/bin/env python -import argparse -import binascii -import datetime -import hashlib -import json -import logging -import os -import re -import shutil -import struct -import subprocess -import tempfile -import xml.etree.ElementTree as ET -from collections import defaultdict - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger("jbrowse") -TODAY = datetime.datetime.now().strftime("%Y-%m-%d") -GALAXY_INFRASTRUCTURE_URL = None - - -class ColorScaling(object): - - COLOR_FUNCTION_TEMPLATE = """ - function(feature, variableName, glyphObject, track) {{ - var score = {score}; - {opacity} - return 'rgba({red}, {green}, {blue}, ' + opacity + ')'; - }} - """ - - COLOR_FUNCTION_TEMPLATE_QUAL = r""" - function(feature, variableName, glyphObject, track) {{ - var search_up = function self(sf, attr){{ - if(sf.get(attr) !== undefined){{ - return sf.get(attr); - }} - if(sf.parent() === undefined) {{ - return; - }}else{{ - return self(sf.parent(), attr); - }} - }}; - - var search_down = function self(sf, attr){{ - if(sf.get(attr) !== undefined){{ - return sf.get(attr); - }} - if(sf.children() === undefined) {{ - return; - }}else{{ - var kids = sf.children(); - for(var child_idx in kids){{ - var x = self(kids[child_idx], attr); - if(x !== undefined){{ - return x; - }} - }} - return; - }} - }}; - - var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color}); - var score = (search_up(feature, 'score') || search_down(feature, 'score')); - {opacity} - if(score === undefined){{ opacity = 1; }} - var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color); - var red = parseInt(result[1], 16); - var green = parseInt(result[2], 16); - var blue = parseInt(result[3], 16); - if(isNaN(opacity) || opacity < 0){{ opacity = 0; }} - return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')'; - }} - """ - - OPACITY_MATH = { - "linear": """ - var opacity = (score - ({min})) / (({max}) - ({min})); - """, - "logarithmic": """ - var opacity = Math.log10(score - ({min})) / Math.log10(({max}) - ({min})); - """, - "blast": """ - var opacity = 0; - if(score == 0.0) {{ - opacity = 1; - }} else {{ - opacity = (20 - Math.log10(score)) / 180; - }} - """, - } - - BREWER_COLOUR_IDX = 0 - BREWER_COLOUR_SCHEMES = [ - (166, 206, 227), - (31, 120, 180), - (178, 223, 138), - (51, 160, 44), - (251, 154, 153), - (227, 26, 28), - (253, 191, 111), - (255, 127, 0), - (202, 178, 214), - (106, 61, 154), - (255, 255, 153), - (177, 89, 40), - (228, 26, 28), - (55, 126, 184), - (77, 175, 74), - (152, 78, 163), - (255, 127, 0), - ] - - BREWER_DIVERGING_PALLETES = { - "BrBg": ("#543005", "#003c30"), - "PiYg": ("#8e0152", "#276419"), - "PRGn": ("#40004b", "#00441b"), - "PuOr": ("#7f3b08", "#2d004b"), - "RdBu": ("#67001f", "#053061"), - "RdGy": ("#67001f", "#1a1a1a"), - "RdYlBu": ("#a50026", "#313695"), - "RdYlGn": ("#a50026", "#006837"), - "Spectral": ("#9e0142", "#5e4fa2"), - } - - def __init__(self): - self.brewer_colour_idx = 0 - - def rgb_from_hex(self, hexstr): - # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back - return struct.unpack("BBB", binascii.unhexlify(hexstr)) - - def min_max_gff(self, gff_file): - min_val = None - max_val = None - with open(gff_file, "r") as handle: - for line in handle: - try: - value = float(line.split("\t")[5]) - min_val = min(value, (min_val or value)) - max_val = max(value, (max_val or value)) - - if value < min_val: - min_val = value - - if value > max_val: - max_val = value - except Exception: - pass - return min_val, max_val - - def hex_from_rgb(self, r, g, b): - return "#%02x%02x%02x" % (r, g, b) - - def _get_colours(self): - r, g, b = self.BREWER_COLOUR_SCHEMES[ - self.brewer_colour_idx % len(self.BREWER_COLOUR_SCHEMES) - ] - self.brewer_colour_idx += 1 - return r, g, b - - def parse_menus(self, track): - trackConfig = {"menuTemplate": [{}, {}, {}, {}]} - - if "menu" in track["menus"]: - menu_list = [track["menus"]["menu"]] - if isinstance(track["menus"]["menu"], list): - menu_list = track["menus"]["menu"] - - for m in menu_list: - tpl = { - "action": m["action"], - "label": m.get("label", "{name}"), - "iconClass": m.get("iconClass", "dijitIconBookmark"), - } - if "url" in m: - tpl["url"] = m["url"] - if "content" in m: - tpl["content"] = m["content"] - if "title" in m: - tpl["title"] = m["title"] - - trackConfig["menuTemplate"].append(tpl) - - return trackConfig - - def parse_colours(self, track, trackFormat, gff3=None): - # Wiggle tracks have a bicolor pallete - trackConfig = {"style": {}} - if trackFormat == "wiggle": - - trackConfig["style"]["pos_color"] = track["wiggle"]["color_pos"] - trackConfig["style"]["neg_color"] = track["wiggle"]["color_neg"] - - if trackConfig["style"]["pos_color"] == "__auto__": - trackConfig["style"]["neg_color"] = self.hex_from_rgb( - *self._get_colours() - ) - trackConfig["style"]["pos_color"] = self.hex_from_rgb( - *self._get_colours() - ) - - # Wiggle tracks can change colour at a specified place - bc_pivot = track["wiggle"]["bicolor_pivot"] - if bc_pivot not in ("mean", "zero"): - # The values are either one of those two strings - # or a number - bc_pivot = float(bc_pivot) - trackConfig["bicolor_pivot"] = bc_pivot - elif "scaling" in track: - if track["scaling"]["method"] == "ignore": - if track["scaling"]["scheme"]["color"] != "__auto__": - trackConfig["style"]["color"] = track["scaling"]["scheme"]["color"] - else: - trackConfig["style"]["color"] = self.hex_from_rgb( - *self._get_colours() - ) - else: - # Scored method - algo = track["scaling"]["algo"] - # linear, logarithmic, blast - scales = track["scaling"]["scales"] - # type __auto__, manual (min, max) - scheme = track["scaling"]["scheme"] - # scheme -> (type (opacity), color) - # ================================== - # GENE CALLS OR BLAST - # ================================== - if trackFormat == "blast": - red, green, blue = self._get_colours() - color_function = self.COLOR_FUNCTION_TEMPLATE.format( - **{ - "score": "feature._parent.get('score')", - "opacity": self.OPACITY_MATH["blast"], - "red": red, - "green": green, - "blue": blue, - } - ) - trackConfig["style"]["color"] = color_function.replace("\n", "") - elif trackFormat == "gene_calls": - # Default values, based on GFF3 spec - min_val = 0 - max_val = 1000 - # Get min/max and build a scoring function since JBrowse doesn't - if scales["type"] == "automatic" or scales["type"] == "__auto__": - min_val, max_val = self.min_max_gff(gff3) - else: - min_val = scales.get("min", 0) - max_val = scales.get("max", 1000) - - if scheme["color"] == "__auto__": - user_color = "undefined" - auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours()) - elif scheme["color"].startswith("#"): - user_color = "'%s'" % self.hex_from_rgb( - *self.rgb_from_hex(scheme["color"][1:]) - ) - auto_color = "undefined" - else: - user_color = "undefined" - auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours()) - - color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format( - **{ - "opacity": self.OPACITY_MATH[algo].format( - **{"max": max_val, "min": min_val} - ), - "user_spec_color": user_color, - "auto_gen_color": auto_color, - } - ) - - trackConfig["style"]["color"] = color_function.replace("\n", "") - return trackConfig - - -def etree_to_dict(t): - if t is None: - return {} - - d = {t.tag: {} if t.attrib else None} - children = list(t) - if children: - dd = defaultdict(list) - for dc in map(etree_to_dict, children): - for k, v in dc.items(): - dd[k].append(v) - d = {t.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}} - if t.attrib: - d[t.tag].update(("@" + k, v) for k, v in t.attrib.items()) - if t.text: - text = t.text.strip() - if children or t.attrib: - if text: - d[t.tag]["#text"] = text - else: - d[t.tag] = text - return d - - -# score comes from feature._parent.get('score') or feature.get('score') - -INSTALLED_TO = os.path.dirname(os.path.realpath(__file__)) - - -def metadata_from_node(node): - metadata = {} - try: - if len(node.findall("dataset")) != 1: - # exit early - return metadata - except Exception: - return {} - - for (key, value) in node.findall("dataset")[0].attrib.items(): - metadata["dataset_%s" % key] = value - - for (key, value) in node.findall("history")[0].attrib.items(): - metadata["history_%s" % key] = value - - for (key, value) in node.findall("metadata")[0].attrib.items(): - metadata["metadata_%s" % key] = value - - for (key, value) in node.findall("tool")[0].attrib.items(): - metadata["tool_%s" % key] = value - - # Additional Mappings applied: - metadata[ - "dataset_edam_format" - ] = '{1}'.format( - metadata["dataset_edam_format"], metadata["dataset_file_ext"] - ) - metadata["history_user_email"] = '{0}'.format( - metadata["history_user_email"] - ) - metadata[ - "history_display_name" - ] = '{hist_name}'.format( - galaxy=GALAXY_INFRASTRUCTURE_URL, - encoded_hist_id=metadata["history_id"], - hist_name=metadata["history_display_name"], - ) - metadata[ - "tool_tool" - ] = '{tool_id}'.format( - galaxy=GALAXY_INFRASTRUCTURE_URL, - encoded_id=metadata["dataset_id"], - tool_id=metadata["tool_tool_id"], - # tool_version=metadata['tool_tool_version'], - ) - return metadata - - -class JbrowseConnector(object): - def __init__(self, jbrowse, outdir, genomes): - self.cs = ColorScaling() - self.jbrowse = jbrowse - self.outdir = outdir - self.genome_paths = genomes - self.tracksToIndex = [] - - # This is the id of the current assembly - self.assembly_ids = {} - self.current_assembly_id = [] - - # If upgrading, look at the existing data - self.check_existing(self.outdir) - - self.clone_jbrowse(self.jbrowse, self.outdir) - - self.process_genomes() - - def subprocess_check_call(self, command, output=None): - if output: - log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output) - subprocess.check_call(command, cwd=self.outdir, stdout=output) - else: - log.debug("cd %s && %s", self.outdir, " ".join(command)) - subprocess.check_call(command, cwd=self.outdir) - - def subprocess_popen(self, command): - log.debug("cd %s && %s", self.outdir, command) - p = subprocess.Popen( - command, - shell=True, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - output, err = p.communicate() - retcode = p.returncode - if retcode != 0: - log.error("cd %s && %s", self.outdir, command) - log.error(output) - log.error(err) - raise RuntimeError("Command failed with exit code %s" % (retcode)) - - def subprocess_check_output(self, command): - log.debug("cd %s && %s", self.outdir, " ".join(command)) - return subprocess.check_output(command, cwd=self.outdir) - - def symlink_or_copy(self, src, dest): - if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool( - os.environ["GALAXY_JBROWSE_SYMLINKS"] - ): - cmd = ["ln", "-s", src, dest] - else: - cmd = ["cp", src, dest] - - return self.subprocess_check_call(cmd) - - def symlink_or_copy_load_action(self): - if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool( - os.environ["GALAXY_JBROWSE_SYMLINKS"] - ): - return "symlink" - else: - return "copy" - - def check_existing(self, destination): - existing = os.path.join(destination, "data", "config.json") - if os.path.exists(existing): - with open(existing, "r") as existing_conf: - conf = json.load(existing_conf) - if "assemblies" in conf: - for assembly in conf["assemblies"]: - if "name" in assembly: - self.assembly_ids[assembly["name"]] = None - - def process_genomes(self): - for genome_node in self.genome_paths: - # We only expect one input genome per run. This for loop is just - # easier to write than the alternative / catches any possible - # issues. - self.add_assembly(genome_node["path"], genome_node["label"]) - - def add_assembly(self, path, label, default=True): - # Find a non-existing filename for the new genome - # (to avoid colision when upgrading an existing instance) - rel_seq_path = os.path.join("data", "assembly") - seq_path = os.path.join(self.outdir, rel_seq_path) - fn_try = 1 - while ( - os.path.exists(seq_path + ".fasta") - or os.path.exists(seq_path + ".fasta.gz") - or os.path.exists(seq_path + ".fasta.gz.fai") - or os.path.exists(seq_path + ".fasta.gz.gzi") - ): - rel_seq_path = os.path.join("data", "assembly%s" % fn_try) - seq_path = os.path.join(self.outdir, rel_seq_path) - fn_try += 1 - - # Find a non-existing label for the new genome - # (to avoid colision when upgrading an existing instance) - lab_try = 1 - uniq_label = label - while uniq_label in self.assembly_ids: - uniq_label = label + str(lab_try) - lab_try += 1 - - # Find a default scaffold to display - # TODO this may not be necessary in the future, see https://github.com/GMOD/jbrowse-components/issues/2708 - with open(path, "r") as fa_handle: - fa_header = fa_handle.readline()[1:].strip().split(" ")[0] - - self.assembly_ids[uniq_label] = fa_header - if default: - self.current_assembly_id = uniq_label - - copied_genome = seq_path + ".fasta" - shutil.copy(path, copied_genome) - - # Compress with bgzip - cmd = ["bgzip", copied_genome] - self.subprocess_check_call(cmd) - - # FAI Index - cmd = ["samtools", "faidx", copied_genome + ".gz"] - self.subprocess_check_call(cmd) - - self.subprocess_check_call( - [ - "jbrowse", - "add-assembly", - "--load", - "inPlace", - "--name", - uniq_label, - "--type", - "bgzipFasta", - "--target", - os.path.join(self.outdir, "data"), - "--skipCheck", - rel_seq_path + ".fasta.gz", - ] - ) - - return uniq_label - - def text_index(self): - # Index tracks - args = [ - "jbrowse", - "text-index", - "--target", - os.path.join(self.outdir, "data"), - "--assemblies", - self.current_assembly_id, - ] - - tracks = ",".join(self.tracksToIndex) - if tracks: - args += ["--tracks", tracks] - - self.subprocess_check_call(args) - - def _blastxml_to_gff3(self, xml, min_gap=10): - gff3_unrebased = tempfile.NamedTemporaryFile(delete=False) - cmd = [ - "python", - os.path.join(INSTALLED_TO, "blastxml_to_gapped_gff3.py"), - "--trim", - "--trim_end", - "--include_seq", - "--min_gap", - str(min_gap), - xml, - ] - log.debug("cd %s && %s > %s", self.outdir, " ".join(cmd), gff3_unrebased.name) - subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased) - gff3_unrebased.close() - return gff3_unrebased.name - - def _prepare_track_style(self, xml_conf): - - style_data = {"type": "LinearBasicDisplay"} - - if "display" in xml_conf["style"]: - style_data["type"] = xml_conf["style"]["display"] - del xml_conf["style"]["display"] - - style_data["displayId"] = "%s_%s" % (xml_conf["label"], style_data["type"]) - - style_data.update(xml_conf["style"]) - - return {"displays": [style_data]} - - def add_blastxml(self, data, trackData, blastOpts, **kwargs): - gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"]) - - if "parent" in blastOpts and blastOpts["parent"] != "None": - gff3_rebased = tempfile.NamedTemporaryFile(delete=False) - cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")] - if blastOpts.get("protein", "false") == "true": - cmd.append("--protein2dna") - cmd.extend([os.path.realpath(blastOpts["parent"]), gff3]) - log.debug("cd %s && %s > %s", self.outdir, " ".join(cmd), gff3_rebased.name) - subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased) - gff3_rebased.close() - - # Replace original gff3 file - shutil.copy(gff3_rebased.name, gff3) - os.unlink(gff3_rebased.name) - - rel_dest = os.path.join("data", trackData["label"] + ".gff") - dest = os.path.join(self.outdir, rel_dest) - - self._sort_gff(gff3, dest) - os.unlink(gff3) - - style_json = self._prepare_track_style(trackData) - - self._add_track( - trackData["label"], - trackData["key"], - trackData["category"], - rel_dest + ".gz", - config=style_json, - ) - - def add_bigwig(self, data, trackData, wiggleOpts, **kwargs): - - rel_dest = os.path.join("data", trackData["label"] + ".bw") - dest = os.path.join(self.outdir, rel_dest) - self.symlink_or_copy(os.path.realpath(data), dest) - - style_json = self._prepare_track_style(trackData) - - self._add_track( - trackData["label"], - trackData["key"], - trackData["category"], - rel_dest, - config=style_json, - ) - - # Anything ending in "am" (Bam or Cram) - def add_xam(self, data, trackData, xamOpts, index=None, ext="bam", **kwargs): - - index_ext = "bai" - if ext == "cram": - index_ext = "crai" - - rel_dest = os.path.join("data", trackData["label"] + ".%s" % ext) - dest = os.path.join(self.outdir, rel_dest) - - self.symlink_or_copy(os.path.realpath(data), dest) - - if index is not None and os.path.exists(os.path.realpath(index)): - # xai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest - self.subprocess_check_call( - ["cp", os.path.realpath(index), dest + ".%s" % index_ext] - ) - else: - # Can happen in exotic condition - # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam - # => no index generated by galaxy, but there might be one next to the symlink target - # this trick allows to skip the bam sorting made by galaxy if already done outside - if os.path.exists(os.path.realpath(data) + ".%s" % index_ext): - self.symlink_or_copy( - os.path.realpath(data) + ".%s" % index_ext, dest + ".%s" % index_ext - ) - else: - log.warn( - "Could not find a bam index (.%s file) for %s", (index_ext, data) - ) - - style_json = self._prepare_track_style(trackData) - - self._add_track( - trackData["label"], - trackData["key"], - trackData["category"], - rel_dest, - config=style_json, - ) - - def add_vcf(self, data, trackData, vcfOpts={}, zipped=False, **kwargs): - - if zipped: - rel_dest = os.path.join("data", trackData["label"] + ".vcf.gz") - dest = os.path.join(self.outdir, rel_dest) - shutil.copy(os.path.realpath(data), dest) - else: - rel_dest = os.path.join("data", trackData["label"] + ".vcf") - dest = os.path.join(self.outdir, rel_dest) - shutil.copy(os.path.realpath(data), dest) - - cmd = ["bgzip", dest] - self.subprocess_check_call(cmd) - cmd = ["tabix", dest + ".gz"] - self.subprocess_check_call(cmd) - - rel_dest = os.path.join("data", trackData["label"] + ".vcf.gz") - - style_json = self._prepare_track_style(trackData) - - self._add_track( - trackData["label"], - trackData["key"], - trackData["category"], - rel_dest, - config=style_json, - ) - - def add_gff(self, data, format, trackData, gffOpts, **kwargs): - rel_dest = os.path.join("data", trackData["label"] + ".gff") - dest = os.path.join(self.outdir, rel_dest) - - self._sort_gff(data, dest) - - style_json = self._prepare_track_style(trackData) - - self._add_track( - trackData["label"], - trackData["key"], - trackData["category"], - rel_dest + ".gz", - config=style_json, - ) - - def add_bed(self, data, format, trackData, gffOpts, **kwargs): - rel_dest = os.path.join("data", trackData["label"] + ".bed") - dest = os.path.join(self.outdir, rel_dest) - - self._sort_bed(data, dest) - - style_json = self._prepare_track_style(trackData) - - self._add_track( - trackData["label"], - trackData["key"], - trackData["category"], - rel_dest + ".gz", - config=style_json, - ) - - def add_paf(self, data, trackData, pafOpts, **kwargs): - rel_dest = os.path.join("data", trackData["label"] + ".paf") - dest = os.path.join(self.outdir, rel_dest) - - self.symlink_or_copy(os.path.realpath(data), dest) - - added_assembly = self.add_assembly( - pafOpts["genome"], pafOpts["genome_label"], default=False - ) - - style_json = self._prepare_track_style(trackData) - - self._add_track( - trackData["label"], - trackData["key"], - trackData["category"], - rel_dest, - assemblies=[self.current_assembly_id, added_assembly], - config=style_json, - ) - - def add_hic(self, data, trackData, hicOpts, **kwargs): - rel_dest = os.path.join("data", trackData["label"] + ".hic") - dest = os.path.join(self.outdir, rel_dest) - - self.symlink_or_copy(os.path.realpath(data), dest) - - style_json = self._prepare_track_style(trackData) - - self._add_track( - trackData["label"], - trackData["key"], - trackData["category"], - rel_dest, - config=style_json, - ) - - def add_sparql(self, url, query, query_refnames, trackData): - - json_track_data = { - "type": "FeatureTrack", - "trackId": id, - "name": trackData["label"], - "adapter": { - "type": "SPARQLAdapter", - "endpoint": {"uri": url, "locationType": "UriLocation"}, - "queryTemplate": query, - }, - "category": [trackData["category"]], - "assemblyNames": [self.current_assembly_id], - } - - if query_refnames: - json_track_data["adapter"]["refNamesQueryTemplate"]: query_refnames - - self.subprocess_check_call( - [ - "jbrowse", - "add-track-json", - "--target", - os.path.join(self.outdir, "data"), - json_track_data, - ] - ) - - # Doesn't work as of 1.6.4, might work in the future - # self.subprocess_check_call([ - # 'jbrowse', 'add-track', - # '--trackType', 'sparql', - # '--name', trackData['label'], - # '--category', trackData['category'], - # '--target', os.path.join(self.outdir, 'data'), - # '--trackId', id, - # '--config', '{"queryTemplate": "%s"}' % query, - # url]) - - def _add_track(self, id, label, category, path, assemblies=[], config=None): - - assemblies_opt = self.current_assembly_id - if assemblies: - assemblies_opt = ",".join(assemblies) - - cmd = [ - "jbrowse", - "add-track", - "--load", - "inPlace", - "--name", - label, - "--category", - category, - "--target", - os.path.join(self.outdir, "data"), - "--trackId", - id, - "--assemblyNames", - assemblies_opt, - ] - - if config: - cmd.append("--config") - cmd.append(json.dumps(config)) - - cmd.append(path) - - self.subprocess_check_call(cmd) - - def _sort_gff(self, data, dest): - # Only index if not already done - if not os.path.exists(dest): - cmd = "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" % (data, dest) - self.subprocess_popen(cmd) - - self.subprocess_check_call(["bgzip", "-f", dest]) - self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"]) - - def _sort_bed(self, data, dest): - # Only index if not already done - if not os.path.exists(dest): - cmd = ["sort", "-k1,1", "-k2,2n", data] - with open(dest, "w") as handle: - self.subprocess_check_call(cmd, output=handle) - - self.subprocess_check_call(["bgzip", "-f", dest]) - self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest + ".gz"]) - - def process_annotations(self, track): - - category = track["category"].replace("__pd__date__pd__", TODAY) - outputTrackConfig = { - "category": category, - } - - mapped_chars = { - ">": "__gt__", - "<": "__lt__", - "'": "__sq__", - '"': "__dq__", - "[": "__ob__", - "]": "__cb__", - "{": "__oc__", - "}": "__cc__", - "@": "__at__", - "#": "__pd__", - "": "__cn__", - } - - for i, ( - dataset_path, - dataset_ext, - track_human_label, - extra_metadata, - ) in enumerate(track["trackfiles"]): - # Unsanitize labels (element_identifiers are always sanitized by Galaxy) - for key, value in mapped_chars.items(): - track_human_label = track_human_label.replace(value, key) - - log.info( - "Processing track %s / %s (%s)", - category, - track_human_label, - dataset_ext, - ) - outputTrackConfig["key"] = track_human_label - # We add extra data to hash for the case of REST + SPARQL. - if ( - "conf" in track - and "options" in track["conf"] - and "url" in track["conf"]["options"] - ): - rest_url = track["conf"]["options"]["url"] - else: - rest_url = "" - - # I chose to use track['category'] instead of 'category' here. This - # is intentional. This way re-running the tool on a different date - # will not generate different hashes and make comparison of outputs - # much simpler. - hashData = [ - str(dataset_path), - track_human_label, - track["category"], - rest_url, - self.current_assembly_id, - ] - hashData = "|".join(hashData).encode("utf-8") - outputTrackConfig["label"] = hashlib.md5(hashData).hexdigest() + "_%s" % i - outputTrackConfig["metadata"] = extra_metadata - - outputTrackConfig["style"] = track["style"] - - if "menus" in track["conf"]["options"]: - menus = self.cs.parse_menus(track["conf"]["options"]) - outputTrackConfig.update(menus) - - if dataset_ext in ("gff", "gff3"): - self.add_gff( - dataset_path, - dataset_ext, - outputTrackConfig, - track["conf"]["options"]["gff"], - ) - elif dataset_ext == "bed": - self.add_bed( - dataset_path, - dataset_ext, - outputTrackConfig, - track["conf"]["options"]["gff"], - ) - elif dataset_ext == "bigwig": - self.add_bigwig( - dataset_path, outputTrackConfig, track["conf"]["options"]["wiggle"] - ) - elif dataset_ext == "bam": - real_indexes = track["conf"]["options"]["pileup"]["bam_indices"][ - "bam_index" - ] - if not isinstance(real_indexes, list): - # - # /path/to/a.bam.bai - # - # - # The above will result in the 'bam_index' key containing a - # string. If there are two or more indices, the container - # becomes a list. Fun! - real_indexes = [real_indexes] - - self.add_xam( - dataset_path, - outputTrackConfig, - track["conf"]["options"]["pileup"], - index=real_indexes[i], - ext="bam", - ) - elif dataset_ext == "cram": - real_indexes = track["conf"]["options"]["cram"]["cram_indices"][ - "cram_index" - ] - if not isinstance(real_indexes, list): - # - # /path/to/a.bam.bai - # - # - # The above will result in the 'bam_index' key containing a - # string. If there are two or more indices, the container - # becomes a list. Fun! - real_indexes = [real_indexes] - - self.add_xam( - dataset_path, - outputTrackConfig, - track["conf"]["options"]["cram"], - index=real_indexes[i], - ext="cram", - ) - elif dataset_ext == "blastxml": - self.add_blastxml( - dataset_path, outputTrackConfig, track["conf"]["options"]["blast"] - ) - elif dataset_ext == "vcf": - self.add_vcf(dataset_path, outputTrackConfig) - elif dataset_ext == "vcf_bgzip": - self.add_vcf(dataset_path, outputTrackConfig, zipped=True) - elif dataset_ext == "rest": - self.add_rest( - track["conf"]["options"]["rest"]["url"], outputTrackConfig - ) - elif dataset_ext == "synteny": - self.add_paf( - dataset_path, outputTrackConfig, track["conf"]["options"]["synteny"] - ) - elif dataset_ext == "hic": - self.add_hic( - dataset_path, outputTrackConfig, track["conf"]["options"]["hic"] - ) - elif dataset_ext == "sparql": - sparql_query = track["conf"]["options"]["sparql"]["query"] - for key, value in mapped_chars.items(): - sparql_query = sparql_query.replace(value, key) - sparql_query_refnames = track["conf"]["options"]["sparql"][ - "query_refnames" - ] - for key, value in mapped_chars.items(): - sparql_query_refnames = sparql_query_refnames.replace(value, key) - self.add_sparql( - track["conf"]["options"]["sparql"]["url"], - sparql_query, - sparql_query_refnames, - outputTrackConfig, - ) - else: - log.warn("Do not know how to handle %s", dataset_ext) - - # Return non-human label for use in other fields - yield outputTrackConfig["label"] - - def add_default_session(self, data): - """ - Add some default session settings: set some assemblies/tracks on/off - """ - tracks_data = [] - - # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708 - - # We need to know the track type from the config.json generated just before - config_path = os.path.join(self.outdir, "data", "config.json") - track_types = {} - with open(config_path, "r") as config_file: - config_json = json.load(config_file) - - for track_conf in config_json["tracks"]: - track_types[track_conf["trackId"]] = track_conf["type"] - - for on_track in data["visibility"]["default_on"]: - # TODO several problems with this currently - # - we are forced to copy the same kind of style config as the per track config from _prepare_track_style (not exactly the same though) - # - we get an error when refreshing the page - # - this could be solved by session specs, see https://github.com/GMOD/jbrowse-components/issues/2708 - style_data = {"type": "LinearBasicDisplay", "height": 100} - - if on_track in data["style"]: - if "display" in data["style"][on_track]: - style_data["type"] = data["style"][on_track]["display"] - del data["style"][on_track]["display"] - - style_data.update(data["style"][on_track]) - - if on_track in data["style_labels"]: - # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work - # TODO move this to per track displays? - style_data["labels"] = data["style_labels"][on_track] - - tracks_data.append( - { - "type": track_types[on_track], - "configuration": on_track, - "displays": [style_data], - } - ) - - # The view for the assembly we're adding - view_json = {"type": "LinearGenomeView", "tracks": tracks_data} - - refName = None - if data.get("defaultLocation", ""): - loc_match = re.search(r"^(\w+):(\d+)\.+(\d+)$", data["defaultLocation"]) - if loc_match: - refName = loc_match.group(1) - start = int(loc_match.group(2)) - end = int(loc_match.group(3)) - elif self.assembly_ids[self.current_assembly_id] is not None: - refName = self.assembly_ids[self.current_assembly_id] - start = 0 - end = 1000000 # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708 - - if refName is not None: - # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome - view_json["displayedRegions"] = [ - { - "refName": refName, - "start": start, - "end": end, - "reversed": False, - "assemblyName": self.current_assembly_id, - } - ] - - session_name = data.get("session_name", "New session") - if not session_name: - session_name = "New session" - - # Merge with possibly existing defaultSession (if upgrading a jbrowse instance) - session_json = {} - if "defaultSession" in config_json: - session_json = config_json["defaultSession"] - - session_json["name"] = session_name - - if "views" not in session_json: - session_json["views"] = [] - - session_json["views"].append(view_json) - - config_json["defaultSession"] = session_json - - with open(config_path, "w") as config_file: - json.dump(config_json, config_file, indent=2) - - def add_general_configuration(self, data): - """ - Add some general configuration to the config.json file - """ - - config_path = os.path.join(self.outdir, "data", "config.json") - with open(config_path, "r") as config_file: - config_json = json.load(config_file) - - config_data = {} - - config_data["disableAnalytics"] = data.get("analytics", "false") == "true" - - config_data["theme"] = { - "palette": { - "primary": {"main": data.get("primary_color", "#0D233F")}, - "secondary": {"main": data.get("secondary_color", "#721E63")}, - "tertiary": {"main": data.get("tertiary_color", "#135560")}, - "quaternary": {"main": data.get("quaternary_color", "#FFB11D")}, - }, - "typography": {"fontSize": int(data.get("font_size", 10))}, - } - - config_json["configuration"].update(config_data) - - with open(config_path, "w") as config_file: - json.dump(config_json, config_file, indent=2) - - def clone_jbrowse(self, jbrowse_dir, destination): - """Clone a JBrowse directory into a destination directory.""" - - copytree(jbrowse_dir, destination) - - try: - shutil.rmtree(os.path.join(destination, "test_data")) - except OSError as e: - log.error("Error: %s - %s." % (e.filename, e.strerror)) - - if not os.path.exists(os.path.join(destination, "data")): - # It can already exist if upgrading an instance - os.makedirs(os.path.join(destination, "data")) - log.info("makedir %s" % (os.path.join(destination, "data"))) - - os.symlink("./data/config.json", os.path.join(destination, "config.json")) - - -def copytree(src, dst, symlinks=False, ignore=None): - for item in os.listdir(src): - s = os.path.join(src, item) - d = os.path.join(dst, item) - if os.path.isdir(s): - shutil.copytree(s, d, symlinks, ignore) - else: - shutil.copy2(s, d) - - -def parse_style_conf(item): - if "type" in item.attrib and item.attrib["type"] in ["boolean", "integer"]: - if item.attrib["type"] == "boolean": - return item.text in ("yes", "true", "True") - elif item.attrib["type"] == "integer": - return int(item.text) - else: - return item.text - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="", epilog="") - parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration") - - parser.add_argument("--jbrowse", help="Folder containing a jbrowse release") - parser.add_argument("--outdir", help="Output directory", default="out") - parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.8.0") - args = parser.parse_args() - - tree = ET.parse(args.xml.name) - root = tree.getroot() - - # This should be done ASAP - GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text - # Sometimes this comes as `localhost` without a protocol - if not GALAXY_INFRASTRUCTURE_URL.startswith("http"): - # so we'll prepend `http://` and hope for the best. Requests *should* - # be GET and not POST so it should redirect OK - GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL - - jc = JbrowseConnector( - jbrowse=args.jbrowse, - outdir=args.outdir, - genomes=[ - { - "path": os.path.realpath(x.attrib["path"]), - "meta": metadata_from_node(x.find("metadata")), - "label": x.attrib["label"], - } - for x in root.findall("metadata/genomes/genome") - ], - ) - - default_session_data = { - "visibility": { - "default_on": [], - "default_off": [], - }, - "style": {}, - "style_labels": {}, - } - - # TODO add metadata to tracks - for track in root.findall("tracks/track"): - track_conf = {} - track_conf["trackfiles"] = [] - - trackfiles = track.findall("files/trackFile") - if trackfiles: - for x in track.findall("files/trackFile"): - if trackfiles: - metadata = metadata_from_node(x.find("metadata")) - - track_conf["trackfiles"].append( - ( - os.path.realpath(x.attrib["path"]), - x.attrib["ext"], - x.attrib["label"], - metadata, - ) - ) - else: - # For tracks without files (rest, sparql) - track_conf["trackfiles"].append( - ( - "", # N/A, no path for rest or sparql - track.attrib["format"], - track.find("options/label").text, - {}, - ) - ) - - track_conf["category"] = track.attrib["cat"] - track_conf["format"] = track.attrib["format"] - track_conf["style"] = { - item.tag: parse_style_conf(item) for item in track.find("options/style") - } - - track_conf["style"] = { - item.tag: parse_style_conf(item) for item in track.find("options/style") - } - - track_conf["style_labels"] = { - item.tag: parse_style_conf(item) - for item in track.find("options/style_labels") - } - - track_conf["conf"] = etree_to_dict(track.find("options")) - keys = jc.process_annotations(track_conf) - - for key in keys: - default_session_data["visibility"][ - track.attrib.get("visibility", "default_off") - ].append(key) - - default_session_data["style"][key] = track_conf[ - "style" - ] # TODO do we need this anymore? - default_session_data["style_labels"][key] = track_conf["style_labels"] - - default_session_data["defaultLocation"] = root.find( - "metadata/general/defaultLocation" - ).text - default_session_data["session_name"] = root.find( - "metadata/general/session_name" - ).text - - general_data = { - "analytics": root.find("metadata/general/analytics").text, - "primary_color": root.find("metadata/general/primary_color").text, - "secondary_color": root.find("metadata/general/secondary_color").text, - "tertiary_color": root.find("metadata/general/tertiary_color").text, - "quaternary_color": root.find("metadata/general/quaternary_color").text, - "font_size": root.find("metadata/general/font_size").text, - } - - jc.add_default_session(default_session_data) - jc.add_general_configuration(general_data) - jc.text_index() diff -r 7c2e28e144f3 -r cce8dacb240f abjbrowse2.xml --- a/abjbrowse2.xml Mon Jan 22 12:05:09 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1295 +0,0 @@ - - genome browser - - macros.xml - - - topic_3307 - topic_0092 - - - operation_0573 - operation_0564 - - - jbrowse - - - python '${__tool_directory__}/jbrowse2.py' --version - to test the files I want to test. Hmph. -#if str($uglyTestingHack) == "enabled": - cp $trackxml $output -#end if - ]]> - - - - - - #if str($reference_genome.genome_type_select) == "indexed": - - - s - #else - - - - - #else - user_email="anonymous" - user_id="-1" - display_name="Unnamed History"/> - #end if - - - - - #end if - - - ${jbgen.defaultLocation} - ${jbgen.enableAnalytics} - ${jbgen.primary_color} - ${jbgen.secondary_color} - ${jbgen.tertiary_color} - ${jbgen.quaternary_color} - ${jbgen.font_size} - ${jbgen.session_name} - - ${__app__.config.galaxy_infrastructure_url} - - - #for $tg in $track_groups: - #for $track in $tg.data_tracks: - - #if $track.data_format.data_format_select != "sparql": - - #for $dataset in $track.data_format.annotation: - - - - - #else - user_email="anonymous" - user_id="-1" - display_name="Unnamed History"/> - #end if - - - - - #end for - - #end if - - - - - ## TODO other label options: https://github.com/GMOD/jbrowse-components/blob/main/plugins/svg/src/SvgFeatureRenderer/configSchema.ts - #if 'label' in $track.data_format.jbstyle.track_style - ${track.data_format.jbstyle.track_style.label} - #end if - #if 'description' in $track.data_format.jbstyle.track_style - ${track.data_format.jbstyle.track_style.description} - #end if - - #if str($track.data_format.data_format_select) == "gene_calls" or str($track.data_format.data_format_select) == "blast" or str($track.data_format.data_format_select) == "sparql": - - #if str($track.data_format.jbcolor_scale.color_score.color_score_select) == "none": - ignore - - #if str($track.data_format.jbcolor_scale.color_score.color.color_select) == "automatic": - __auto__ - #else - ${track.data_format.jbcolor_scale.color_score.color.style_color} - #end if - - #else - score - ${track.data_format.jbcolor_scale.color_score.score_scaling} - - ${track.data_format.jbcolor_scale.color_score.score_scales.scale_select} - - #if str($track.data_format.jbcolor_scale.color_score.score_scales.scale_select) == "manual": - ${track.data_format.jbcolor_scale.color_score.score_scales.minimum} - ${track.data_format.jbcolor_scale.color_score.score_scales.maximum} - #end if - - - ${track.data_format.jbcolor_scale.color_score.color_scheme.score_scheme} - ## auto_color - #if str($track.data_format.jbcolor_scale.color_score.color_scheme.score_scheme) == "opacity": - #if str($track.data_format.jbcolor_scale.color_score.color_scheme.color.color_select) == "automatic": - __auto__ - #else - ${track.data_format.jbcolor_scale.color_score.color_scheme.color.style_color} - #end if - #end if - - #end if - - - #for $menu_item in $track.data_format.jbmenu.track_menu: - - ${menu_item.menu_action} - #if str($menu_item.menu_label) != "": - - #end if - #if str($menu_item.menu_title) != "": - ${menu_item.menu_title} - #end if - #if str($menu_item.menu_url) != "": - ${menu_item.menu_url.replace("&", "&").replace("\"", """)} - #end if - #if str($menu_item.menu_icon) != "": - ${menu_item.menu_icon} - #end if - - #end for - - #end if - - #if str($track.data_format.data_format_select) == "wiggle": - - ${track.data_format.xyplot} - ${track.data_format.var_band} - #if str($track.data_format.scaling.scale_select) == "auto_local": - local - #else if str($track.data_format.scaling.scale_select) == "auto_global": - global - #else: - ${track.data_format.scaling.minimum} - ${track.data_format.scaling.maximum} - #end if - ${track.data_format.scale_select2} - - ## Wiggle tracks need special color config - #if str($track.data_format.jbcolor.color.color_select) != "automatic": - ${track.data_format.jbcolor.color.style_pos_color} - ${track.data_format.jbcolor.color.style_neg_color} - #else: - __auto__ - __auto__ - #end if - - ## Bicolor pivot config - #if str($track.data_format.jbcolor.bicolor_pivot.bicolor_pivot_select) == "zero": - zero - #else if str($track.data_format.jbcolor.bicolor_pivot.bicolor_pivot_select) == "mean": - mean - #else: - ${track.data_format.jbcolor.bicolor_pivot.pivot_point} - #end if - - #else if str($track.data_format.data_format_select) == "pileup": - - - #for $dataset in $track.data_format.annotation: - ${dataset.metadata.bam_index} - #end for - - - #else if str($track.data_format.data_format_select) == "cram": - - - #for $dataset in $track.data_format.annotation: - ${dataset.metadata.cram_index} - #end for - - - #else if str($track.data_format.data_format_select) == "blast": - - #if str($track.data_format.blast_parent) != "": - ${track.data_format.blast_parent} - #end if - ${track.data_format.is_protein} - ${track.data_format.min_gap} - ${track.data_format.index} - - #else if str($track.data_format.data_format_select) == "gene_calls": - - #if $track.data_format.match_part.match_part_select: - ${track.data_format.match_part.name} - #end if - ${track.data_format.index} - - #else if str($track.data_format.data_format_select) == "synteny": - - ${track.data_format.synteny_genome} - ${track.data_format.synteny_genome.element_identifier} - - #else if str($track.data_format.data_format_select) == "hic": - - - #else if str($track.data_format.data_format_select) == "sparql": - - - ${track.data_format.url} - ${track.data_format.query} - ${track.data_format.query_refnames} - - #end if - - - #end for - #end for - - -]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - -
-
-
-
- - - - - - - - - - -
- - - - - - - -
-
-
-
- - - - - - - - - - -
- - - - - - - - - - - - - -
-
-
- - - - - - - -
- - - - - - - - - - - - - - -
-
-
- - - - - - - -
- - - - - - - - - - - - - - - -
-
-
- - - - - - - -
- - - - - - - - - - - - - - - - -
-
-
-
- - - - - - - - - - -
- - - - - - -
-
-
- - - - - - - - -
- - - - - - -
-
-
-
- - - - - - - - - - - - - - -
- - - - - - - - - - - - - -
- - - - - - -
-
- - - - - - - - - - - - - -
-
-
- - - - - - - - -
- - - - - - -
-
-
-
- - - - - - - - - - -
- - - - - - - - - - - - - - - -
- - - -
-
-
- - - - - - -
-
-
-
- - - - - - - - - -
- - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-
-
- - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - `__, who you can help you -with missing features or bugs in the tool. - -Options -------- - -The first option you encounter is the **Fasta Sequence(s)**. This option -now accepts multiple fasta files, allowing you to build JBrowse -instances that contain data for multiple genomes or chrosomomes -(generally known as "landmark features" in gff3 terminology.) Up to 30 -will be shown from the dropdown selector within JBrowse, this is a known -issue. - -**Genetic Code** is a new feature in v0.4 of JiG / v1.12.0 of JBrowse, -which allows users to specify a non standard genetic code, and have -JBrowse highlight the correct start and stop codons. - -**Track Groups** represent a set of tracks in a single category. These -can be used to let your users understand relationships between large -groups of tracks. - -.. image:: sections.png - -Annotation Tracks ------------------ - -Within Track Groups, you have one or more **Annotation Tracks**. Each -Annotation Track is a groups of datasets which have similar styling. -This allows you to rapidly build up JBrowse instances without having to -configure tracks individually. A massive improvement over previous -versions. For example, if you have five different GFF3 files from -various gene callers that you wish to display, you can take advantage of -this feature to style all of them similarly. - -There are a few different types of tracks supported, each with their own -set of options: - -GFF3/BED -~~~~~~~~ - -These are your standard feature tracks. They usually highlight genes, -mRNAs and other features of interest along a genomic region. The -underlying tool and this help documentation focus primarily on GFF3 -data, and have not been tested extensively with other formats. Automatic -min/max detection will fail under BED datasets. - -The data may be of a subclass we call **match/match part** data. This -consists of top level ``match`` features, with a child ``match_part`` -feature, and is often used in displaying alignments. (See "Alignments" -section on the `GFF3 -specification `__ for more -information). If the data is match/match part, you will need to specify -the top level match feature name, as it can be one of a few different SO -terms, and JiG does not yet have the ability to understand SO terms. - -Next up is the **Styling Options** section, which lets you control a few -properties on how the track is styled. Most of these you will not need -to configure and can safely leave on defaults. Occasionally you will -want to change what information is shown in the end product. - -.. image:: styling.png - -In the above image you can see some black text, and some blue text. The -source of the black text is configured with the **style.label** option, -and the source of the blue text is configured with the -**style.description** option. - -Feature Score Scaling & Colouring Options -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -First, you need to choose between ignoring the score attribute of GFF3 -files, or using it. If you choose to ignore it, all features will be -coloured with a solid colour. If you choose to use it, features will -have slightly different colours based on their scores. - -.. image:: opacity.png - -If you choose **Ignore score**, you may choose between automatically -choosing a colour, or manually specifying one. The automatically chosen -colours vary along a brewer palette and generally look quite nice with -no human intervention required. The manual colour choice is somewhat -self explanatory. Clicking on the small coloured square will bring up a -colour palette. - -If you choose **Base on score**, you're faced with a dizzying array of -options. First is the function to map the colour choices to colour -values. JiG comes with a few functions built in such as linear scaling, -logarithmic scaling, and blast scaling. - -The **linear scaling** method says "take these values, and they map -directly to a range of output values". **Logarithmic scaling** says -"please take the log of the score before mapping", and **Blast scaling** -is further specialised to handle blast data more nicely. These are -convenience functions to help transform the wide array of possible -values in the GFF3 score attribute to more meaningful numbers. If you -need more comprehensive score scaling, it is recommended that you -pre-process your GFF3 files somehow. - -Once you've selected a scaling method, you can choose to manually -specify the minimum and maximum expected values, or you can let JiG -determine them for you automatically. - -Finally, opacity is the only mapping we currently provide. Future -iterations will attempt to improve upon this and provide more colour -scales. The Opacity option maps the highest scoring features to full -opacity, and everything else to lower ones. - -BAM Pileups -~~~~~~~~~~~ - -We support BAM files and can automatically generate SNP tracks based on -that bam data. - -.. image:: bam.png - -This is *strongly discouraged* for high coverage density datasets. -Unfortunately there are no other configuration options exposed for bam -files. - -BlastXML -~~~~~~~~ - -.. image:: blast.png - -JiG now supports both blastn and blastp datasets. JiG internally uses a -blastXML to gapped GFF3 tool to convert your blastxml datasets into a -format amenable to visualization in JBrowse. This tool is also -available separately from the IUC on the toolshed. - -**Minimum Gap Size** reflects how long a gap must be before it becomes a -real gap in the processed gff3 file. In the picture above, various sizes -of gaps can be seen. If the minimum gap size was set much higher, say -100nt, many of the smaller gaps would disappear, and the features on -both sides would be merged into one, longer feature. This setting is -inversely proportional to runtime and output file size. *Do not set this -to a low value for large datasets*. By setting this number lower, you -will have extremely large outputs and extremely long runtimes. The -default was configured based off of the author's experience, but the -author only works on small viruses. It is *strongly* recommended that -you filter your blast results before display, e.g. picking out the top -10 hits or so. - -**Protein blast search** option merely informs underlying tools that -they should adjust feature locations by 3x. - -Styling Options -^^^^^^^^^^^^^^^ - -Please see the styling options for GFF3 datasets, they are identical. - -Feature Score Scaling & Coloring Options -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Please see the score scaling and colouring options for GFF3 datasets, -they are identical. Remember to set your score scaling to "blast" method -if you do use it. - -Bigwig XY -~~~~~~~~~ - -.. image:: bigwig.png - -**XYPlot** - -BigWig tracks can be displayed as a "density" plot which is continuous -line which varies in colour, or as an "XYplot." XYplots are preferable -for users to visually identify specific features in a bigwig track, -however density tracks are more visually compact. - -**Variance Band** is an option available to XYPlots, and can be seen in -the third and fourth tracks in the above picture. This overlays a mean -line, and 1 and 2 standard deviation areas. - -**Track Scaling** is different from colour scaling, instead it -configures how the track behaves inside of JBrowse. **Autoscaling -globally** means that JBrowse will determine the minimum and maximum for -the track, and fix the bounds of the viewport to that. E.g. if your -track ranges from 1-1000, and the region you're currently zoomed to only -goes from 0-50, then the viewport range will still show 1-1000. This is -good for global genomic context. However you may wish to consider -**autoscaling locally** instead. In the example of a region which varies -from 0-50, autoscaling locally would cause the individual track's -viewport to re-adjust and show just the 0-50 region. If neither of these -options are palatable, you may manually hardcode the minimum and -maximums for the track to scale to. - -Colour Options -^^^^^^^^^^^^^^ - -BigWig tracks have two colours in JBrowse, a positive and a negative -colour. - -As always you may manually choose a colour, or let JiG choose for you. - -One of the more interesting options is the **Bicolor pivot**. This -option allows you to control the point at which JBrowse switches from -the positive colour to the negative. In the above graphic, you can see -this has been configured to "mean" for the first two (orange and blue) -tracks. - -VCFs/SNPs -~~~~~~~~~ - -These tracks do not support any special configuration. - -@ATTRIBUTION@ -]]> - -
- diff -r 7c2e28e144f3 -r cce8dacb240f config.json.sample --- a/config.json.sample Mon Jan 22 12:05:09 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ -{ - "configuration": { - "rpc": { - "defaultDriver": "WebWorkerRpcDriver", - "drivers": { - "MainThreadRpcDriver": {}, - "WebWorkerRpcDriver": {} - } - }, - "logoPath": { - "locationType": "UriLocation", - "uri": "" - } - }, - "plugins": [], - "assemblies": [], - "tracks": [], - "internetAccounts": [], - "aggregateTextSearchAdapters": [], - "connections": [], - "defaultSession": {} -} diff -r 7c2e28e144f3 -r cce8dacb240f jbrowse2.py --- a/jbrowse2.py Mon Jan 22 12:05:09 2024 +0000 +++ b/jbrowse2.py Thu Jan 25 07:39:41 2024 +0000 @@ -18,7 +18,7 @@ logging.basicConfig(level=logging.INFO) log = logging.getLogger("jbrowse") -JB2VER = "v2.10.0" +JB2VER = "v2.10.1" # version pinned for cloning TODAY = datetime.datetime.now().strftime("%Y-%m-%d") @@ -458,10 +458,10 @@ self.genome_name = ( genome_name # first one for all tracks - other than paf ) - if self.config_json.get("assemblies", None): - self.config_json["assemblies"] += assemblies - else: - self.config_json["assemblies"] = assemblies + if self.config_json.get("assemblies", None): + self.config_json["assemblies"] += assemblies + else: + self.config_json["assemblies"] = assemblies def make_assembly(self, fapath, gname): hashData = [ @@ -727,6 +727,14 @@ os.unlink(gff3) def add_bigwig(self, data, trackData): + """ "type": "LinearWiggleDisplay", + "configuration": {}, + "selectedRendering": "", + "resolution": 1, + "posColor": "rgb(228, 26, 28)", + "negColor": "rgb(255, 255, 51)", + "constraints": {} + """ url = "%s.bigwig" % trackData["label"] # slashes in names cause path trouble dest = os.path.join(self.outdir, url) @@ -756,7 +764,7 @@ trackDict["style"] = style_json self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) - logging.debug("#### wig trackData=%s" % str(trackData)) + logging.info("#### wig trackData=%s" % str(trackData)) def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): tId = trackData["label"] @@ -949,11 +957,11 @@ tname = trackData["name"] tId = trackData["label"] pgname = pafOpts["genome_label"] - if len(pgname.split() > 1): + if len(pgname.split()) > 1: pgname = pgname.split()[ 0 ] # trouble from spacey names in command lines avoidance - asstrack, gname = self.make_assembly(pafOpts["genome"], pgname) + asstrack = self.make_assembly(pafOpts["genome"], pgname) self.genome_names.append(pgname) if self.config_json.get("assemblies", None): self.config_json["assemblies"].append(asstrack) @@ -961,7 +969,6 @@ self.config_json["assemblies"] = [ asstrack, ] - url = "%s.paf" % (trackData["label"]) dest = "%s/%s" % (self.outdir, url) self.symlink_or_copy(os.path.realpath(data), dest) @@ -975,6 +982,16 @@ "pafLocation": {"uri": url}, "assemblyNames": [self.genome_name, pgname], }, + # "displays": [ + # { + # "type": "LinearSyntenyDisplay", + # "displayId": "%s-LinearSyntenyDisplay" % tId, + # }, + # { + # "type": "DotPlotDisplay", + # "displayId": "%s-DotPlotDisplay" % tId, + # }, + # ], } style_json = self._prepare_track_style(trackDict) trackDict["style"] = style_json @@ -1130,6 +1147,10 @@ ) elif dataset_ext == "vcf": self.add_vcf(dataset_path, outputTrackConfig) + elif dataset_ext == "paf": + self.add_paf( + dataset_path, outputTrackConfig, track["conf"]["options"]["synteny"] + ) else: log.warn("Do not know how to handle %s", dataset_ext) # Return non-human label for use in other fields @@ -1177,7 +1198,7 @@ ddl = data["defaultLocation"] loc_match = re.search( r"^([^:]+):(\d+)\.+(\d+)$", ddl - ) # was re.search(r"^(\w.+):(\d+)\.+(\d+)$" + ) if loc_match: refName = loc_match.group(1) start = int(loc_match.group(2)) @@ -1407,6 +1428,15 @@ } track_conf["conf"] = etree_to_dict(track.find("options")) + track_conf["category"] = track.attrib["cat"] + track_conf["format"] = track.attrib["format"] + try: + # Only pertains to gff3 + blastxml. TODO? + track_conf["style"] = {t.tag: t.text for t in track.find("options/style")} + except TypeError: + track_conf["style"] = {} + pass + track_conf["conf"] = etree_to_dict(track.find("options")) keys = jc.process_annotations(track_conf) if keys: @@ -1422,34 +1452,26 @@ default_session_data["style_labels"][key] = track_conf.get( "style_labels", None ) - - default_session_data["defaultLocation"] = root.find( - "metadata/general/defaultLocation" - ).text - default_session_data["session_name"] = root.find( - "metadata/general/session_name" - ).text - - general_data = { - "analytics": root.find("metadata/general/analytics").text, - "primary_color": root.find("metadata/general/primary_color").text, - "secondary_color": root.find("metadata/general/secondary_color").text, - "tertiary_color": root.find("metadata/general/tertiary_color").text, - "quaternary_color": root.find("metadata/general/quaternary_color").text, - "font_size": root.find("metadata/general/font_size").text, - } - track_conf["category"] = track.attrib["cat"] - track_conf["format"] = track.attrib["format"] - try: - # Only pertains to gff3 + blastxml. TODO? - track_conf["style"] = {t.tag: t.text for t in track.find("options/style")} - except TypeError: - track_conf["style"] = {} - pass - track_conf["conf"] = etree_to_dict(track.find("options")) - jc.add_general_configuration(general_data) - jc.config_json["tracks"] = jc.tracksToAdd + default_session_data["defaultLocation"] = root.find( + "metadata/general/defaultLocation" + ).text + default_session_data["session_name"] = root.find( + "metadata/general/session_name" + ).text + general_data = { + "analytics": root.find("metadata/general/analytics").text, + "primary_color": root.find("metadata/general/primary_color").text, + "secondary_color": root.find("metadata/general/secondary_color").text, + "tertiary_color": root.find("metadata/general/tertiary_color").text, + "quaternary_color": root.find("metadata/general/quaternary_color").text, + "font_size": root.find("metadata/general/font_size").text, + } + jc.add_general_configuration(general_data) + trackconf = jc.config_json.get("tracks", None) + if trackconf: + jc.config_json["tracks"].update(jc.tracksToAdd) + else: + jc.config_json["tracks"] = jc.tracksToAdd jc.write_config() jc.add_default_session(default_session_data) - # jc.text_index() not sure what broke here. diff -r 7c2e28e144f3 -r cce8dacb240f jbrowse2.xml --- a/jbrowse2.xml Mon Jan 22 12:05:09 2024 +0000 +++ b/jbrowse2.xml Thu Jan 25 07:39:41 2024 +0000 @@ -33,7 +33,7 @@ #if str($reference_genome.genome_type_select) == "indexed": - s + #else @@ -297,12 +297,12 @@ - - + diff -r 7c2e28e144f3 -r cce8dacb240f macros.xml --- a/macros.xml Mon Jan 22 12:05:09 2024 +0000 +++ b/macros.xml Thu Jan 25 07:39:41 2024 +0000 @@ -1,6 +1,6 @@ - 2.10.0 + 2.10.1 topic_3307 diff -r 7c2e28e144f3 -r cce8dacb240f readme.rst --- a/readme.rst Mon Jan 22 12:05:09 2024 +0000 +++ b/readme.rst Thu Jan 25 07:39:41 2024 +0000 @@ -38,11 +38,15 @@ - 2.10.0+galaxy2 - UPDATED existing JBrowse1.16.11 code to JBrowse 2.10.0 + - was working well enough for VGP when previous PR discovered + - too late to backport all the fixes + - working default session and some other ideas copied instead. - seems to work well with defaults. - need to document and implement track settings by running the browser locally. - works well enough to be useful in workflows such as TreeValGal. - JB2 seems to set defaults wisely. - not yet ideal for users who need fine grained track control. + - synteny works. Wrapper License (MIT/BSD Style)