# HG changeset patch
# User fubar
# Date 1705924339 0
# Node ID 1d86925dbb4cb89d582e16bff34b84d18cfd27fe
# Parent 247e17ce504bc3f5a1e098568a95a183ace7d792
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 873a12803692b0a84814a6dc08331d772d0e5492-dirty
diff -r 247e17ce504b -r 1d86925dbb4c abjbrowse2.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/abjbrowse2.py Mon Jan 22 11:52:19 2024 +0000
@@ -0,0 +1,1097 @@
+#!/usr/bin/env python
+import argparse
+import binascii
+import datetime
+import hashlib
+import json
+import logging
+import os
+import re
+import shutil
+import struct
+import subprocess
+import tempfile
+import xml.etree.ElementTree as ET
+from collections import defaultdict
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger('jbrowse')
+TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
+GALAXY_INFRASTRUCTURE_URL = None
+
+
class ColorScaling(object):
    """Builds JBrowse client-side colour callbacks for feature tracks.

    Produces small JavaScript snippets (from the templates below) that
    compute a feature's rgba() colour from its score, plus helpers for
    rotating through a Brewer colour palette for automatic colouring.
    """

    # JS callback: fixed RGB colour, opacity computed from {score} via the
    # {opacity} fragment (one of OPACITY_MATH).
    COLOR_FUNCTION_TEMPLATE = """
    function(feature, variableName, glyphObject, track) {{
        var score = {score};
        {opacity}
        return 'rgba({red}, {green}, {blue}, ' + opacity + ')';
    }}
    """

    # JS callback: colour taken from the feature's own 'color' attribute,
    # searched up (parents) then down (children) the feature hierarchy,
    # falling back to {user_spec_color} / {auto_gen_color}; opacity is
    # derived from the feature's 'score' the same way.
    COLOR_FUNCTION_TEMPLATE_QUAL = r"""
    function(feature, variableName, glyphObject, track) {{
        var search_up = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.parent() === undefined) {{
                return;
            }}else{{
                return self(sf.parent(), attr);
            }}
        }};

        var search_down = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.children() === undefined) {{
                return;
            }}else{{
                var kids = sf.children();
                for(var child_idx in kids){{
                    var x = self(kids[child_idx], attr);
                    if(x !== undefined){{
                        return x;
                    }}
                }}
                return;
            }}
        }};

        var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color});
        var score = (search_up(feature, 'score') || search_down(feature, 'score'));
        {opacity}
        if(score === undefined){{ opacity = 1; }}
        var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color);
        var red = parseInt(result[1], 16);
        var green = parseInt(result[2], 16);
        var blue = parseInt(result[3], 16);
        if(isNaN(opacity) || opacity < 0){{ opacity = 0; }}
        return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')';
    }}
    """

    # JS fragments mapping a score to an opacity, keyed by scaling algorithm.
    OPACITY_MATH = {
        'linear': """
            var opacity = (score - ({min})) / (({max}) - ({min}));
        """,
        'logarithmic': """
            var opacity = Math.log10(score - ({min})) / Math.log10(({max}) - ({min}));
        """,
        'blast': """
            var opacity = 0;
            if(score == 0.0) {{
                opacity = 1;
            }} else {{
                opacity = (20 - Math.log10(score)) / 180;
            }}
        """
    }

    BREWER_COLOUR_IDX = 0
    # Paired/qualitative Brewer-style palette cycled by _get_colours().
    BREWER_COLOUR_SCHEMES = [
        (166, 206, 227),
        (31, 120, 180),
        (178, 223, 138),
        (51, 160, 44),
        (251, 154, 153),
        (227, 26, 28),
        (253, 191, 111),
        (255, 127, 0),
        (202, 178, 214),
        (106, 61, 154),
        (255, 255, 153),
        (177, 89, 40),
        (228, 26, 28),
        (55, 126, 184),
        (77, 175, 74),
        (152, 78, 163),
        (255, 127, 0),
    ]

    # Endpoint colours (dark ends) of the Brewer diverging palettes.
    BREWER_DIVERGING_PALLETES = {
        'BrBg': ("#543005", "#003c30"),
        'PiYg': ("#8e0152", "#276419"),
        'PRGn': ("#40004b", "#00441b"),
        'PuOr': ("#7f3b08", "#2d004b"),
        'RdBu': ("#67001f", "#053061"),
        'RdGy': ("#67001f", "#1a1a1a"),
        'RdYlBu': ("#a50026", "#313695"),
        'RdYlGn': ("#a50026", "#006837"),
        'Spectral': ("#9e0142", "#5e4fa2"),
    }

    def __init__(self):
        # Per-instance rotating index into BREWER_COLOUR_SCHEMES.
        self.brewer_colour_idx = 0

    def rgb_from_hex(self, hexstr):
        """Return the (r, g, b) tuple for a 6-digit hex string (no '#')."""
        # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
        return struct.unpack('BBB', binascii.unhexlify(hexstr))

    def min_max_gff(self, gff_file):
        """Return (min, max) of the score column (column 6) of a GFF3 file.

        Lines whose score is missing or non-numeric (headers, comments,
        '.' scores) are skipped.  Returns (None, None) if no line carries
        a numeric score.
        """
        min_val = None
        max_val = None
        with open(gff_file, 'r') as handle:
            for line in handle:
                try:
                    value = float(line.split('\t')[5])
                except (ValueError, IndexError):
                    # Not a scored feature line; ignore.
                    continue
                # Explicit None checks: the previous `min_val or value`
                # idiom treated a legitimate 0.0 bound as "unset" (falsy)
                # and silently discarded it.
                if min_val is None or value < min_val:
                    min_val = value
                if max_val is None or value > max_val:
                    max_val = value
        return min_val, max_val

    def hex_from_rgb(self, r, g, b):
        """Return the '#rrggbb' hex string for an (r, g, b) triple."""
        return '#%02x%02x%02x' % (r, g, b)

    def _get_colours(self):
        """Return the next (r, g, b) from the palette, cycling forever."""
        r, g, b = self.BREWER_COLOUR_SCHEMES[self.brewer_colour_idx % len(self.BREWER_COLOUR_SCHEMES)]
        self.brewer_colour_idx += 1
        return r, g, b

    def parse_menus(self, track):
        """Build a JBrowse 'menuTemplate' config from a track's <menus>.

        The four leading empty dicts preserve JBrowse's default menu
        entries; user-defined entries are appended after them.
        """
        trackConfig = {'menuTemplate': [{}, {}, {}, {}]}

        if 'menu' in track['menus']:
            # A single <menu> parses as a dict; several parse as a list.
            menu_list = [track['menus']['menu']]
            if isinstance(track['menus']['menu'], list):
                menu_list = track['menus']['menu']

            for m in menu_list:
                tpl = {
                    'action': m['action'],
                    'label': m.get('label', '{name}'),
                    'iconClass': m.get('iconClass', 'dijitIconBookmark'),
                }
                if 'url' in m:
                    tpl['url'] = m['url']
                if 'content' in m:
                    tpl['content'] = m['content']
                if 'title' in m:
                    tpl['title'] = m['title']

                trackConfig['menuTemplate'].append(tpl)

        return trackConfig

    def parse_colours(self, track, trackFormat, gff3=None):
        """Build the 'style' (and related) colour config for one track.

        ``trackFormat`` selects the strategy: 'wiggle' gets a bicolor
        palette, 'blast' a fixed colour with BLAST-score opacity, and
        'gene_calls' a per-feature colour callback scaled over the score
        range found in ``gff3`` (or manual min/max).
        """
        # Wiggle tracks have a bicolor pallete
        trackConfig = {'style': {}}
        if trackFormat == 'wiggle':

            trackConfig['style']['pos_color'] = track['wiggle']['color_pos']
            trackConfig['style']['neg_color'] = track['wiggle']['color_neg']

            if trackConfig['style']['pos_color'] == '__auto__':
                trackConfig['style']['neg_color'] = self.hex_from_rgb(*self._get_colours())
                trackConfig['style']['pos_color'] = self.hex_from_rgb(*self._get_colours())

            # Wiggle tracks can change colour at a specified place
            bc_pivot = track['wiggle']['bicolor_pivot']
            if bc_pivot not in ('mean', 'zero'):
                # The values are either one of those two strings
                # or a number
                bc_pivot = float(bc_pivot)
            trackConfig['bicolor_pivot'] = bc_pivot
        elif 'scaling' in track:
            if track['scaling']['method'] == 'ignore':
                if track['scaling']['scheme']['color'] != '__auto__':
                    trackConfig['style']['color'] = track['scaling']['scheme']['color']
                else:
                    trackConfig['style']['color'] = self.hex_from_rgb(*self._get_colours())
            else:
                # Scored method
                algo = track['scaling']['algo']
                # linear, logarithmic, blast
                scales = track['scaling']['scales']
                # type __auto__, manual (min, max)
                scheme = track['scaling']['scheme']
                # scheme -> (type (opacity), color)
                # ==================================
                # GENE CALLS OR BLAST
                # ==================================
                if trackFormat == 'blast':
                    red, green, blue = self._get_colours()
                    color_function = self.COLOR_FUNCTION_TEMPLATE.format(**{
                        'score': "feature._parent.get('score')",
                        'opacity': self.OPACITY_MATH['blast'],
                        'red': red,
                        'green': green,
                        'blue': blue,
                    })
                    trackConfig['style']['color'] = color_function.replace('\n', '')
                elif trackFormat == 'gene_calls':
                    # Default values, based on GFF3 spec
                    min_val = 0
                    max_val = 1000
                    # Get min/max and build a scoring function since JBrowse doesn't
                    if scales['type'] == 'automatic' or scales['type'] == '__auto__':
                        min_val, max_val = self.min_max_gff(gff3)
                    else:
                        min_val = scales.get('min', 0)
                        max_val = scales.get('max', 1000)

                    if scheme['color'] == '__auto__':
                        user_color = 'undefined'
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
                    elif scheme['color'].startswith('#'):
                        user_color = "'%s'" % self.hex_from_rgb(*self.rgb_from_hex(scheme['color'][1:]))
                        auto_color = 'undefined'
                    else:
                        user_color = 'undefined'
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())

                    color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(**{
                        'opacity': self.OPACITY_MATH[algo].format(**{'max': max_val, 'min': min_val}),
                        'user_spec_color': user_color,
                        'auto_gen_color': auto_color,
                    })

                    trackConfig['style']['color'] = color_function.replace('\n', '')
        return trackConfig
+
+
def etree_to_dict(t):
    """Recursively convert an ElementTree element into nested dicts.

    Child elements become keys (collapsed into lists when a tag repeats),
    attributes are stored under '@'-prefixed keys, and text mixed with
    children/attributes goes under '#text'.  A childless, attribute-less
    element maps its tag directly to its stripped text.  Returns {} for
    None input.
    """
    if t is None:
        return {}

    children = list(t)
    result = {t.tag: {} if t.attrib else None}

    if children:
        # Group converted children by tag so repeated tags become lists.
        grouped = defaultdict(list)
        for child in children:
            for tag, value in etree_to_dict(child).items():
                grouped[tag].append(value)
        collapsed = {}
        for tag, values in grouped.items():
            collapsed[tag] = values[0] if len(values) == 1 else values
        result = {t.tag: collapsed}

    for attr_name, attr_value in t.attrib.items():
        result[t.tag]['@' + attr_name] = attr_value

    if t.text:
        stripped = t.text.strip()
        if children or t.attrib:
            if stripped:
                result[t.tag]['#text'] = stripped
        else:
            result[t.tag] = stripped
    return result
+
+
+# score comes from feature._parent.get('score') or feature.get('score')
+
+INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
+
+
def metadata_from_node(node):
    """Flatten a Galaxy <metadata> element into a single-level dict.

    Attributes of the child <dataset>, <history>, <metadata> and <tool>
    elements are copied with their tag as prefix (e.g. ``dataset_id``).
    Returns an empty dict when *node* is None or does not contain exactly
    one <dataset> child.
    """
    metadata = {}
    try:
        if len(node.findall('dataset')) != 1:
            # exit early
            return metadata
    except Exception:
        # node is None (no <metadata> element) — findall raises.
        return {}

    for (key, value) in node.findall('dataset')[0].attrib.items():
        metadata['dataset_%s' % key] = value

    for (key, value) in node.findall('history')[0].attrib.items():
        metadata['history_%s' % key] = value

    for (key, value) in node.findall('metadata')[0].attrib.items():
        metadata['metadata_%s' % key] = value

    for (key, value) in node.findall('tool')[0].attrib.items():
        metadata['tool_%s' % key] = value

    # Additional Mappings applied:
    # NOTE(review): these format() calls look like remnants of templates
    # that once built links/markup (the unused 'galaxy'/'encoded_id'
    # keyword arguments support that); as written they only copy or pick
    # existing values — confirm intent before simplifying.
    # '{1}' selects argument index 1, i.e. dataset_file_ext.
    metadata['dataset_edam_format'] = '{1}'.format(metadata['dataset_edam_format'], metadata['dataset_file_ext'])
    # Identity copy of the user email.
    metadata['history_user_email'] = '{0}'.format(metadata['history_user_email'])
    # Only 'hist_name' appears in the template; other kwargs are ignored.
    metadata['history_display_name'] = '{hist_name}'.format(
        galaxy=GALAXY_INFRASTRUCTURE_URL,
        encoded_hist_id=metadata['history_id'],
        hist_name=metadata['history_display_name']
    )
    # Only 'tool_id' appears in the template; other kwargs are ignored.
    metadata['tool_tool'] = '{tool_id}'.format(
        galaxy=GALAXY_INFRASTRUCTURE_URL,
        encoded_id=metadata['dataset_id'],
        tool_id=metadata['tool_tool_id'],
        # tool_version=metadata['tool_tool_version'],
    )
    return metadata
+
+
class JbrowseConnector(object):
    """Drives creation of a JBrowse2 instance inside ``outdir``.

    Clones a JBrowse release, registers the reference genome(s) as
    assemblies, and exposes one ``add_*`` method per supported track
    type.  Most heavy lifting is delegated to the ``jbrowse`` CLI and to
    standard bioinformatics tools (samtools, bgzip, tabix, gff3sort).
    """

    def __init__(self, jbrowse, outdir, genomes):
        self.cs = ColorScaling()
        self.jbrowse = jbrowse
        self.outdir = outdir
        self.genome_paths = genomes
        # Labels of tracks that text_index() should index.
        self.tracksToIndex = []

        # Maps assembly label -> default scaffold name (None when unknown,
        # e.g. for assemblies discovered in a pre-existing config.json).
        self.assembly_ids = {}
        # Label of the assembly new tracks are attached to.
        self.current_assembly_id = []

        # If upgrading, look at the existing data
        self.check_existing(self.outdir)

        self.clone_jbrowse(self.jbrowse, self.outdir)

        self.process_genomes()

    def subprocess_check_call(self, command, output=None):
        """Run *command* (argv list) in ``self.outdir``, optionally
        redirecting stdout to the file object *output*.  Raises
        CalledProcessError on non-zero exit."""
        if output:
            log.debug('cd %s && %s > %s', self.outdir, ' '.join(command), output)
            subprocess.check_call(command, cwd=self.outdir, stdout=output)
        else:
            log.debug('cd %s && %s', self.outdir, ' '.join(command))
            subprocess.check_call(command, cwd=self.outdir)

    def subprocess_popen(self, command):
        """Run a shell *command* string; raise RuntimeError on failure.

        NOTE(review): the log message implies ``cd self.outdir`` but Popen
        is not given ``cwd`` — callers currently pass absolute paths, so
        this only matters if relative paths are ever used; confirm intent.
        """
        log.debug('cd %s && %s', self.outdir, command)
        p = subprocess.Popen(command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output, err = p.communicate()
        retcode = p.returncode
        if retcode != 0:
            log.error('cd %s && %s', self.outdir, command)
            log.error(output)
            log.error(err)
            raise RuntimeError("Command failed with exit code %s" % (retcode))

    def subprocess_check_output(self, command):
        """Run *command* (argv list) in ``self.outdir`` and return stdout."""
        log.debug('cd %s && %s', self.outdir, ' '.join(command))
        return subprocess.check_output(command, cwd=self.outdir)

    def symlink_or_copy(self, src, dest):
        """Symlink *src* to *dest* when GALAXY_JBROWSE_SYMLINKS is set,
        otherwise copy.

        NOTE(review): any non-empty value of the env var — including
        '0' or 'false' — enables symlinking; confirm that is intended.
        """
        if 'GALAXY_JBROWSE_SYMLINKS' in os.environ and bool(os.environ['GALAXY_JBROWSE_SYMLINKS']):
            cmd = ['ln', '-s', src, dest]
        else:
            cmd = ['cp', src, dest]

        return self.subprocess_check_call(cmd)

    def symlink_or_copy_load_action(self):
        """Return the `jbrowse` CLI --load action matching symlink_or_copy."""
        if 'GALAXY_JBROWSE_SYMLINKS' in os.environ and bool(os.environ['GALAXY_JBROWSE_SYMLINKS']):
            return 'symlink'
        else:
            return 'copy'

    def check_existing(self, destination):
        """Record assemblies already present in an existing config.json
        (i.e. when upgrading a previous JBrowse instance)."""
        existing = os.path.join(destination, 'data', "config.json")
        if os.path.exists(existing):
            with open(existing, 'r') as existing_conf:
                conf = json.load(existing_conf)
                if 'assemblies' in conf:
                    for assembly in conf['assemblies']:
                        if 'name' in assembly:
                            self.assembly_ids[assembly['name']] = None

    def process_genomes(self):
        """Register every input genome as an assembly."""
        for genome_node in self.genome_paths:
            # We only expect one input genome per run. This for loop is just
            # easier to write than the alternative / catches any possible
            # issues.
            self.add_assembly(genome_node['path'], genome_node['label'])

    def add_assembly(self, path, label, default=True):
        """bgzip + faidx the FASTA at *path* and register it with the
        `jbrowse add-assembly` CLI.  Returns the (possibly uniquified)
        assembly label actually used."""
        # Find a non-existing filename for the new genome
        # (to avoid colision when upgrading an existing instance)
        rel_seq_path = os.path.join('data', 'assembly')
        seq_path = os.path.join(self.outdir, rel_seq_path)
        fn_try = 1
        while (os.path.exists(seq_path + '.fasta') or os.path.exists(seq_path + '.fasta.gz')
               or os.path.exists(seq_path + '.fasta.gz.fai') or os.path.exists(seq_path + '.fasta.gz.gzi')):
            rel_seq_path = os.path.join('data', 'assembly%s' % fn_try)
            seq_path = os.path.join(self.outdir, rel_seq_path)
            fn_try += 1

        # Find a non-existing label for the new genome
        # (to avoid colision when upgrading an existing instance)
        lab_try = 1
        uniq_label = label
        while uniq_label in self.assembly_ids:
            uniq_label = label + str(lab_try)
            lab_try += 1

        # Find a default scaffold to display
        # TODO this may not be necessary in the future, see https://github.com/GMOD/jbrowse-components/issues/2708
        with open(path, 'r') as fa_handle:
            fa_header = fa_handle.readline()[1:].strip().split(' ')[0]

        self.assembly_ids[uniq_label] = fa_header
        if default:
            self.current_assembly_id = uniq_label

        copied_genome = seq_path + '.fasta'
        shutil.copy(path, copied_genome)

        # Compress with bgzip
        cmd = ['bgzip', copied_genome]
        self.subprocess_check_call(cmd)

        # FAI Index
        cmd = ['samtools', 'faidx', copied_genome + '.gz']
        self.subprocess_check_call(cmd)

        self.subprocess_check_call([
            'jbrowse', 'add-assembly',
            '--load', 'inPlace',
            '--name', uniq_label,
            '--type', 'bgzipFasta',
            '--target', os.path.join(self.outdir, 'data'),
            '--skipCheck',
            rel_seq_path + '.fasta.gz'])

        return uniq_label

    def text_index(self):
        """Run `jbrowse text-index` over the tracks queued for indexing."""
        # Index tracks
        args = [
            'jbrowse', 'text-index',
            '--target', os.path.join(self.outdir, 'data'),
            '--assemblies', self.current_assembly_id,
        ]

        tracks = ','.join(self.tracksToIndex)
        if tracks:
            args += ['--tracks', tracks]

        self.subprocess_check_call(args)

    def _blastxml_to_gff3(self, xml, min_gap=10):
        """Convert a BLAST XML file to gapped GFF3 via the helper script;
        returns the path of the temporary GFF3 (caller must unlink it)."""
        gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
        cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'),
               '--trim', '--trim_end', '--include_seq', '--min_gap', str(min_gap), xml]
        log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_unrebased.name)
        subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
        gff3_unrebased.close()
        return gff3_unrebased.name

    def _prepare_track_style(self, xml_conf):
        """Build the per-track 'displays' config from the parsed <style>.

        Consumes (deletes) the 'display' key from xml_conf['style'] and
        merges the remaining style keys into the display entry.
        """
        style_data = {
            "type": "LinearBasicDisplay"
        }

        if 'display' in xml_conf['style']:
            style_data['type'] = xml_conf['style']['display']
            del xml_conf['style']['display']

        style_data['displayId'] = "%s_%s" % (xml_conf['label'], style_data['type'])

        style_data.update(xml_conf['style'])

        return {'displays': [style_data]}

    def add_blastxml(self, data, trackData, blastOpts, **kwargs):
        """Add a BLAST XML track (converted to sorted, indexed GFF3)."""
        gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts['min_gap'])

        if 'parent' in blastOpts and blastOpts['parent'] != 'None':
            # Rebase hit coordinates onto the parent feature's coordinates.
            gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
            cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')]
            if blastOpts.get('protein', 'false') == 'true':
                cmd.append('--protein2dna')
            cmd.extend([os.path.realpath(blastOpts['parent']), gff3])
            log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_rebased.name)
            subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
            gff3_rebased.close()

            # Replace original gff3 file
            shutil.copy(gff3_rebased.name, gff3)
            os.unlink(gff3_rebased.name)

        rel_dest = os.path.join('data', trackData['label'] + '.gff')
        dest = os.path.join(self.outdir, rel_dest)

        self._sort_gff(gff3, dest)
        os.unlink(gff3)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest + '.gz', config=style_json)

    def add_bigwig(self, data, trackData, wiggleOpts, **kwargs):
        """Add a BigWig coverage track."""
        rel_dest = os.path.join('data', trackData['label'] + '.bw')
        dest = os.path.join(self.outdir, rel_dest)
        self.symlink_or_copy(os.path.realpath(data), dest)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json)

    # Anything ending in "am" (Bam or Cram)
    def add_xam(self, data, trackData, xamOpts, index=None, ext="bam", **kwargs):
        """Add a BAM/CRAM alignment track, reusing or locating its index."""
        index_ext = "bai"
        if ext == "cram":
            index_ext = "crai"

        rel_dest = os.path.join('data', trackData['label'] + '.%s' % ext)
        dest = os.path.join(self.outdir, rel_dest)

        self.symlink_or_copy(os.path.realpath(data), dest)

        if index is not None and os.path.exists(os.path.realpath(index)):
            # xai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
            self.subprocess_check_call(['cp', os.path.realpath(index), dest + '.%s' % index_ext])
        else:
            # Can happen in exotic condition
            # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam
            # => no index generated by galaxy, but there might be one next to the symlink target
            # this trick allows to skip the bam sorting made by galaxy if already done outside
            if os.path.exists(os.path.realpath(data) + '.%s' % index_ext):
                self.symlink_or_copy(os.path.realpath(data) + '.%s' % index_ext, dest + '.%s' % index_ext)
            else:
                # Fix: args were passed as one tuple, so lazy %-formatting
                # had a single argument for two placeholders; also
                # log.warn is a deprecated alias of log.warning.
                log.warning('Could not find a bam index (.%s file) for %s', index_ext, data)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json)

    def add_vcf(self, data, trackData, vcfOpts=None, zipped=False, **kwargs):
        """Add a VCF track; compress + tabix-index when not pre-zipped.

        NOTE(review): the ``zipped`` branch never creates a tabix index —
        presumably one is shipped alongside; confirm against callers.
        """
        if zipped:
            rel_dest = os.path.join('data', trackData['label'] + '.vcf.gz')
            dest = os.path.join(self.outdir, rel_dest)
            shutil.copy(os.path.realpath(data), dest)
        else:
            rel_dest = os.path.join('data', trackData['label'] + '.vcf')
            dest = os.path.join(self.outdir, rel_dest)
            shutil.copy(os.path.realpath(data), dest)

            cmd = ['bgzip', dest]
            self.subprocess_check_call(cmd)
            cmd = ['tabix', dest + '.gz']
            self.subprocess_check_call(cmd)

            rel_dest = os.path.join('data', trackData['label'] + '.vcf.gz')

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json)

    def add_gff(self, data, format, trackData, gffOpts, **kwargs):
        """Add a GFF/GFF3 track (sorted, bgzipped, tabix-indexed)."""
        rel_dest = os.path.join('data', trackData['label'] + '.gff')
        dest = os.path.join(self.outdir, rel_dest)

        self._sort_gff(data, dest)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest + '.gz', config=style_json)

    def add_bed(self, data, format, trackData, gffOpts, **kwargs):
        """Add a BED track (sorted, bgzipped, tabix-indexed)."""
        rel_dest = os.path.join('data', trackData['label'] + '.bed')
        dest = os.path.join(self.outdir, rel_dest)

        self._sort_bed(data, dest)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest + '.gz', config=style_json)

    def add_paf(self, data, trackData, pafOpts, **kwargs):
        """Add a PAF synteny track against a second (query) assembly."""
        rel_dest = os.path.join('data', trackData['label'] + '.paf')
        dest = os.path.join(self.outdir, rel_dest)

        self.symlink_or_copy(os.path.realpath(data), dest)

        added_assembly = self.add_assembly(pafOpts['genome'], pafOpts['genome_label'], default=False)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, assemblies=[self.current_assembly_id, added_assembly], config=style_json)

    def add_hic(self, data, trackData, hicOpts, **kwargs):
        """Add a Hi-C contact-matrix track."""
        rel_dest = os.path.join('data', trackData['label'] + '.hic')
        dest = os.path.join(self.outdir, rel_dest)

        self.symlink_or_copy(os.path.realpath(data), dest)

        style_json = self._prepare_track_style(trackData)

        self._add_track(trackData['label'], trackData['key'], trackData['category'], rel_dest, config=style_json)

    def add_sparql(self, url, query, query_refnames, trackData):
        """Register a SPARQL-endpoint track via `jbrowse add-track-json`."""
        json_track_data = {
            "type": "FeatureTrack",
            # Fix: this previously referenced the *builtin* `id` function
            # (not JSON-serialisable); use the generated track label, as
            # the other add_* methods do.
            "trackId": trackData['label'],
            "name": trackData['label'],
            "adapter": {
                "type": "SPARQLAdapter",
                "endpoint": {
                    "uri": url,
                    "locationType": "UriLocation"
                },
                "queryTemplate": query
            },
            "category": [
                trackData['category']
            ],
            "assemblyNames": [
                self.current_assembly_id
            ]
        }

        if query_refnames:
            # Fix: this line was `x[...]: value` — an annotation-like
            # no-op; the clear intent is an assignment.
            json_track_data['adapter']['refNamesQueryTemplate'] = query_refnames

        # Fix: subprocess arguments must be strings, not dicts —
        # serialise the track config to JSON for the CLI.
        self.subprocess_check_call([
            'jbrowse', 'add-track-json',
            '--target', os.path.join(self.outdir, 'data'),
            json.dumps(json_track_data)])

        # Doesn't work as of 1.6.4, might work in the future
        # self.subprocess_check_call([
        #     'jbrowse', 'add-track',
        #     '--trackType', 'sparql',
        #     '--name', trackData['label'],
        #     '--category', trackData['category'],
        #     '--target', os.path.join(self.outdir, 'data'),
        #     '--trackId', id,
        #     '--config', '{"queryTemplate": "%s"}' % query,
        #     url])

    def _add_track(self, id, label, category, path, assemblies=None, config=None):
        """Register a file-backed track with the `jbrowse add-track` CLI.

        ``assemblies`` defaults to the current assembly when falsy.
        (Fix: the previous mutable-default ``assemblies=[]`` is replaced
        by None; behavior is unchanged since the list was never mutated.)
        """
        assemblies_opt = self.current_assembly_id
        if assemblies:
            assemblies_opt = ','.join(assemblies)

        cmd = [
            'jbrowse', 'add-track',
            '--load', 'inPlace',
            '--name', label,
            '--category', category,
            '--target', os.path.join(self.outdir, 'data'),
            '--trackId', id,
            '--assemblyNames', assemblies_opt
        ]

        if config:
            cmd.append('--config')
            cmd.append(json.dumps(config))

        cmd.append(path)

        self.subprocess_check_call(cmd)

    def _sort_gff(self, data, dest):
        """Sort a GFF into *dest*, then bgzip + tabix it."""
        # Only index if not already done
        if not os.path.exists(dest):
            cmd = "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" % (data, dest)
            self.subprocess_popen(cmd)

        self.subprocess_check_call(['bgzip', '-f', dest])
        self.subprocess_check_call(['tabix', '-f', '-p', 'gff', dest + '.gz'])

    def _sort_bed(self, data, dest):
        """Sort a BED into *dest* (by chrom, then start), bgzip + tabix it."""
        # Only index if not already done
        if not os.path.exists(dest):
            cmd = ['sort', '-k1,1', '-k2,2n', data]
            with open(dest, 'w') as handle:
                self.subprocess_check_call(cmd, output=handle)

        self.subprocess_check_call(['bgzip', '-f', dest])
        self.subprocess_check_call(['tabix', '-f', '-p', 'bed', dest + '.gz'])

    def process_annotations(self, track):
        """Dispatch each trackfile of *track* to the matching add_* method.

        Generator: yields the generated machine label of each track, which
        callers use to build the default-session visibility config.
        """
        category = track['category'].replace('__pd__date__pd__', TODAY)
        outputTrackConfig = {
            'category': category,
        }

        # Galaxy sanitises element identifiers; map the tokens back.
        mapped_chars = {
            '>': '__gt__',
            '<': '__lt__',
            "'": '__sq__',
            '"': '__dq__',
            '[': '__ob__',
            ']': '__cb__',
            '{': '__oc__',
            '}': '__cc__',
            '@': '__at__',
            '#': '__pd__',
            "": '__cn__'
        }

        for i, (dataset_path, dataset_ext, track_human_label, extra_metadata) in enumerate(track['trackfiles']):
            # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
            for key, value in mapped_chars.items():
                track_human_label = track_human_label.replace(value, key)

            log.info('Processing track %s / %s (%s)', category, track_human_label, dataset_ext)
            outputTrackConfig['key'] = track_human_label
            # We add extra data to hash for the case of REST + SPARQL.
            if 'conf' in track and 'options' in track['conf'] and 'url' in track['conf']['options']:
                rest_url = track['conf']['options']['url']
            else:
                rest_url = ''

            # I chose to use track['category'] instead of 'category' here. This
            # is intentional. This way re-running the tool on a different date
            # will not generate different hashes and make comparison of outputs
            # much simpler.
            hashData = [str(dataset_path), track_human_label, track['category'], rest_url, self.current_assembly_id]
            hashData = '|'.join(hashData).encode('utf-8')
            outputTrackConfig['label'] = hashlib.md5(hashData).hexdigest() + '_%s' % i
            outputTrackConfig['metadata'] = extra_metadata

            outputTrackConfig['style'] = track['style']

            if 'menus' in track['conf']['options']:
                menus = self.cs.parse_menus(track['conf']['options'])
                outputTrackConfig.update(menus)

            if dataset_ext in ('gff', 'gff3'):
                self.add_gff(dataset_path, dataset_ext, outputTrackConfig,
                             track['conf']['options']['gff'])
            elif dataset_ext == 'bed':
                self.add_bed(dataset_path, dataset_ext, outputTrackConfig,
                             track['conf']['options']['gff'])
            elif dataset_ext == 'bigwig':
                self.add_bigwig(dataset_path, outputTrackConfig,
                                track['conf']['options']['wiggle'])
            elif dataset_ext == 'bam':
                real_indexes = track['conf']['options']['pileup']['bam_indices']['bam_index']
                if not isinstance(real_indexes, list):
                    # A single index parses as a string; several parse as
                    # a list. Normalise to a list.
                    real_indexes = [real_indexes]

                self.add_xam(dataset_path, outputTrackConfig,
                             track['conf']['options']['pileup'],
                             index=real_indexes[i], ext="bam")
            elif dataset_ext == 'cram':
                real_indexes = track['conf']['options']['cram']['cram_indices']['cram_index']
                if not isinstance(real_indexes, list):
                    # Same string-vs-list normalisation as for bam above.
                    real_indexes = [real_indexes]

                self.add_xam(dataset_path, outputTrackConfig,
                             track['conf']['options']['cram'],
                             index=real_indexes[i], ext="cram")
            elif dataset_ext == 'blastxml':
                self.add_blastxml(dataset_path, outputTrackConfig, track['conf']['options']['blast'])
            elif dataset_ext == 'vcf':
                self.add_vcf(dataset_path, outputTrackConfig)
            elif dataset_ext == 'vcf_bgzip':
                self.add_vcf(dataset_path, outputTrackConfig, zipped=True)
            elif dataset_ext == 'rest':
                # NOTE(review): add_rest is not defined on this class —
                # 'rest' tracks will raise AttributeError; confirm whether
                # the method was lost in porting.
                self.add_rest(track['conf']['options']['rest']['url'], outputTrackConfig)
            elif dataset_ext == 'synteny':
                self.add_paf(dataset_path, outputTrackConfig,
                             track['conf']['options']['synteny'])
            elif dataset_ext == 'hic':
                self.add_hic(dataset_path, outputTrackConfig,
                             track['conf']['options']['hic'])
            elif dataset_ext == 'sparql':
                sparql_query = track['conf']['options']['sparql']['query']
                for key, value in mapped_chars.items():
                    sparql_query = sparql_query.replace(value, key)
                sparql_query_refnames = track['conf']['options']['sparql']['query_refnames']
                for key, value in mapped_chars.items():
                    sparql_query_refnames = sparql_query_refnames.replace(value, key)
                self.add_sparql(track['conf']['options']['sparql']['url'], sparql_query, sparql_query_refnames, outputTrackConfig)
            else:
                # Fix: log.warn is a deprecated alias of log.warning.
                log.warning('Do not know how to handle %s', dataset_ext)

            # Return non-human label for use in other fields
            yield outputTrackConfig['label']

    def add_default_session(self, data):
        """
        Add some default session settings: set some assemblies/tracks on/off
        """
        tracks_data = []

        # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708

        # We need to know the track type from the config.json generated just before
        config_path = os.path.join(self.outdir, 'data', 'config.json')
        track_types = {}
        with open(config_path, 'r') as config_file:
            config_json = json.load(config_file)

        for track_conf in config_json['tracks']:
            track_types[track_conf['trackId']] = track_conf['type']

        for on_track in data['visibility']['default_on']:
            # TODO several problems with this currently
            # - we are forced to copy the same kind of style config as the per track config from _prepare_track_style (not exactly the same though)
            # - we get an error when refreshing the page
            # - this could be solved by session specs, see https://github.com/GMOD/jbrowse-components/issues/2708
            style_data = {
                "type": "LinearBasicDisplay",
                "height": 100
            }

            if on_track in data['style']:
                if 'display' in data['style'][on_track]:
                    style_data['type'] = data['style'][on_track]['display']
                    del data['style'][on_track]['display']

                style_data.update(data['style'][on_track])

            if on_track in data['style_labels']:
                # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work
                # TODO move this to per track displays?
                style_data['labels'] = data['style_labels'][on_track]

            tracks_data.append({
                "type": track_types[on_track],
                "configuration": on_track,
                "displays": [
                    style_data
                ]
            })

        # The view for the assembly we're adding
        view_json = {
            "type": "LinearGenomeView",
            "tracks": tracks_data
        }

        refName = None
        if data.get('defaultLocation', ''):
            loc_match = re.search(r'^(\w+):(\d+)\.+(\d+)$', data['defaultLocation'])
            if loc_match:
                refName = loc_match.group(1)
                start = int(loc_match.group(2))
                end = int(loc_match.group(3))
        elif self.assembly_ids[self.current_assembly_id] is not None:
            refName = self.assembly_ids[self.current_assembly_id]
            start = 0
            end = 1000000  # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708

        if refName is not None:
            # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
            view_json['displayedRegions'] = [{
                "refName": refName,
                "start": start,
                "end": end,
                "reversed": False,
                "assemblyName": self.current_assembly_id
            }]

        session_name = data.get('session_name', "New session")
        if not session_name:
            session_name = "New session"

        # Merge with possibly existing defaultSession (if upgrading a jbrowse instance)
        session_json = {}
        if 'defaultSession' in config_json:
            session_json = config_json['defaultSession']

        session_json["name"] = session_name

        if 'views' not in session_json:
            session_json['views'] = []

        session_json['views'].append(view_json)

        config_json['defaultSession'] = session_json

        with open(config_path, 'w') as config_file:
            json.dump(config_json, config_file, indent=2)

    def add_general_configuration(self, data):
        """
        Add some general configuration to the config.json file
        """

        config_path = os.path.join(self.outdir, 'data', 'config.json')
        with open(config_path, 'r') as config_file:
            config_json = json.load(config_file)

        config_data = {}

        config_data['disableAnalytics'] = data.get('analytics', 'false') == 'true'

        config_data['theme'] = {
            "palette": {
                "primary": {
                    "main": data.get('primary_color', '#0D233F')
                },
                "secondary": {
                    "main": data.get('secondary_color', '#721E63')
                },
                "tertiary": {
                    "main": data.get('tertiary_color', '#135560')
                },
                "quaternary": {
                    "main": data.get('quaternary_color', '#FFB11D')
                },
            },
            "typography": {
                "fontSize": int(data.get('font_size', 10))
            },
        }

        config_json['configuration'].update(config_data)

        with open(config_path, 'w') as config_file:
            json.dump(config_json, config_file, indent=2)

    def clone_jbrowse(self, jbrowse_dir, destination):
        """Clone a JBrowse directory into a destination directory.

        NOTE(review): when upgrading an existing instance the trailing
        os.symlink will raise FileExistsError if the link already exists —
        confirm whether upgrades ever re-enter this method.
        """
        copytree(jbrowse_dir, destination)

        try:
            shutil.rmtree(os.path.join(destination, 'test_data'))
        except OSError as e:
            log.error("Error: %s - %s." % (e.filename, e.strerror))

        if not os.path.exists(os.path.join(destination, 'data')):
            # It can already exist if upgrading an instance
            os.makedirs(os.path.join(destination, 'data'))
            log.info("makedir %s" % (os.path.join(destination, 'data')))

        os.symlink('./data/config.json', os.path.join(destination, 'config.json'))
+
+
def copytree(src, dst, symlinks=False, ignore=None):
    """Copy the contents of *src* into the (existing) directory *dst*.

    Unlike shutil.copytree, *dst* may already exist: each top-level entry
    of *src* is copied individually (subdirectories recursively via
    shutil.copytree, files via shutil.copy2).
    """
    for entry in os.listdir(src):
        src_path = os.path.join(src, entry)
        dst_path = os.path.join(dst, entry)
        if os.path.isdir(src_path):
            shutil.copytree(src_path, dst_path, symlinks, ignore)
        else:
            shutil.copy2(src_path, dst_path)
+
+
def parse_style_conf(item):
    """Convert a style XML element's text to a Python value.

    Honors an optional ``type`` attribute: 'boolean' maps to True/False,
    'integer' maps to int; anything else returns the raw text.
    """
    declared_type = item.attrib.get('type')
    if declared_type == 'boolean':
        return item.text in ("yes", "true", "True")
    if declared_type == 'integer':
        return int(item.text)
    return item.text
+
+
if __name__ == '__main__':
    # Entry point: parse the Galaxy-generated track-configuration XML and
    # drive JbrowseConnector to build the JBrowse2 instance in --outdir.
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument('xml', type=argparse.FileType('r'), help='Track Configuration')

    parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
    parser.add_argument('--outdir', help='Output directory', default='out')
    parser.add_argument('--version', '-V', action='version', version="%(prog)s 0.8.0")
    args = parser.parse_args()

    tree = ET.parse(args.xml.name)
    root = tree.getroot()

    # This should be done ASAP
    # (rebinds the module-level global consumed by metadata_from_node)
    GALAXY_INFRASTRUCTURE_URL = root.find('metadata/galaxyUrl').text
    # Sometimes this comes as `localhost` without a protocol
    if not GALAXY_INFRASTRUCTURE_URL.startswith('http'):
        # so we'll prepend `http://` and hope for the best. Requests *should*
        # be GET and not POST so it should redirect OK
        GALAXY_INFRASTRUCTURE_URL = 'http://' + GALAXY_INFRASTRUCTURE_URL

    # Constructing the connector clones JBrowse and registers the genomes.
    jc = JbrowseConnector(
        jbrowse=args.jbrowse,
        outdir=args.outdir,
        genomes=[
            {
                'path': os.path.realpath(x.attrib['path']),
                'meta': metadata_from_node(x.find('metadata')),
                'label': x.attrib['label']
            }
            for x in root.findall('metadata/genomes/genome')
        ]
    )

    default_session_data = {
        'visibility': {
            'default_on': [],
            'default_off': [],
        },
        'style': {},
        'style_labels': {}
    }

    # TODO add metadata to tracks
    for track in root.findall('tracks/track'):
        track_conf = {}
        track_conf['trackfiles'] = []

        trackfiles = track.findall('files/trackFile')
        if trackfiles:
            for x in track.findall('files/trackFile'):
                # NOTE(review): this inner `if trackfiles:` re-tests the
                # outer condition and is always true here — redundant.
                if trackfiles:
                    metadata = metadata_from_node(x.find('metadata'))

                    track_conf['trackfiles'].append((
                        os.path.realpath(x.attrib['path']),
                        x.attrib['ext'],
                        x.attrib['label'],
                        metadata
                    ))
        else:
            # For tracks without files (rest, sparql)
            track_conf['trackfiles'].append((
                '', # N/A, no path for rest or sparql
                track.attrib['format'],
                track.find('options/label').text,
                {}
            ))

        track_conf['category'] = track.attrib['cat']
        track_conf['format'] = track.attrib['format']
        track_conf['style'] = {item.tag: parse_style_conf(item) for item in track.find('options/style')}

        # NOTE(review): exact duplicate of the assignment above — harmless
        # but redundant.
        track_conf['style'] = {item.tag: parse_style_conf(item) for item in track.find('options/style')}

        track_conf['style_labels'] = {item.tag: parse_style_conf(item) for item in track.find('options/style_labels')}

        track_conf['conf'] = etree_to_dict(track.find('options'))
        # process_annotations is a generator; the loop below drives it and
        # collects the generated machine labels.
        keys = jc.process_annotations(track_conf)

        for key in keys:
            default_session_data['visibility'][track.attrib.get('visibility', 'default_off')].append(key)

            default_session_data['style'][key] = track_conf['style'] # TODO do we need this anymore?
            default_session_data['style_labels'][key] = track_conf['style_labels']

    default_session_data['defaultLocation'] = root.find('metadata/general/defaultLocation').text
    default_session_data['session_name'] = root.find('metadata/general/session_name').text

    general_data = {
        'analytics': root.find('metadata/general/analytics').text,
        'primary_color': root.find('metadata/general/primary_color').text,
        'secondary_color': root.find('metadata/general/secondary_color').text,
        'tertiary_color': root.find('metadata/general/tertiary_color').text,
        'quaternary_color': root.find('metadata/general/quaternary_color').text,
        'font_size': root.find('metadata/general/font_size').text,
    }

    # Finalise: default session, theme/analytics config, then text indexing.
    jc.add_default_session(default_session_data)
    jc.add_general_configuration(general_data)
    jc.text_index()
+
diff -r 247e17ce504b -r 1d86925dbb4c abjbrowse2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/abjbrowse2.xml Mon Jan 22 11:52:19 2024 +0000
@@ -0,0 +1,1295 @@
+
+ genome browser
+
+ macros.xml
+
+
+ topic_3307
+ topic_0092
+
+
+ operation_0573
+ operation_0564
+
+
+ jbrowse
+
+
+ python '${__tool_directory__}/jbrowse2.py' --version
+ to test the files I want to test. Hmph.
+#if str($uglyTestingHack) == "enabled":
+ cp $trackxml $output
+#end if
+ ]]>
+
+
+
+
+
+ #if str($reference_genome.genome_type_select) == "indexed":
+
+
+ s
+ #else
+
+
+
+
+ #else
+ user_email="anonymous"
+ user_id="-1"
+ display_name="Unnamed History"/>
+ #end if
+
+
+
+
+ #end if
+
+
+ ${jbgen.defaultLocation}
+ ${jbgen.enableAnalytics}
+ ${jbgen.primary_color}
+ ${jbgen.secondary_color}
+ ${jbgen.tertiary_color}
+ ${jbgen.quaternary_color}
+ ${jbgen.font_size}
+ ${jbgen.session_name}
+
+ ${__app__.config.galaxy_infrastructure_url}
+
+
+ #for $tg in $track_groups:
+ #for $track in $tg.data_tracks:
+
+ #end for
+ #end for
+
+
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ `__, who can help you
+with missing features or bugs in the tool.
+
+Options
+-------
+
+The first option you encounter is the **Fasta Sequence(s)**. This option
+now accepts multiple fasta files, allowing you to build JBrowse
+instances that contain data for multiple genomes or chromosomes
+(generally known as "landmark features" in gff3 terminology.) Up to 30
+will be shown from the dropdown selector within JBrowse, this is a known
+issue.
+
+**Genetic Code** is a new feature in v0.4 of JiG / v1.12.0 of JBrowse,
+which allows users to specify a non standard genetic code, and have
+JBrowse highlight the correct start and stop codons.
+
+**Track Groups** represent a set of tracks in a single category. These
+can be used to let your users understand relationships between large
+groups of tracks.
+
+.. image:: sections.png
+
+Annotation Tracks
+-----------------
+
+Within Track Groups, you have one or more **Annotation Tracks**. Each
+Annotation Track is a group of datasets which have similar styling.
+This allows you to rapidly build up JBrowse instances without having to
+configure tracks individually. A massive improvement over previous
+versions. For example, if you have five different GFF3 files from
+various gene callers that you wish to display, you can take advantage of
+this feature to style all of them similarly.
+
+There are a few different types of tracks supported, each with their own
+set of options:
+
+GFF3/BED
+~~~~~~~~
+
+These are your standard feature tracks. They usually highlight genes,
+mRNAs and other features of interest along a genomic region. The
+underlying tool and this help documentation focus primarily on GFF3
+data, and have not been tested extensively with other formats. Automatic
+min/max detection will fail under BED datasets.
+
+The data may be of a subclass we call **match/match part** data. This
+consists of top level ``match`` features, with a child ``match_part``
+feature, and is often used in displaying alignments. (See "Alignments"
+section on the `GFF3
+specification `__ for more
+information). If the data is match/match part, you will need to specify
+the top level match feature name, as it can be one of a few different SO
+terms, and JiG does not yet have the ability to understand SO terms.
+
+Next up is the **Styling Options** section, which lets you control a few
+properties on how the track is styled. Most of these you will not need
+to configure and can safely leave on defaults. Occasionally you will
+want to change what information is shown in the end product.
+
+.. image:: styling.png
+
+In the above image you can see some black text, and some blue text. The
+source of the black text is configured with the **style.label** option,
+and the source of the blue text is configured with the
+**style.description** option.
+
+Feature Score Scaling & Colouring Options
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+First, you need to choose between ignoring the score attribute of GFF3
+files, or using it. If you choose to ignore it, all features will be
+coloured with a solid colour. If you choose to use it, features will
+have slightly different colours based on their scores.
+
+.. image:: opacity.png
+
+If you choose **Ignore score**, you may choose between automatically
+choosing a colour, or manually specifying one. The automatically chosen
+colours vary along a brewer palette and generally look quite nice with
+no human intervention required. The manual colour choice is somewhat
+self explanatory. Clicking on the small coloured square will bring up a
+colour palette.
+
+If you choose **Base on score**, you're faced with a dizzying array of
+options. First is the function to map the colour choices to colour
+values. JiG comes with a few functions built in such as linear scaling,
+logarithmic scaling, and blast scaling.
+
+The **linear scaling** method says "take these values, and they map
+directly to a range of output values". **Logarithmic scaling** says
+"please take the log of the score before mapping", and **Blast scaling**
+is further specialised to handle blast data more nicely. These are
+convenience functions to help transform the wide array of possible
+values in the GFF3 score attribute to more meaningful numbers. If you
+need more comprehensive score scaling, it is recommended that you
+pre-process your GFF3 files somehow.
+
+Once you've selected a scaling method, you can choose to manually
+specify the minimum and maximum expected values, or you can let JiG
+determine them for you automatically.
+
+Finally, opacity is the only mapping we currently provide. Future
+iterations will attempt to improve upon this and provide more colour
+scales. The Opacity option maps the highest scoring features to full
+opacity, and everything else to lower ones.
+
+BAM Pileups
+~~~~~~~~~~~
+
+We support BAM files and can automatically generate SNP tracks based on
+that bam data.
+
+.. image:: bam.png
+
+This is *strongly discouraged* for high coverage density datasets.
+Unfortunately there are no other configuration options exposed for bam
+files.
+
+BlastXML
+~~~~~~~~
+
+.. image:: blast.png
+
+JiG now supports both blastn and blastp datasets. JiG internally uses a
+blastXML to gapped GFF3 tool to convert your blastxml datasets into a
+format amenable to visualization in JBrowse. This tool is also
+available separately from the IUC on the toolshed.
+
+**Minimum Gap Size** reflects how long a gap must be before it becomes a
+real gap in the processed gff3 file. In the picture above, various sizes
+of gaps can be seen. If the minimum gap size was set much higher, say
+100nt, many of the smaller gaps would disappear, and the features on
+both sides would be merged into one, longer feature. This setting is
+inversely proportional to runtime and output file size. *Do not set this
+to a low value for large datasets*. By setting this number lower, you
+will have extremely large outputs and extremely long runtimes. The
+default was configured based off of the author's experience, but the
+author only works on small viruses. It is *strongly* recommended that
+you filter your blast results before display, e.g. picking out the top
+10 hits or so.
+
+**Protein blast search** option merely informs underlying tools that
+they should adjust feature locations by 3x.
+
+Styling Options
+^^^^^^^^^^^^^^^
+
+Please see the styling options for GFF3 datasets, they are identical.
+
+Feature Score Scaling & Coloring Options
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Please see the score scaling and colouring options for GFF3 datasets,
+they are identical. Remember to set your score scaling to "blast" method
+if you do use it.
+
+Bigwig XY
+~~~~~~~~~
+
+.. image:: bigwig.png
+
+**XYPlot**
+
+BigWig tracks can be displayed as a "density" plot which is a continuous
+line which varies in colour, or as an "XYplot." XYplots are preferable
+for users to visually identify specific features in a bigwig track,
+however density tracks are more visually compact.
+
+**Variance Band** is an option available to XYPlots, and can be seen in
+the third and fourth tracks in the above picture. This overlays a mean
+line, and 1 and 2 standard deviation areas.
+
+**Track Scaling** is different from colour scaling, instead it
+configures how the track behaves inside of JBrowse. **Autoscaling
+globally** means that JBrowse will determine the minimum and maximum for
+the track, and fix the bounds of the viewport to that. E.g. if your
+track ranges from 1-1000, and the region you're currently zoomed to only
+goes from 0-50, then the viewport range will still show 1-1000. This is
+good for global genomic context. However you may wish to consider
+**autoscaling locally** instead. In the example of a region which varies
+from 0-50, autoscaling locally would cause the individual track's
+viewport to re-adjust and show just the 0-50 region. If neither of these
+options are palatable, you may manually hardcode the minimum and
+maximums for the track to scale to.
+
+Colour Options
+^^^^^^^^^^^^^^
+
+BigWig tracks have two colours in JBrowse, a positive and a negative
+colour.
+
+As always you may manually choose a colour, or let JiG choose for you.
+
+One of the more interesting options is the **Bicolor pivot**. This
+option allows you to control the point at which JBrowse switches from
+the positive colour to the negative. In the above graphic, you can see
+this has been configured to "mean" for the first two (orange and blue)
+tracks.
+
+VCFs/SNPs
+~~~~~~~~~
+
+These tracks do not support any special configuration.
+
+@ATTRIBUTION@
+]]>
+
+
+
diff -r 247e17ce504b -r 1d86925dbb4c jbrowse2.py
--- a/jbrowse2.py Sat Jan 20 10:04:08 2024 +0000
+++ b/jbrowse2.py Mon Jan 22 11:52:19 2024 +0000
@@ -1175,7 +1175,9 @@
refName = None
if data.get("defaultLocation", ""):
ddl = data["defaultLocation"]
- loc_match = re.search(r"^(\w.+):(\d+)\.+(\d+)$", ddl)
+ loc_match = re.search(
+ r"^([^:]+):(\d+)\.+(\d+)$", ddl
+ ) # was re.search(r"^(\w.+):(\d+)\.+(\d+)$"
if loc_match:
refName = loc_match.group(1)
start = int(loc_match.group(2))
@@ -1190,7 +1192,7 @@
start = 0
end = 10000 # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708
logging.info(
- "@@@ no defaultlocation found for default session - suggest adding one!"
+ "@@@ no defaultlocation found for default session - please add one"
)
if refName is not None:
@@ -1211,7 +1213,7 @@
)
else:
logging.info(
- "@@@ no assembly name found default session - suggest adding one!"
+ "@@@ no contig name found for default session - please add one!"
)
session_name = data.get("session_name", "New session")
for key, value in mapped_chars.items():
diff -r 247e17ce504b -r 1d86925dbb4c macros.xml
--- a/macros.xml Sat Jan 20 10:04:08 2024 +0000
+++ b/macros.xml Mon Jan 22 11:52:19 2024 +0000
@@ -28,7 +28,6 @@