comparison jbrowse2.py @ 0:53c2be00bb6f draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 0a86c88a95b0d1cc49d84544136de6556b95320f
author bgruening
date Wed, 05 Jun 2024 08:15:49 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:53c2be00bb6f
1 #!/usr/bin/env python
2
3 import argparse
4 import binascii
5 import datetime
6 # import hashlib
7 import json
8 import logging
9 import os
10 import re
11 import shutil
12 import ssl
13 import struct
14 import subprocess
15 import tempfile
16 import urllib.request
17 import xml.etree.ElementTree as ET
18 from collections import defaultdict
19
# Emit everything at DEBUG level; the tool uses a dedicated "jbrowse" logger.
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("jbrowse")

# JBrowse2 release pinned for cloning - but not used until now.
JB2VER = "v2.11.0"
# When true, external commands are echoed to the log. Useful for seeing
# what is being written but not for production setups.
logCommands = True
# Run date as YYYY-MM-DD.
TODAY = datetime.date.today().isoformat()
# Directory that contains this script.
SELF_LOCATION = os.path.dirname(os.path.realpath(__file__))
# Base URL of the Galaxy server; None until something assigns it.
GALAXY_INFRASTRUCTURE_URL = None
# Special characters and the "__xx__" tokens that stand in for them.
# NOTE(review): presumably these mirror Galaxy's parameter sanitising -
# confirm against the caller that uses this table.
mapped_chars = {
    ">": "__gt__",
    "<": "__lt__",
    "'": "__sq__",
    '"': "__dq__",
    "[": "__ob__",
    "]": "__cb__",
    "{": "__oc__",
    "}": "__cc__",
    "@": "__at__",
    "#": "__pd__",
    "": "__cn__",
}
43
44
# Wrapper page that embeds the JBrowse2 app in a full-size iframe.
# The __SESSION_SPEC__ token sits at the end of the iframe URL's query string.
# NOTE(review): presumably a caller substitutes it with session parameters
# (or an empty string) before the page is written - confirm at the call site.
# A stray "</script>" end tag that had no matching opening tag was removed
# from the <head>.
INDEX_TEMPLATE = """<!doctype html>
<html lang="en" style="height:100%">
<head>
<meta charset="utf-8"/>
<link rel="shortcut icon" href="./favicon.ico"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<meta name="theme-color" content="#000000"/>
<meta name="description" content="A fast and flexible genome browser"/>
<link rel="manifest" href="./manifest.json"/>
<title>JBrowse</title>
</head>
<body style="overscroll-behavior:none; height:100%; margin: 0;">
<iframe
id="jbframe"
title="JBrowse2"
frameborder="0"
width="100%"
height="100%"
src='index_noview.html?config=config.json__SESSION_SPEC__'>
</iframe>
</body>
</html>
"""
69
70
class ColorScaling(object):
    """Build colour/menu configuration fragments for tracks.

    Colours are either taken from the track description or drawn, in
    order, from a fixed palette (``BREWER_COLOUR_SCHEMES``); each instance
    keeps its own position in that palette.
    """

    # JavaScript colour callback with a fixed RGB colour and a
    # score-derived opacity. Doubled braces survive str.format().
    COLOR_FUNCTION_TEMPLATE = """
    function(feature, variableName, glyphObject, track) {{
        var score = {score};
        {opacity}
        return 'rgba({red}, {green}, {blue}, ' + opacity + ')';
    }}
    """

    # JavaScript colour callback that looks the colour and score up on the
    # feature, its ancestors or its descendants.
    COLOR_FUNCTION_TEMPLATE_QUAL = r"""
    function(feature, variableName, glyphObject, track) {{
        var search_up = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.parent() === undefined) {{
                return;
            }}else{{
                return self(sf.parent(), attr);
            }}
        }};

        var search_down = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.children() === undefined) {{
                return;
            }}else{{
                var kids = sf.children();
                for(var child_idx in kids){{
                    var x = self(kids[child_idx], attr);
                    if(x !== undefined){{
                        return x;
                    }}
                }}
                return;
            }}
        }};

        var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color});
        var score = (search_up(feature, 'score') || search_down(feature, 'score'));
        {opacity}
        if(score === undefined){{ opacity = 1; }}
        var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color);
        var red = parseInt(result[1], 16);
        var green = parseInt(result[2], 16);
        var blue = parseInt(result[3], 16);
        if(isNaN(opacity) || opacity < 0){{ opacity = 0; }}
        return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')';
    }}
    """

    # JavaScript snippets computing ``opacity`` from ``score``; "linear" and
    # "logarithmic" take {min}/{max} via str.format().
    OPACITY_MATH = {
        "linear": """
            var opacity = (score - ({min})) / (({max}) - ({min}));
        """,
        "logarithmic": """
            var opacity = Math.log10(score - ({min})) / Math.log10(({max}) - ({min}));
        """,
        "blast": """
            var opacity = 0;
            if(score == 0.0) {{
                opacity = 1;
            }} else {{
                opacity = (20 - Math.log10(score)) / 180;
            }}
        """,
    }

    BREWER_COLOUR_IDX = 0
    # Palette cycled through by _get_colours().
    BREWER_COLOUR_SCHEMES = [
        (166, 206, 227),
        (31, 120, 180),
        (178, 223, 138),
        (51, 160, 44),
        (251, 154, 153),
        (227, 26, 28),
        (253, 191, 111),
        (255, 127, 0),
        (202, 178, 214),
        (106, 61, 154),
        (255, 255, 153),
        (177, 89, 40),
        (228, 26, 28),
        (55, 126, 184),
        (77, 175, 74),
        (152, 78, 163),
        (255, 127, 0),
    ]

    # Named two-colour (start, end) palettes.
    BREWER_DIVERGING_PALLETES = {
        "BrBg": ("#543005", "#003c30"),
        "PiYg": ("#8e0152", "#276419"),
        "PRGn": ("#40004b", "#00441b"),
        "PuOr": ("#7f3b08", "#2d004b"),
        "RdBu": ("#67001f", "#053061"),
        "RdGy": ("#67001f", "#1a1a1a"),
        "RdYlBu": ("#a50026", "#313695"),
        "RdYlGn": ("#a50026", "#006837"),
        "Spectral": ("#9e0142", "#5e4fa2"),
    }

    def __init__(self):
        # Position of the next automatic colour in BREWER_COLOUR_SCHEMES.
        self.brewer_colour_idx = 0

    def rgb_from_hex(self, hexstr):
        """Return the (r, g, b) byte tuple for a six-digit hex string (no '#')."""
        # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
        return struct.unpack("BBB", binascii.unhexlify(hexstr))

    def min_max_gff(self, gff_file):
        """Return ``(min, max)`` of the score column (6th tab field) of a file.

        Lines without a numeric sixth column (comments, headers, "." scores)
        are skipped. Returns ``(None, None)`` when no line carries a score.
        """
        min_val = None
        max_val = None
        with open(gff_file, "r") as handle:
            for line in handle:
                try:
                    value = float(line.split("\t")[5])
                except (IndexError, ValueError):
                    # Not a scored feature line; ignore it.
                    continue
                # Compare against None explicitly: a running extreme of 0.0
                # is falsy and must not be mistaken for "not set yet".
                if min_val is None or value < min_val:
                    min_val = value
                if max_val is None or value > max_val:
                    max_val = value
        return min_val, max_val

    def hex_from_rgb(self, r, g, b):
        """Return the '#rrggbb' string for three 0-255 components."""
        return "#%02x%02x%02x" % (r, g, b)

    def _get_colours(self):
        """Return the next palette colour, wrapping around at the end."""
        r, g, b = self.BREWER_COLOUR_SCHEMES[
            self.brewer_colour_idx % len(self.BREWER_COLOUR_SCHEMES)
        ]
        self.brewer_colour_idx += 1
        return r, g, b

    def parse_menus(self, track):
        """Translate ``track["menus"]`` into a ``menuTemplate`` config dict.

        The template list starts with four empty entries; one entry is
        appended per menu item found under ``track["menus"]["menu"]``
        (a single dict or a list of dicts).
        """
        trackConfig = {"menuTemplate": [{}, {}, {}, {}]}

        if "menu" in track["menus"]:
            menu_list = [track["menus"]["menu"]]
            if isinstance(track["menus"]["menu"], list):
                menu_list = track["menus"]["menu"]

            for m in menu_list:
                tpl = {
                    "action": m["action"],
                    "label": m.get("label", "{name}"),
                    "iconClass": m.get("iconClass", "dijitIconBookmark"),
                }
                # Optional keys are copied through only when present.
                for optional in ("url", "content", "title"):
                    if optional in m:
                        tpl[optional] = m[optional]

                trackConfig["menuTemplate"].append(tpl)

        return trackConfig

    def parse_colours(self, track, trackFormat, gff3=None):
        """Return a ``{"style": ...}`` config dict for ``track``.

        ``trackFormat`` selects the strategy: "wiggle" uses a positive /
        negative colour pair plus pivot, otherwise ``track["scaling"]``
        drives either a flat colour or a JavaScript colour callback
        ("blast" and "gene_calls" formats). ``gff3`` is the file scanned
        for automatic score ranges.
        """
        # Wiggle tracks have a bicolor pallete
        trackConfig = {"style": {}}
        if trackFormat == "wiggle":

            trackConfig["style"]["pos_color"] = track["wiggle"]["color_pos"]
            trackConfig["style"]["neg_color"] = track["wiggle"]["color_neg"]

            if trackConfig["style"]["pos_color"] == "__auto__":
                # Negative first, then positive: the draw order fixes which
                # palette entries each one receives.
                trackConfig["style"]["neg_color"] = self.hex_from_rgb(
                    *self._get_colours()
                )
                trackConfig["style"]["pos_color"] = self.hex_from_rgb(
                    *self._get_colours()
                )

            # Wiggle tracks can change colour at a specified place
            bc_pivot = track["wiggle"]["bicolor_pivot"]
            if bc_pivot not in ("mean", "zero"):
                # The values are either one of those two strings
                # or a number
                bc_pivot = float(bc_pivot)
            trackConfig["bicolor_pivot"] = bc_pivot
        elif "scaling" in track:
            if track["scaling"]["method"] == "ignore":
                if track["scaling"]["scheme"]["color"] != "__auto__":
                    trackConfig["style"]["color"] = track["scaling"]["scheme"]["color"]
                else:
                    trackConfig["style"]["color"] = self.hex_from_rgb(
                        *self._get_colours()
                    )
            else:
                # Scored method
                algo = track["scaling"]["algo"]
                # linear, logarithmic, blast
                scales = track["scaling"]["scales"]
                # type __auto__, manual (min, max)
                scheme = track["scaling"]["scheme"]
                # scheme -> (type (opacity), color)
                # ==================================
                # GENE CALLS OR BLAST
                # ==================================
                if trackFormat == "blast":
                    red, green, blue = self._get_colours()
                    color_function = self.COLOR_FUNCTION_TEMPLATE.format(
                        **{
                            "score": "feature._parent.get('score')",
                            "opacity": self.OPACITY_MATH["blast"],
                            "red": red,
                            "green": green,
                            "blue": blue,
                        }
                    )
                    trackConfig["style"]["color"] = color_function.replace("\n", "")
                elif trackFormat == "gene_calls":
                    # Default values, based on GFF3 spec
                    min_val = 0
                    max_val = 1000
                    # Get min/max and build a scoring function since JBrowse doesn't
                    if scales["type"] == "automatic" or scales["type"] == "__auto__":
                        min_val, max_val = self.min_max_gff(gff3)
                        if min_val is None or max_val is None:
                            # No scored features: keep the defaults rather
                            # than writing "None" into the JavaScript.
                            min_val, max_val = 0, 1000
                    else:
                        min_val = scales.get("min", 0)
                        max_val = scales.get("max", 1000)

                    if scheme["color"] == "__auto__":
                        user_color = "undefined"
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
                    elif scheme["color"].startswith("#"):
                        user_color = "'%s'" % self.hex_from_rgb(
                            *self.rgb_from_hex(scheme["color"][1:])
                        )
                        auto_color = "undefined"
                    else:
                        user_color = "undefined"
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())

                    color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(
                        **{
                            "opacity": self.OPACITY_MATH[algo].format(
                                **{"max": max_val, "min": min_val}
                            ),
                            "user_spec_color": user_color,
                            "auto_gen_color": auto_color,
                        }
                    )

                    trackConfig["style"]["color"] = color_function.replace("\n", "")
        return trackConfig
325
326
def etree_to_dict(t):
    """Convert an ElementTree element into nested plain dictionaries.

    The result has one key, the element's tag. Child elements become keys
    of the nested dict (repeated tags collapse into a list), attributes are
    stored under "@name" keys, and text goes under "#text" - or becomes the
    value itself when the element has neither children nor attributes.
    ``None`` yields an empty dict.
    """
    if t is None:
        return {}

    tag = t.tag
    attrs = t.attrib
    kids = list(t)

    result = {tag: {} if attrs else None}

    if kids:
        grouped = defaultdict(list)
        for kid in kids:
            for key, val in etree_to_dict(kid).items():
                grouped[key].append(val)
        collapsed = {}
        for key, vals in grouped.items():
            # A tag seen once maps to its value; repeats stay a list.
            collapsed[key] = vals[0] if len(vals) == 1 else vals
        result = {tag: collapsed}

    if attrs:
        for key, val in attrs.items():
            result[tag]["@" + key] = val

    if t.text:
        stripped = t.text.strip()
        if not (kids or attrs):
            # Leaf without attributes: the text is the whole value.
            result[tag] = stripped
        elif stripped:
            result[tag]["#text"] = stripped
    return result
349
350
# Directory holding this script. The helper scripts convertMAF.sh,
# blastxml_to_gapped_gff3.py and gff3_rebase.py are looked up relative to it.
# Computed with the same expression as SELF_LOCATION.
INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
352
353
def metadata_from_node(node):
    """Flatten the attributes of a track's metadata XML node into a dict.

    Attributes of the ``dataset``, ``history``, ``metadata`` and ``tool``
    child elements are copied under ``dataset_*``, ``history_*``,
    ``metadata_*`` and ``tool_*`` keys. Some values are then rewritten as
    HTML links (EDAM format, user e-mail, history, tool).

    Returns an empty dict when ``node`` cannot be searched or does not
    contain exactly one ``dataset`` child.

    Each link rewrite is applied once and only when its source value is
    present, so a node lacking ``history`` (or individual attributes) no
    longer raises ``KeyError``.
    """
    metadata = {}
    try:
        datasets = node.findall("dataset")
        if len(datasets) != 1:
            # exit early
            return metadata
    except Exception:
        # node is not an element-like object (e.g. None)
        return {}

    for key, value in datasets[0].attrib.items():
        metadata["dataset_%s" % key] = value

    histories = node.findall("history")
    if histories:
        for key, value in histories[0].attrib.items():
            metadata["history_%s" % key] = value

    metas = node.findall("metadata")
    if metas:
        for key, value in metas[0].attrib.items():
            metadata["metadata_%s" % key] = value

        # Additional Mappings applied:
        if "dataset_edam_format" in metadata:
            edam = metadata["dataset_edam_format"]
            metadata[
                "dataset_edam_format"
            ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
                edam, metadata.get("dataset_file_ext", edam)
            )
        if "history_user_email" in metadata:
            metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
                metadata["history_user_email"]
            )
        if "history_display_name" in metadata:
            # Keep the plain name before it is replaced by the link.
            hist_name = metadata["history_display_name"]
            metadata["hist_name"] = hist_name
            metadata[
                "history_display_name"
            ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
                galaxy=GALAXY_INFRASTRUCTURE_URL,
                encoded_hist_id=metadata.get("history_id", "not available"),
                hist_name=hist_name,
            )

    tools = node.findall("tool")
    if tools:
        for key, value in tools[0].attrib.items():
            metadata["tool_%s" % key] = value
        metadata[
            "tool_tool"
        ] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}{tool_version}</a>'.format(
            galaxy=GALAXY_INFRASTRUCTURE_URL,
            encoded_id=metadata.get("dataset_id", ""),
            tool_id=metadata.get("tool_tool_id", ""),
            tool_version=metadata.get("tool_tool_version", ""),
        )
    return metadata
402
403
404 class JbrowseConnector(object):
def __init__(self, outdir, jbrowse2path):
    """Set up bookkeeping for one JBrowse2 output directory.

    Creates ``outdir`` if needed, initialises the empty assembly/track
    registries and finally calls ``self.clone_jbrowse()``.
    """
    # Locations
    self.outdir = outdir
    self.jbrowse2path = jbrowse2path
    self.giURL = GALAXY_INFRASTRUCTURE_URL
    os.makedirs(self.outdir, exist_ok=True)
    self.config_json_file = os.path.join(outdir, "config.json")
    self.config_json = {}

    # Assemblies - these require more than a few line diff.
    self.assemblies = []
    self.assmeta = {}
    # for default session - these are read as first line of the assembly .fai
    self.ass_first_contigs = []
    self.genome_names = []

    # Tracks
    self.trackCounter = 0  # to avoid name clashes
    self.trackIdlist = []
    self.tracksToAdd = {}

    self.clone_jbrowse()
422
def get_cwd(self, cwd):
    """Return the directory commands should run in.

    A truthy ``cwd`` selects the output directory; otherwise the current
    working directory of this process is returned. The latter is read with
    ``os.getcwd()`` instead of spawning an external ``pwd`` process.
    """
    if cwd:
        return self.outdir
    return os.getcwd()
428
def subprocess_check_call(self, command, output=None, cwd=True):
    """Run ``command`` (an argument list) and raise if it fails.

    ``output``, when given, is passed as the child's stdout. ``cwd`` is
    resolved through ``get_cwd``. The command is logged when
    ``logCommands`` is enabled.
    """
    workdir = self.get_cwd(cwd)
    if not output:
        if logCommands:
            log.debug("cd %s && %s", workdir, " ".join(command))
        subprocess.check_call(command, cwd=workdir)
        return

    if logCommands:
        log.debug("cd %s && %s > %s", workdir, " ".join(command), output)
    subprocess.check_call(command, cwd=workdir, stdout=output)
440
def subprocess_popen(self, command, cwd=True):
    """Run a shell command line and raise ``RuntimeError`` on failure.

    ``command`` is a single string executed through the shell, so pipes
    and redirections work. On a non-zero exit status the command and its
    captured stdout/stderr are logged before raising.
    """
    if logCommands:
        log.debug(command)
    proc = subprocess.Popen(
        command,
        cwd=self.get_cwd(cwd),
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    captured_out, captured_err = proc.communicate()
    status = proc.returncode
    if status == 0:
        return
    for item in (command, captured_out, captured_err):
        log.error(item)
    raise RuntimeError(f"Command ( {command} ) failed with exit code {status}")
459
def subprocess_check_output(self, command):
    """Run ``command`` (an argument list) in the output directory.

    Returns the captured stdout; the command is logged when
    ``logCommands`` is enabled.
    """
    if logCommands:
        rendered = " ".join(command)
        log.debug(rendered)
    captured = subprocess.check_output(command, cwd=self.outdir)
    return captured
464
def symlink_or_copy(self, src, dest):
    """Place ``src`` at ``dest`` by symlinking or copying.

    A symlink (``ln -s``) is used when the GALAXY_JBROWSE_SYMLINKS
    environment variable is set to any non-empty value, a plain ``cp``
    otherwise. Returns whatever ``subprocess_check_call`` returns.
    """
    wants_links = bool(os.environ.get("GALAXY_JBROWSE_SYMLINKS", ""))
    program = ["ln", "-s"] if wants_links else ["cp"]
    return self.subprocess_check_call(program + [src, dest])
474
475 def _prepare_track_style(self, trackDict):
476
477 style_data = {
478 "type": "LinearBasicDisplay",
479 "displayId": "%s-LinearBasicDisplay" % trackDict["trackId"],
480 }
481
482 if trackDict.get("displays", None): # use first if multiple like bed
483 style_data["type"] = trackDict["displays"][0]["type"]
484 style_data["displayId"] = trackDict["displays"][0]["displayId"]
485 return style_data
486
def getNrow(self, url):
    """Return the number of lines in a local file or at an http(s) URL.

    Local paths are opened directly (errors propagate). For URLs any
    failure is treated as best-effort and yields 0. Lines are counted
    while streaming and the handle is always closed.
    """
    if url.startswith(("https://", "http://")):
        try:
            # NOTE(review): certificate and hostname verification are
            # switched off for this request - confirm that is intended.
            scontext = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
            scontext.check_hostname = False
            scontext.verify_mode = ssl.VerifyMode.CERT_NONE
            with urllib.request.urlopen(url, context=scontext) as f:
                nrow = sum(1 for _ in f)
        except Exception:
            # Best effort: an unreachable URL counts as zero rows.
            logging.debug("getNrow could not read %s", url, exc_info=True)
            nrow = 0
    else:
        with open(url, "r") as handle:
            nrow = sum(1 for _ in handle)
    logging.debug("getNrow %s returning %d" % (url, nrow))
    return nrow
504
def process_genomes(self, genomes):
    """Register a group of reference genomes and return the primary name.

    Each entry of ``genomes`` is a mapping with "useuri", "label" and
    "path". The first genome's name becomes the primary name, under which
    the collected metadata and an empty track list are stored. Genomes
    whose name is already known are not built again.
    """
    built = []
    collected_meta = []
    useuri = False
    primaryGenome = None
    for genome_node in genomes:
        if genome_node["useuri"] == "yes":
            # Once any genome is remote, the flag stays set for the rest.
            useuri = True

        genome_name = genome_node["label"].strip()
        if not genome_name:
            base = os.path.basename(genome_node["path"])
            genome_name = os.path.splitext(base)[0]
        words = genome_name.split()
        if len(words) > 1:
            # spaces and cruft break scripts when substituted
            genome_name = words[0]

        if not primaryGenome:
            primaryGenome = genome_name

        this_genome = {}
        if genome_name not in self.genome_names:
            self.genome_names.append(genome_name)
            fapath = genome_node["path"]
            if not useuri:
                fapath = os.path.realpath(fapath)
            assem, first_contig = self.make_assembly(fapath, genome_name, useuri)
            built.append(assem)
            self.ass_first_contigs.append(first_contig)
            if genome_name == primaryGenome:  # first one
                # first one for all tracks
                this_genome = {
                    "genome_name": genome_name,
                    "genome_sequence_adapter": assem["sequence"]["adapter"],
                    "genome_firstcontig": first_contig,
                }
        collected_meta.append(this_genome)

    self.assemblies += built
    self.assmeta[primaryGenome] = collected_meta
    self.tracksToAdd[primaryGenome] = []
    return primaryGenome
541
def make_assembly(self, fapath, gname, useuri):
    """Build the assembly config for one reference sequence.

    With ``useuri`` the FASTA stays remote and only its ``.fai`` is read;
    otherwise the FASTA is bgzip-compressed into the output directory as
    ``<gname>.fa.gz`` and indexed with samtools.

    Returns ``(assembly_dict, first_contig)`` where ``first_contig`` is
    ``[gname, <name>, <length>]`` taken from the first ``.fai`` line.

    Paths are shell-quoted before being placed in the command line and the
    ``.fai`` handle is closed deterministically.
    """
    import shlex

    if useuri:
        faname = fapath
        # NOTE(review): certificate and hostname verification are switched
        # off for this request - confirm that is intended.
        scontext = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        scontext.check_hostname = False
        scontext.verify_mode = ssl.VerifyMode.CERT_NONE
        with urllib.request.urlopen(url=faname + ".fai", context=scontext) as f:
            fl = f.readline()
        contig = fl.decode("utf8").strip()
        # Merlin 172788 8 60 61
    else:
        faname = gname + ".fa.gz"
        fadest = os.path.realpath(os.path.join(self.outdir, faname))
        quoted_dest = shlex.quote(fadest)
        cmd = "bgzip -i -c %s -I %s > %s && samtools faidx %s" % (
            shlex.quote(fapath),
            shlex.quote(fadest + ".gzi"),
            quoted_dest,
            quoted_dest,
        )
        self.subprocess_popen(cmd)
        with open(fadest + ".fai", "r") as fai:
            contig = fai.readline().strip()
    adapter = {
        "type": "BgzipFastaAdapter",
        "fastaLocation": {
            "uri": faname,
        },
        "faiLocation": {
            "uri": faname + ".fai",
        },
        "gziLocation": {
            "uri": faname + ".gzi",
        },
    }
    # Keep the contig name and length, prefixed with the genome name.
    first_contig = contig.split()[:2]
    first_contig.insert(0, gname)
    trackDict = {
        "name": gname,
        "sequence": {
            "type": "ReferenceSequenceTrack",
            "trackId": gname,
            "adapter": adapter,
        },
        "displays": [
            {
                "type": "LinearReferenceSequenceDisplay",
                "displayId": "%s-LinearReferenceSequenceDisplay" % gname,
            },
            {
                "type": "LinearGCContentDisplay",
                "displayId": "%s-LinearGCContentDisplay" % gname,
            },
        ],
    }
    return (trackDict, first_contig)
586 "type": "LinearReferenceSequenceDisplay",
587 "displayId": "%s-LinearReferenceSequenceDisplay" % gname,
588 },
589 {
590 "type": "LinearGCContentDisplay",
591 "displayId": "%s-LinearGCContentDisplay" % gname,
592 },
593 ],
594 }
595 return (trackDict, first_contig)
596
597 def add_default_view(self):
598 cmd = [
599 "jbrowse",
600 "set-default-session",
601 "-s",
602 self.config_json_file,
603 "-t",
604 ",".join(self.trackIdlist),
605 "-n",
606 "JBrowse2 in Galaxy",
607 "--target",
608 self.config_json_file,
609 "-v",
610 " LinearGenomeView",
611 ]
612 self.subprocess_check_call(cmd)
613
614 def write_config(self):
615 with open(self.config_json_file, "w") as fp:
616 json.dump(self.config_json, fp, indent=2)
617
618 def text_index(self):
619 # Index tracks
620 args = [
621 "jbrowse",
622 "text-index",
623 "--target",
624 self.outdir,
625 "--assemblies",
626 self.genome_name,
627 ]
628
629 tracks = ",".join(self.trackIdlist)
630 if tracks:
631 args += ["--tracks", tracks]
632
633 self.subprocess_check_call(args)
634
635 def add_hic(self, data, trackData):
636 """
637 HiC adapter.
638 https://github.com/aidenlab/hic-format/blob/master/HiCFormatV9.md
639 for testing locally, these work:
640 HiC data is from https://s3.amazonaws.com/igv.broadinstitute.org/data/hic/intra_nofrag_30.hic
641 using hg19 reference track as a
642 'BgzipFastaAdapter'
643 fastaLocation:
644 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz',
645 faiLocation:
646 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai',
647 gziLocation:
648 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi',
649 Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438
650
651 """
652 tId = trackData["label"]
653 wasCool = trackData["wasCool"]
654 # can be served - if public.
655 # dsId = trackData["metadata"]["dataset_id"]
656 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId)
657 useuri = trackData["useuri"].lower() == "yes"
658 logging.debug("wasCool=%s, data=%s, tId=%s" % (wasCool, data, tId))
659 if useuri:
660 uri = data
661 else:
662 uri = tId + ".hic"
663 if not wasCool:
664 dest = os.path.join(self.outdir, uri)
665 if not os.path.exists(dest):
666 cmd = ["cp", data, dest]
667 self.subprocess_check_call(cmd)
668 else:
669 logging.error("not wasCool but %s exists" % dest)
670 categ = trackData["category"]
671 trackDict = {
672 "type": "HicTrack",
673 "trackId": tId,
674 "name": trackData["name"],
675 "assemblyNames": [trackData["assemblyNames"]],
676 "category": [
677 categ,
678 ],
679 "adapter": {"type": "HicAdapter", "hicLocation": {"uri": uri}},
680 }
681 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
682 self.trackIdlist.append(tId)
683
684 def add_maf(self, data, trackData):
685 """
686 from https://github.com/cmdcolin/maf2bed
687 Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and chromosome name
688 e.g. hg38.chr1 in the sequence identifiers.
689 need the reference id - eg hg18, for maf2bed.pl as the first parameter
690 """
691 tId = trackData["label"]
692 mafPlugin = {
693 "plugins": [
694 {
695 "name": "MafViewer",
696 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js",
697 }
698 ]
699 }
700 categ = trackData["category"]
701 fname = f"{tId}"
702 dest = os.path.join(self.outdir, fname)
703 gname = trackData["assemblyNames"]
704
705 cmd = [
706 "bash",
707 os.path.join(INSTALLED_TO, "convertMAF.sh"),
708 data,
709 gname,
710 INSTALLED_TO,
711 dest,
712 ]
713 self.subprocess_check_call(cmd)
714 mafs = open(data, "r").readlines()
715 mafss = [x for x in mafs if (x.startswith("s\t") or x.startswith("s "))]
716 samp = [x.split()[1] for x in mafss if len(x.split()) > 0]
717 sampu = list(dict.fromkeys(samp))
718 samples = [x.split(".")[0] for x in sampu]
719 samples.sort()
720 if logCommands:
721 logging.debug(
722 "$$$$ cmd=%s, mafss=%s samp=%s samples=%s"
723 % (" ".join(cmd), mafss, samp, samples)
724 )
725 trackDict = {
726 "type": "MafTrack",
727 "trackId": tId,
728 "name": trackData["name"],
729 "category": [
730 categ,
731 ],
732 "adapter": {
733 "type": "MafTabixAdapter",
734 "samples": samples,
735 "bedGzLocation": {
736 "uri": fname + ".sorted.bed.gz",
737 },
738 "index": {
739 "location": {
740 "uri": fname + ".sorted.bed.gz.tbi",
741 },
742 },
743 },
744 "assemblyNames": [trackData["assemblyNames"]],
745 "displays": [
746 {
747 "type": "LinearBasicDisplay",
748 "displayId": "%s-LinearBasicDisplay" % tId,
749 },
750 {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
751 ],
752 }
753 style_json = self._prepare_track_style(trackDict)
754 trackDict["style"] = style_json
755 self.tracksToAdd[gname].append(trackDict)
756 self.trackIdlist.append(tId)
757 if self.config_json.get("plugins", None):
758 self.config_json["plugins"].append(mafPlugin[0])
759 else:
760 self.config_json.update(mafPlugin)
761
762 def _blastxml_to_gff3(self, xml, min_gap=10):
763 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
764 cmd = [
765 "python",
766 os.path.join(INSTALLED_TO, "blastxml_to_gapped_gff3.py"),
767 "--trim",
768 "--trim_end",
769 "--include_seq",
770 "--min_gap",
771 str(min_gap),
772 xml,
773 ]
774 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
775 gff3_unrebased.close()
776 logging.debug("### blastxml to gff3 cmd = %s" % " ".join(cmd))
777 return gff3_unrebased.name
778
779 def add_blastxml(self, data, trackData, blastOpts, **kwargs):
780 gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"])
781 if "parent" in blastOpts and blastOpts["parent"] != "None":
782 gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
783 cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")]
784 if blastOpts.get("protein", "false") == "true":
785 cmd.append("--protein2dna")
786 cmd.extend([os.path.realpath(blastOpts["parent"]), gff3])
787 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
788 logging.debug("### gff3rebase cmd = %s" % " ".join(cmd))
789 gff3_rebased.close()
790 # Replace original gff3 file
791 shutil.copy(gff3_rebased.name, gff3)
792 os.unlink(gff3_rebased.name)
793 self.add_gff(gff3, trackData, **kwargs)
794
795 def add_bigwig(self, data, trackData):
796 tId = trackData["label"]
797 useuri = trackData["useuri"].lower() == "yes"
798 if useuri:
799 url = data
800 else:
801 url = tId
802 # slashes in names cause path trouble
803 dest = os.path.join(self.outdir, url)
804 cmd = ["cp", data, dest]
805 self.subprocess_check_call(cmd)
806 bwloc = {"uri": url}
807 categ = trackData["category"]
808 trackDict = {
809 "type": "QuantitativeTrack",
810 "trackId": tId,
811 "name": trackData["name"],
812 "category": [
813 categ,
814 ],
815 "assemblyNames": [trackData["assemblyNames"]],
816 "adapter": {
817 "type": "BigWigAdapter",
818 "bigWigLocation": bwloc,
819 },
820 "displays": [
821 {
822 "type": "LinearWiggleDisplay",
823 "displayId": "%s-LinearWiggleDisplay" % tId,
824 }
825 ],
826 }
827 style_json = self._prepare_track_style(trackDict)
828 trackDict["style"] = style_json
829 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
830 self.trackIdlist.append(tId)
831
832 def add_bam(self, data, trackData, bam_indexes=None, **kwargs):
833 tId = trackData["label"]
834 realFName = trackData["path"]
835 useuri = trackData["useuri"].lower() == "yes"
836 categ = trackData["category"]
837 if useuri:
838 url = data
839 else:
840 fname = tId
841 dest = "%s/%s" % (self.outdir, fname)
842 self.subprocess_check_call(["cp", data, dest])
843 url = fname
844 bindex = fname + ".bai"
845 bi = bam_indexes.split(",")
846 bam_index = [
847 x.split(" ~ ")[1].strip()
848 for x in bi
849 if " ~ " in x and x.split(" ~ ")[0].strip() == realFName
850 ]
851 logging.debug(
852 "===realFName=%s got %s as bam_indexes %s as bi, %s for bam_index"
853 % (realFName, bam_indexes, bi, bam_index)
854 )
855 if len(bam_index) > 0 and os.path.exists(os.path.realpath(bam_index[0])):
856 self.subprocess_check_call(["cp", bam_index[0], bindex])
857 else:
858 cmd = ["samtools", "index", "-b", "-o", bindex, data]
859 self.subprocess_check_call(cmd)
860 trackDict = {
861 "type": "AlignmentsTrack",
862 "trackId": tId,
863 "name": trackData["name"],
864 "category": [
865 categ,
866 ],
867 "assemblyNames": [trackData["assemblyNames"]],
868 "adapter": {
869 "type": "BamAdapter",
870 "bamLocation": {"uri": url},
871 "index": {
872 "location": {
873 "uri": bindex,
874 }
875 },
876 },
877 "displays": [
878 {
879 "type": "LinearAlignmentsDisplay",
880 "displayId": "%s-LinearAlignmentsDisplay" % tId,
881 },
882 ],
883 }
884 style_json = self._prepare_track_style(trackDict)
885 trackDict["style"] = style_json
886 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
887 self.trackIdlist.append(tId)
888
889 def add_cram(self, data, trackData, cram_indexes=None, **kwargs):
890 tId = trackData["label"]
891 realFName = trackData["path"]
892 categ = trackData["category"]
893 useuri = trackData["useuri"].lower() == "yes"
894 gsa = self.assmeta.get(trackData["assemblyNames"], None)
895 if gsa:
896 genseqad = gsa[0]["genome_sequence_adapter"]
897 else:
898 genseqad = "Not found"
899 logging.warning("No adapter found for cram %s in gsa=%s" % (tId, gsa))
900 if useuri:
901 url = data
902 else:
903 fname = tId
904 dest = os.path.join(self.outdir, fname)
905 url = fname
906 self.subprocess_check_call(["cp", data, dest])
907 ci = cram_indexes.split(",")
908 cram_index = [
909 x.split(" ~ ")[1].strip()
910 for x in ci
911 if " ~ " in x and x.split(" ~ ")[0].strip() == realFName
912 ]
913 logging.debug(
914 "===realFName=%s got %s as cram_indexes %s as ci, %s for cram_index"
915 % (realFName, cram_indexes, ci, cram_index)
916 )
917 if len(cram_index) > 0 and os.path.exists(cram_index[0]):
918 if not os.path.exists(dest + ".crai"):
919 # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
920 self.subprocess_check_call(
921 ["cp", os.path.realpath(cram_index[0]), dest + ".crai"]
922 )
923 else:
924 cpath = os.path.realpath(dest) + ".crai"
925 cmd = ["samtools", "index", "-c", "-o", cpath, os.path.realpath(dest)]
926 self.subprocess_check_call(cmd)
927 trackDict = {
928 "type": "AlignmentsTrack",
929 "trackId": tId,
930 "name": trackData["name"],
931 "category": [
932 categ,
933 ],
934 "assemblyNames": [trackData["assemblyNames"]],
935 "adapter": {
936 "type": "CramAdapter",
937 "cramLocation": {"uri": url},
938 "craiLocation": {
939 "uri": url + ".crai",
940 },
941 "sequenceAdapter": genseqad,
942 },
943 "displays": [
944 {
945 "type": "LinearAlignmentsDisplay",
946 "displayId": "%s-LinearAlignmentsDisplay" % tId,
947 },
948 ],
949 }
950 style_json = self._prepare_track_style(trackDict)
951 trackDict["style"] = style_json
952 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
953 self.trackIdlist.append(tId)
954
955 def add_vcf(self, data, trackData):
956 tId = trackData["label"]
957 categ = trackData["category"]
958 useuri = trackData["useuri"].lower() == "yes"
959 if useuri:
960 url = data
961 else:
962 url = tId
963 dest = os.path.join(self.outdir, url)
964 cmd = "bgzip -c %s > %s" % (data, dest)
965 self.subprocess_popen(cmd)
966 cmd = ["tabix", "-f", "-p", "vcf", dest]
967 self.subprocess_check_call(cmd)
968 trackDict = {
969 "type": "VariantTrack",
970 "trackId": tId,
971 "name": trackData["name"],
972 "assemblyNames": [trackData["assemblyNames"]],
973 "category": [
974 categ,
975 ],
976 "adapter": {
977 "type": "VcfTabixAdapter",
978 "vcfGzLocation": {"uri": url},
979 "index": {
980 "location": {
981 "uri": url + ".tbi",
982 }
983 },
984 },
985 "displays": [
986 {
987 "type": "LinearVariantDisplay",
988 "displayId": "%s-LinearVariantDisplay" % tId,
989 },
990 {
991 "type": "ChordVariantDisplay",
992 "displayId": "%s-ChordVariantDisplay" % tId,
993 },
994 {
995 "type": "LinearPairedArcDisplay",
996 "displayId": "%s-LinearPairedArcDisplay" % tId,
997 },
998 ],
999 }
1000 style_json = self._prepare_track_style(trackDict)
1001 trackDict["style"] = style_json
1002 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
1003 self.trackIdlist.append(tId)
1004
1005 def _sort_gff(self, data, dest):
1006 # Only index if not already done
1007 if not os.path.exists(dest):
1008 cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s'" % (
1009 data,
1010 dest,
1011 )
1012 self.subprocess_popen(cmd)
1013 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest])
1014
1015 def _sort_bed(self, data, dest):
1016 # Only index if not already done
1017 if not os.path.exists(dest):
1018 cmd = "sort -k1,1 -k2,2n '%s' | bgzip -c > '%s'" % (data, dest)
1019 self.subprocess_popen(cmd)
1020 cmd = ["tabix", "-f", "-p", "bed", dest]
1021 self.subprocess_check_call(cmd)
1022
1023 def add_gff(self, data, trackData):
1024 tId = trackData["label"]
1025 useuri = trackData["useuri"].lower() == "yes"
1026 if useuri:
1027 url = trackData["path"]
1028 else:
1029 url = tId + ".gz"
1030 dest = os.path.join(self.outdir, url)
1031 self._sort_gff(data, dest)
1032 categ = trackData["category"]
1033 trackDict = {
1034 "type": "FeatureTrack",
1035 "trackId": tId,
1036 "name": trackData["name"],
1037 "assemblyNames": [trackData["assemblyNames"]],
1038 "category": [
1039 categ,
1040 ],
1041 "adapter": {
1042 "type": "Gff3TabixAdapter",
1043 "gffGzLocation": {
1044 "uri": url,
1045 },
1046 "index": {
1047 "location": {
1048 "uri": url + ".tbi",
1049 }
1050 },
1051 },
1052 "displays": [
1053 {
1054 "type": "LinearBasicDisplay",
1055 "displayId": "%s-LinearBasicDisplay" % tId,
1056 },
1057 {
1058 "type": "LinearArcDisplay",
1059 "displayId": "%s-LinearArcDisplay" % tId,
1060 },
1061 ],
1062 }
1063 style_json = self._prepare_track_style(trackDict)
1064 trackDict["style"] = style_json
1065 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
1066 self.trackIdlist.append(tId)
1067
1068 def add_bed(self, data, ext, trackData):
1069 tId = trackData["label"]
1070 categ = trackData["category"]
1071 useuri = trackData["useuri"].lower() == "yes"
1072 if useuri:
1073 url = data
1074 else:
1075 url = tId + ".gz"
1076 dest = os.path.join(self.outdir, url)
1077 self._sort_bed(data, dest)
1078 trackDict = {
1079 "type": "FeatureTrack",
1080 "trackId": tId,
1081 "name": trackData["name"],
1082 "assemblyNames": [trackData["assemblyNames"]],
1083 "adapter": {
1084 "category": [
1085 categ,
1086 ],
1087 "type": "BedTabixAdapter",
1088 "bedGzLocation": {
1089 "uri": url,
1090 },
1091 "index": {
1092 "location": {
1093 "uri": url + ".tbi",
1094 }
1095 },
1096 },
1097 "displays": [
1098 {
1099 "type": "LinearBasicDisplay",
1100 "displayId": "%s-LinearBasicDisplay" % tId,
1101 },
1102 {
1103 "type": "LinearPileupDisplay",
1104 "displayId": "%s-LinearPileupDisplay" % tId,
1105 },
1106 {
1107 "type": "LinearArcDisplay",
1108 "displayId": "%s-LinearArcDisplay" % tId,
1109 },
1110 ],
1111 }
1112 style_json = self._prepare_track_style(trackDict)
1113 trackDict["style"] = style_json
1114 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
1115 self.trackIdlist.append(tId)
1116
def add_paf(self, data, trackData, pafOpts, **kwargs):
    """Queue a PAF alignment as a ``SyntenyTrack``.

    A local ``data`` file is linked/copied into ``self.outdir`` under the
    track label; an http(s) ``data`` value is referenced as is. Every
    genome listed in ``pafOpts["genome"]`` (comma separated ``path ~ name``
    pairs) that is not yet in ``self.genome_names`` is registered through
    ``self.make_assembly``.

    Args:
        data: Local path or http(s) URL of the PAF file.
        trackData: Track settings; the keys read here are ``name``,
            ``label``, ``category`` and ``assemblyNames``.
        pafOpts: PAF options; the keys read here are ``genome`` and
            ``useuri``.
        **kwargs: Accepted and ignored.
    """
    track_name = trackData["name"]
    track_id = trackData["label"]
    if data.startswith(("http://", "https://")):
        paf_uri = data
        n_rows = self.getNrow(paf_uri)
    else:
        paf_uri = track_id
        local_copy = os.path.join(self.outdir, paf_uri)
        self.symlink_or_copy(os.path.realpath(data), local_copy)
        n_rows = self.getNrow(local_copy)
    category = trackData["category"]

    raw_specs = pafOpts["genome"].split(",")
    genome_specs = [
        spec.strip().split(" ~ ") for spec in raw_specs if spec.strip() > ""
    ]
    logging.debug("pg=%s, gnomes=%s" % (raw_specs, genome_specs))

    # The track's own assembly always comes first.
    assembly_names = [trackData["assemblyNames"]]
    for genome_path, genome_name in genome_specs:
        # may have been forgotten by user for uri
        if len(genome_name) == 0:
            genome_name = os.path.splitext(os.path.basename(genome_path))[0]
        # Avoid trouble from names containing whitespace on command lines.
        name_words = genome_name.split()
        if len(name_words) > 1:
            genome_name = name_words[0]
        if genome_name not in assembly_names:
            assembly_names.append(genome_name)
        genome_is_uri = pafOpts["useuri"] == "true"
        if genome_name in self.genome_names:
            # Already registered, e.g. the same genome used by another PAF.
            continue
        assembly, first_contig = self.make_assembly(
            genome_path, genome_name, genome_is_uri
        )
        self.genome_names.append(genome_name)
        self.tracksToAdd[genome_name] = []
        self.assemblies.append(assembly)
        self.ass_first_contigs.append(first_contig)

    displays = [
        {
            "type": "LGVSyntenyDisplay",
            "displayId": "%s-LGVSyntenyDisplay" % track_id,
        },
        {
            "type": "DotplotDisplay",
            "displayId": "%s-DotplotDisplay" % track_id,
        },
        {
            "type": "LinearComparativeDisplay",
            "displayId": "%s-LinearComparativeDisplay" % track_id,
        },
        {
            "type": "LinearBasicDisplay",
            "displayId": "%s-LinearSyntenyDisplay" % track_id,
        },
    ]
    track = {
        "type": "SyntenyTrack",
        "trackId": track_id,
        "assemblyNames": assembly_names,
        "category": [category],
        "name": track_name,
        "adapter": {
            "type": "PAFAdapter",
            "pafLocation": {"uri": paf_uri},
            "assemblyNames": assembly_names,
        },
        "displays": displays,
    }
    # The row count reported by getNrow picks the default display style.
    if n_rows > 10000:
        track["style"] = {
            "type": "LGVSyntenyDisplay",
            "displayId": "%s-LGVSyntenyDisplay" % track_id,
        }
    else:
        track["style"] = {
            "type": "LinearBasicDisplay",
            "displayId": "%s-LinearBasicDisplay" % track_id,
        }
    self.tracksToAdd[trackData["assemblyNames"]].append(track)
    self.trackIdlist.append(track_id)
1198
def process_annotations(self, track):
    """Dispatch every trackfile of ``track`` to the matching ``add_*`` method.

    This is a generator: it yields ``track["label"]`` once per trackfile
    handled (including files whose extension is not recognised, after a
    warning). A trackfile whose label is already in ``self.trackIdlist``
    is logged and skipped: it is neither added again nor yielded.

    Args:
        track: Track settings; the keys read here are ``category``,
            ``trackfiles``, ``assemblyNames``, ``label``, the optional
            ``trackset``, and ``conf`` for the formats that need
            per-format options (bam, cram, blastxml, paf). Each entry of
            ``trackfiles`` is a 5-tuple
            ``(path, ext, useuri, human_label, metadata)``.
    """
    category = track["category"].replace("__pd__date__pd__", TODAY)
    for (
        dataset_path,
        dataset_ext,
        useuri,
        track_human_label,
        extra_metadata,
    ) in track["trackfiles"]:
        if not dataset_path.strip().startswith("http"):
            # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
            for key, value in mapped_chars.items():
                track_human_label = track_human_label.replace(value, key)
            track_human_label = track_human_label.replace(" ", "_")

        if track["label"] in self.trackIdlist:
            # Skip the duplicate for real: previously a None was yielded
            # and the track was then added a second time anyway.
            logging.error(
                "### not adding %s already in %s"
                % (track["label"], self.trackIdlist)
            )
            continue

        outputTrackConfig = {
            "category": category,
            "style": {},
            "assemblyNames": track["assemblyNames"],
            "key": track_human_label,
            "useuri": useuri,
            "path": dataset_path,
            "ext": dataset_ext,
            "trackset": track.get("trackset", {}),
            "label": track["label"],
            "metadata": extra_metadata,
            "name": track_human_label,
        }

        if dataset_ext in ("gff", "gff3"):
            self.add_gff(
                dataset_path,
                outputTrackConfig,
            )
        elif dataset_ext in ("hic", "juicebox_hic"):
            outputTrackConfig["wasCool"] = False
            self.add_hic(
                dataset_path,
                outputTrackConfig,
            )
        elif dataset_ext in ("cool", "mcool", "scool"):
            # Cooler formats are converted to .hic with hictk first.
            hic_url = outputTrackConfig["label"]
            hic_path = os.path.join(self.outdir, hic_url) + ".hic"
            outputTrackConfig["wasCool"] = True
            self.subprocess_check_call(
                [
                    "hictk",
                    "convert",
                    "-f",
                    "--output-fmt",
                    "hic",
                    dataset_path,
                    hic_path,
                ]
            )
            self.add_hic(
                hic_path,
                outputTrackConfig,
            )
        elif dataset_ext in ("bed",):
            self.add_bed(
                dataset_path,
                dataset_ext,
                outputTrackConfig,
            )
        elif dataset_ext in ("maf",):
            self.add_maf(
                dataset_path,
                outputTrackConfig,
            )
        elif dataset_ext == "bigwig":
            self.add_bigwig(
                dataset_path,
                outputTrackConfig,
            )
        elif dataset_ext == "bam":
            real_indexes = track["conf"]["options"]["bam"]["bam_index"]
            self.add_bam(
                dataset_path,
                outputTrackConfig,
                bam_indexes=real_indexes,
            )
        elif dataset_ext == "cram":
            real_indexes = track["conf"]["options"]["cram"]["cram_index"]
            self.add_cram(
                dataset_path,
                outputTrackConfig,
                cram_indexes=real_indexes,
            )
        elif dataset_ext == "blastxml":
            self.add_blastxml(
                dataset_path,
                outputTrackConfig,
                track["conf"]["options"]["blast"],
            )
        elif dataset_ext == "vcf":
            self.add_vcf(dataset_path, outputTrackConfig)
        elif dataset_ext == "paf":
            self.add_paf(
                dataset_path,
                outputTrackConfig,
                track["conf"]["options"]["paf"],
            )
        else:
            logging.warning("Do not know how to handle %s", dataset_ext)
        # Return non-human label for use in other fields
        yield outputTrackConfig["label"]
1326
def add_default_session(self, default_data):
    """
    Build a ``defaultSession`` block and write it into config.json.

    default session settings are hard and fragile.
    .add_default_view() and other configuration code adapted from
    https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py

    One LinearGenomeView is created per assembly in ``self.assmeta``,
    holding the tracks listed as ``default_on`` for that assembly. The
    displayed region is the assembly's entry in ``self.ass_first_contigs``
    when there is one, otherwise the ``defaultLocation`` string
    (``contig:start..end``) from ``default_data`` if it parses.

    Args:
        default_data: Per-assembly dicts (keys read here: ``visibility``
            and ``style``) plus the optional top-level ``defaultLocation``
            and ``session_name`` entries.
    """
    # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708
    track_types = {}
    with open(self.config_json_file, "r") as config_file:
        config_json = json.load(config_file)
    if self.config_json:
        config_json.update(self.config_json)
    session_json = config_json.get("defaultSession", {})
    session_views = []
    for gnome in self.assmeta.keys():  # assemblies have their own tracks
        tracks_data = []
        for track_conf in self.tracksToAdd[gnome]:
            tId = track_conf["trackId"]
            if tId in default_data[gnome]["visibility"]["default_on"]:
                track_types[tId] = track_conf["type"]
                style_data = default_data[gnome]["style"].get(tId, None)
                if not style_data:
                    logging.debug(
                        "### No style data for %s in available default data %s"
                        % (tId, default_data)
                    )
                    style_data = {"type": "LinearBasicDisplay"}
                if "displays" in track_conf:
                    # The first declared display wins as the session display type.
                    disp = track_conf["displays"][0]["type"]
                    style_data["type"] = disp
                if track_conf.get("style_labels", None):
                    # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work
                    # TODO move this to per track displays?
                    style_data["labels"] = track_conf["style_labels"]
                tracks_data.append(
                    {
                        "type": track_types[tId],
                        "configuration": tId,
                        "displays": [style_data],
                    }
                )
        view_json = {
            "type": "LinearGenomeView",
            "offsetPx": 0,
            "minimized": False,
            "tracks": tracks_data,
        }
        first = [x for x in self.ass_first_contigs if x[0] == gnome]
        if len(first) > 0:
            _, refName, end = first[0]
            drdict = {
                "refName": refName,
                "start": 0,
                "end": int(end),
                "reversed": False,
                "assemblyName": gnome,
            }
        else:
            # Start from a fresh region for this assembly. Without this the
            # code below hit an unbound name (or silently reused the region
            # left over from the previous assembly).
            drdict = {
                "reversed": False,
                "assemblyName": gnome,
            }
            ddl = default_data.get("defaultLocation", None)
            if ddl:
                loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl)
                # allow commas like 100,000 but ignore as integer
                if loc_match:
                    drdict["refName"] = loc_match.group(1)
                    if loc_match.group(2) > "":
                        drdict["start"] = int(loc_match.group(2).replace(",", ""))
                    if loc_match.group(3) > "":
                        drdict["end"] = int(loc_match.group(3).replace(",", ""))
                else:
                    logging.info(
                        "@@@ regexp could not match contig:start..end in the supplied location %s - please fix"
                        % ddl
                    )
        if drdict.get("refName", None):
            # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
            view_json["displayedRegions"] = [
                drdict,
            ]
            logging.info("@@@ defaultlocation %s for default session" % drdict)
        else:
            logging.info(
                "@@@ no track location for default session - please add one!"
            )
        session_views.append(view_json)
    session_name = default_data.get("session_name", "New session")
    # Undo the character sanitising applied to the session name.
    for key, value in mapped_chars.items():
        session_name = session_name.replace(value, key)
    session_json["name"] = session_name

    if "views" not in session_json:
        session_json["views"] = session_views
    else:
        session_json["views"] += session_views

    pp = json.dumps(session_views, indent=2)
    config_json["defaultSession"] = session_json
    self.config_json.update(config_json)
    logging.debug("defaultSession=%s" % (pp))
    with open(self.config_json_file, "w") as config_file:
        json.dump(self.config_json, config_file, indent=2)
1434
def add_defsess_to_index(self, data):
    """
    ----------------------------------------------------------
    Add some default session settings: set some assemblies/tracks on/off

    This allows to select a default view:
    - jb type (Linear, Circular, etc)
    - default location on an assembly
    - default tracks
    - ...

    Different methods to do that were tested/discussed:
    - using a defaultSession item in config.json: this proved to be difficult:
      forced to write a full session block, including hard-coded/hard-to-guess items,
      no good way to let Jbrowse2 display a scaffold without knowing its size
    - using JBrowse2 as an embedded React component in a tool-generated html file:
      it works but it requires generating js code to actually do what we want = choosing default view, assembly, tracks, ...
    - writing a session-spec inside the config.json file: this is not yet supported as of 2.10.2 (see PR 4148 below)
      a session-spec is a kind of simplified defaultSession where you don't need to specify every aspect of the session
    - passing a session-spec through URL params by embedding the JBrowse2 index.html inside an iframe
      we selected this option

    Xrefs to understand the choices:
    https://github.com/GMOD/jbrowse-components/issues/2708
    https://github.com/GMOD/jbrowse-components/discussions/3568
    https://github.com/GMOD/jbrowse-components/pull/4148

    What the code does: one LinearGenomeView entry is built per item of
    ``self.ass_first_contigs``, the existing ``index.html`` in
    ``self.outdir`` is renamed to ``index_noview.html``, and a new
    ``index.html`` is written from ``INDEX_TEMPLATE`` with the session
    spec substituted for ``__SESSION_SPEC__``.

    Args:
        data: Per-assembly dicts; ``data[assembly]["tracks"]`` is read
            for every assembly present in ``self.ass_first_contigs``.
    """
    session_spec = {"views": []}
    # Pre-bind the names used by the final debug message so it cannot hit
    # an unbound name when there are no first contigs at all.
    gnome = refName = None
    logging.debug("def ass_first=%s\ndata=%s" % (self.ass_first_contigs, data))
    for first_contig in self.ass_first_contigs:
        logging.debug("first contig=%s", first_contig)
        [gnome, refName, end] = first_contig
        start = 0
        aview = {
            "assembly": gnome,
            "loc": "{}:{}..{}".format(refName, start, end),
            "type": "LinearGenomeView",
            "tracks": data[gnome]["tracks"],
        }
        session_spec["views"].append(aview)
    sess = json.dumps(session_spec, sort_keys=True, indent=2)
    new_index = INDEX_TEMPLATE.replace(
        "__SESSION_SPEC__", "&session=spec-{}".format(sess)
    )

    # Keep the stock page reachable under the name the iframe points at.
    os.rename(
        os.path.join(self.outdir, "index.html"),
        os.path.join(self.outdir, "index_noview.html"),
    )

    with open(os.path.join(self.outdir, "index.html"), "w") as nind:
        nind.write(new_index)
    logging.debug(
        "#### add_defsession gnome=%s refname=%s\nsession_spec=%s\nnew_index=%s"
        % (gnome, refName, sess, new_index)
    )
1492
def add_general_configuration(self, data):
    """
    Merge analytics and theme settings into the config.json file.

    The file at ``self.config_json_file`` is loaded when it exists,
    overlaid with ``self.config_json``, given an updated
    ``configuration`` section, folded back into ``self.config_json`` and
    written out again.

    Args:
        data: Mapping with the optional keys ``analytics``,
            ``primary_color``, ``secondary_color``, ``tertiary_color``,
            ``quaternary_color`` and ``font_size``.
    """
    cfg_file = self.config_json_file
    merged = {}
    if os.path.exists(cfg_file):
        with open(cfg_file, "r") as handle:
            merged = json.load(handle)
    if self.config_json:
        merged.update(self.config_json)

    # NOTE(review): disableAnalytics becomes True when data["analytics"] is
    # "true" - confirm this polarity against whatever produces the value.
    analytics_flag = data.get("analytics", "false") == "true"
    palette = {
        "primary": {"main": data.get("primary_color", "#0D233F")},
        "secondary": {"main": data.get("secondary_color", "#721E63")},
        "tertiary": {"main": data.get("tertiary_color", "#135560")},
        "quaternary": {"main": data.get("quaternary_color", "#FFB11D")},
    }
    font_size = int(data.get("font_size", 10))
    general = {
        "disableAnalytics": analytics_flag,
        "theme": {
            "palette": palette,
            "typography": {"fontSize": font_size},
        },
    }

    # Replace a missing or empty "configuration" section with a fresh dict.
    if not merged.get("configuration", None):
        merged["configuration"] = {}
    merged["configuration"].update(general)
    self.config_json.update(merged)
    with open(cfg_file, "w") as handle:
        json.dump(self.config_json, handle, indent=2)
1525
def clone_jbrowse(self, realclone=False):
    """
    Populate ``self.outdir`` with a JBrowse2 installation.

    `realclone=true` will use the `jbrowse create` command.
    To allow running on internet-less compute and for reproducibility
    use frozen code with `realclone=false`, which copies the tree found
    at ``self.jbrowse2path`` instead.

    Afterwards a fixed list of files/directories is removed from the
    destination (failures are logged, not raised) and ``jb2_webserver.py``
    is copied in from ``INSTALLED_TO``.
    """
    dest = self.outdir
    if not realclone:
        shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True)
    else:
        self.subprocess_check_call(
            ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"]
        )

    unwanted = (
        "asset-manifest.json",
        "favicon.ico",
        "robots.txt",
        "umd_plugin.js",
        "version.txt",
        "test_data",
    )
    for entry in unwanted:
        target = os.path.join(dest, entry)
        # Directories need rmtree; anything else is removed as a file.
        remover = shutil.rmtree if os.path.isdir(target) else os.remove
        try:
            remover(target)
        except OSError as e:
            log.error("Error: %s - %s." % (e.filename, e.strerror))

    shutil.copyfile(
        os.path.join(INSTALLED_TO, "jb2_webserver.py"),
        os.path.join(dest, "jb2_webserver.py"),
    )
1559
1560
def parse_style_conf(item):
    """Convert the text of a style XML element into a Python value.

    Args:
        item: Object with a ``text`` attribute (an XML element).

    Returns:
        ``True`` for "yes"/"true" and ``False`` for "no"/"false" (case
        insensitive); any other text is returned unchanged, including
        ``None`` for an element without text.
    """
    text = item.text
    if text is None:
        # Empty element: nothing to interpret.
        return None
    lowered = text.lower()
    if lowered in ("false", "true", "yes", "no"):
        # The method must be *called*: comparing the bound method
        # ``text.lower`` itself against the tuple was always False.
        return lowered in ("yes", "true")
    return text
1566
1567
if __name__ == "__main__":
    # Script entry point: read the track configuration XML, register every
    # assembly and track with a JbrowseConnector, then write the JBrowse2
    # configuration including a default session.
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument("--xml", help="Track Configuration")
    parser.add_argument(
        "--jbrowse2path", help="Path to JBrowse2 directory in BioContainer or Conda"
    )
    parser.add_argument("--outdir", help="Output directory", default="out")
    parser.add_argument("--version", "-V", action="version", version=JB2VER)
    args = parser.parse_args()
    tree = ET.parse(args.xml)
    root = tree.getroot()

    # This should be done ASAP
    GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text
    # Sometimes this comes as `localhost` without a protocol
    if not GALAXY_INFRASTRUCTURE_URL.startswith("http"):
        # so we'll prepend `http://` and hope for the best. Requests *should*
        # be GET and not POST so it should redirect OK
        GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL

    jc = JbrowseConnector(outdir=args.outdir, jbrowse2path=args.jbrowse2path)

    default_session_data = {}
    trackI = 0  # running counter that makes every track label unique
    for ass in root.findall("assembly"):
        genomes = [
            {
                "path": x.attrib["path"],
                "label": x.attrib["label"].split(" ")[0].replace(",", ""),
                "useuri": x.attrib["useuri"],
                "meta": metadata_from_node(x.find("metadata")),
            }
            for x in ass.findall("metadata/genomes/genome")
        ]
        primaryGenome = jc.process_genomes(genomes)
        if not default_session_data.get(primaryGenome, None):
            default_session_data[primaryGenome] = {
                "tracks": [],
                "style": {},
                "style_labels": {},
                "visibility": {
                    "default_on": [],
                    "default_off": [],
                },
            }
        for track in ass.find("tracks"):
            track_conf = {}
            track_conf["trackfiles"] = []
            track_conf["assemblyNames"] = primaryGenome

            # NOTE(review): this used to rely on Element truthiness, which
            # means "has child elements" and is deprecated. The explicit
            # len() check keeps that exact behaviour, so a leaf
            # <multibigwig> node still does not enable the path below.
            # That path builds a 4-item trackfile tuple while
            # process_annotations unpacks 5 items - fix that before
            # relaxing this condition.
            is_multi_bigwig = False
            mbw_node = track.find("options/wiggle/multibigwig")
            if mbw_node is not None and len(mbw_node) > 0 and mbw_node.text == "True":
                is_multi_bigwig = True
                multi_bigwig_paths = []

            for x in track.findall("files/trackFile"):
                track_conf["label"] = "%s_%d" % (
                    x.attrib["label"].replace(" ", "_").replace(",", ""),
                    trackI,
                )
                trackI += 1
                track_conf["useuri"] = x.attrib["useuri"]
                if is_multi_bigwig:
                    multi_bigwig_paths.append(
                        (
                            track_conf["label"],
                            track_conf["useuri"],
                            os.path.realpath(x.attrib["path"]),
                        )
                    )
                else:
                    metadata = metadata_from_node(x.find("metadata"))
                    track_conf["dataset_id"] = metadata.get("dataset_id", "None")
                    if x.attrib["useuri"].lower() == "yes":
                        # Remote data: keep the URI untouched.
                        data_path = x.attrib["path"]
                    else:
                        data_path = os.path.realpath(x.attrib["path"])
                    track_conf["trackfiles"].append(
                        (
                            data_path,
                            x.attrib["ext"],
                            x.attrib["useuri"],
                            track_conf["label"],
                            metadata,
                        )
                    )

            if is_multi_bigwig:
                track_conf["trackfiles"].append(
                    (
                        multi_bigwig_paths,  # Passing an array of paths to represent as one track
                        "bigwig_multiple",
                        "MultiBigWig",  # Giving an hardcoded name for now
                        {},  # No metadata for multiple bigwig
                    )
                )

            track_conf["category"] = track.attrib["cat"]
            track_conf["format"] = track.attrib["format"]
            track_conf["conf"] = etree_to_dict(track.find("options"))

            style_node = track.find("options/style")
            labels_node = track.find("options/style_labels")
            for key in jc.process_annotations(track_conf):
                if key is None:
                    # Nothing was registered for this entry; do not record
                    # a null track id in the session data.
                    continue
                vis = track.attrib.get("visibility", "default_off") or "default_off"
                default_session_data[primaryGenome]["visibility"][vis].append(key)
                stile = {}
                for trak in jc.tracksToAdd[primaryGenome]:
                    if trak["trackId"] == key:
                        stile = trak.get("style", {})
                if style_node is not None:
                    stile.update(
                        {item.tag: parse_style_conf(item) for item in style_node}
                    )
                default_session_data[primaryGenome]["style"][key] = stile
                if labels_node is not None and len(labels_node) > 0:
                    default_session_data[primaryGenome]["style_labels"][key] = {
                        item.tag: parse_style_conf(item) for item in labels_node
                    }
                default_session_data[primaryGenome]["tracks"].append(key)
    default_session_data["defaultLocation"] = root.find(
        "metadata/general/defaultLocation"
    ).text
    default_session_data["session_name"] = root.find(
        "metadata/general/session_name"
    ).text
    logging.debug("default_session=%s" % (json.dumps(default_session_data, indent=2)))
    jc.zipOut = root.find("metadata/general/zipOut").text == "true"
    general_data = {
        "analytics": root.find("metadata/general/analytics").text,
        "primary_color": root.find("metadata/general/primary_color").text,
        "secondary_color": root.find("metadata/general/secondary_color").text,
        "tertiary_color": root.find("metadata/general/tertiary_color").text,
        "quaternary_color": root.find("metadata/general/quaternary_color").text,
        "font_size": root.find("metadata/general/font_size").text,
    }
    jc.add_general_configuration(general_data)
    # Collect the per-assembly tracks into the single "tracks" list.
    trackconf = jc.config_json.get("tracks", [])
    for gnome in jc.genome_names:
        gtracks = jc.tracksToAdd[gnome]
        if len(gtracks) > 0:
            logging.debug(
                "for genome %s adding gtracks %s"
                % (gnome, json.dumps(gtracks, indent=2))
            )
            trackconf += gtracks
    jc.config_json["tracks"] = trackconf
    assconf = jc.config_json.get("assemblies", [])
    assconf += jc.assemblies
    jc.config_json["assemblies"] = assconf
    logging.debug(
        "assmeta=%s, first_contigs=%s, assemblies=%s, gnames=%s, trackidlist=%s, tracks=%s"
        % (
            jc.assmeta,
            jc.ass_first_contigs,
            json.dumps(assconf, indent=2),
            jc.genome_names,
            jc.trackIdlist,
            json.dumps(trackconf, indent=2),
        )
    )
    jc.write_config()
    jc.add_default_session(default_session_data)
    # note that this can be left in the config.json but has NO EFFECT if add_defsess_to_index is called.
    # jc.add_defsess_to_index(default_session_data)
    # jc.text_index() not sure what broke here.