comparison jbrowse2.py.apr17 @ 96:5ef1ba2031f2 draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 8e40627c0488fc1adf2b7d1e331ca65f533038b2
author fubar
date Thu, 25 Apr 2024 07:25:52 +0000
parents 4c517a0041a8
children
comparison
equal deleted inserted replaced
95:a0c848f00363 96:5ef1ba2031f2
1 #!/usr/bin/env python
2
3 import argparse
4 import binascii
5 import datetime
6 import json
7 import logging
8 import os
9 import re
10 import shutil
11 import ssl
12 import struct
13 import subprocess
14 import tempfile
15 import urllib.request
16 import xml.etree.ElementTree as ET
17 from collections import defaultdict
18
# Module-wide logging: everything from DEBUG upwards is emitted.
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("jbrowse")

# JBrowse2 release tag.
JB2VER = "v2.10.3"
# version pinned if cloning - but not cloning now

# Date stamp (YYYY-MM-DD) taken once at import time.
TODAY = datetime.date.today().isoformat()
# Directory containing this script, with symlinks resolved.
SELF_LOCATION = os.path.split(os.path.realpath(__file__))[0]
# Module-level default; None until something else assigns it.
GALAXY_INFRASTRUCTURE_URL = None
# Characters paired with their "__xx__" placeholder spellings.
mapped_chars = dict(
    [
        (">", "__gt__"),
        ("<", "__lt__"),
        ("'", "__sq__"),
        ('"', "__dq__"),
        ("[", "__ob__"),
        ("]", "__cb__"),
        ("{", "__oc__"),
        ("}", "__cc__"),
        ("@", "__at__"),
        ("#", "__pd__"),
        ("", "__cn__"),
    ]
)
41
42
# HTML page that embeds index_noview.html in a full-size iframe.
# The literal __SESSION_SPEC__ is a placeholder at the end of the iframe URL;
# presumably the caller substitutes it (that code is not visible here).
# A stray </script> end tag that had no matching opening tag was removed
# from <head>.
INDEX_TEMPLATE = """<!doctype html>
<html lang="en" style="height:100%">
<head>
<meta charset="utf-8"/>
<link rel="shortcut icon" href="./favicon.ico"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<meta name="theme-color" content="#000000"/>
<meta name="description" content="A fast and flexible genome browser"/>
<link rel="manifest" href="./manifest.json"/>
<title>JBrowse</title>
</head>
<body style="overscroll-behavior:none; height:100%; margin: 0;">
<iframe
  id="jbframe"
  title="JBrowse2"
  frameborder="0"
  width="100%"
  height="100%"
  src='index_noview.html?config=config.json__SESSION_SPEC__'>
</iframe>
</body>
</html>
"""
67
68
class ColorScaling(object):
    """Build colour/menu configuration fragments for tracks.

    Colours are either taken from the track description or drawn in turn
    from a fixed palette; scored tracks get a JavaScript colour callback
    rendered from the string templates below.
    """

    # JavaScript callback for a fixed colour whose opacity depends on a score.
    COLOR_FUNCTION_TEMPLATE = """
    function(feature, variableName, glyphObject, track) {{
        var score = {score};
        {opacity}
        return 'rgba({red}, {green}, {blue}, ' + opacity + ')';
    }}
    """

    # JavaScript callback that looks for 'color'/'score' on the feature, its
    # ancestors and its descendants before falling back to a generated colour.
    COLOR_FUNCTION_TEMPLATE_QUAL = r"""
    function(feature, variableName, glyphObject, track) {{
        var search_up = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.parent() === undefined) {{
                return;
            }}else{{
                return self(sf.parent(), attr);
            }}
        }};

        var search_down = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.children() === undefined) {{
                return;
            }}else{{
                var kids = sf.children();
                for(var child_idx in kids){{
                    var x = self(kids[child_idx], attr);
                    if(x !== undefined){{
                        return x;
                    }}
                }}
                return;
            }}
        }};

        var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color});
        var score = (search_up(feature, 'score') || search_down(feature, 'score'));
        {opacity}
        if(score === undefined){{ opacity = 1; }}
        var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color);
        var red = parseInt(result[1], 16);
        var green = parseInt(result[2], 16);
        var blue = parseInt(result[3], 16);
        if(isNaN(opacity) || opacity < 0){{ opacity = 0; }}
        return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')';
    }}
    """

    # JavaScript snippets computing `opacity` from `score`, keyed by algorithm.
    OPACITY_MATH = {
        "linear": """
            var opacity = (score - ({min})) / (({max}) - ({min}));
        """,
        "logarithmic": """
            var opacity = Math.log10(score - ({min})) / Math.log10(({max}) - ({min}));
        """,
        "blast": """
            var opacity = 0;
            if(score == 0.0) {{
                opacity = 1;
            }} else {{
                opacity = (20 - Math.log10(score)) / 180;
            }}
        """,
    }

    # Kept for compatibility; the per-instance counter set in __init__ is the
    # one _get_colours actually uses.
    BREWER_COLOUR_IDX = 0
    # RGB palette cycled through by _get_colours.
    BREWER_COLOUR_SCHEMES = [
        (166, 206, 227),
        (31, 120, 180),
        (178, 223, 138),
        (51, 160, 44),
        (251, 154, 153),
        (227, 26, 28),
        (253, 191, 111),
        (255, 127, 0),
        (202, 178, 214),
        (106, 61, 154),
        (255, 255, 153),
        (177, 89, 40),
        (228, 26, 28),
        (55, 126, 184),
        (77, 175, 74),
        (152, 78, 163),
        (255, 127, 0),
    ]

    # Pairs of hex colours keyed by palette name.
    BREWER_DIVERGING_PALLETES = {
        "BrBg": ("#543005", "#003c30"),
        "PiYg": ("#8e0152", "#276419"),
        "PRGn": ("#40004b", "#00441b"),
        "PuOr": ("#7f3b08", "#2d004b"),
        "RdBu": ("#67001f", "#053061"),
        "RdGy": ("#67001f", "#1a1a1a"),
        "RdYlBu": ("#a50026", "#313695"),
        "RdYlGn": ("#a50026", "#006837"),
        "Spectral": ("#9e0142", "#5e4fa2"),
    }

    def __init__(self):
        """Start at the first palette entry."""
        self.brewer_colour_idx = 0

    def rgb_from_hex(self, hexstr):
        """Return the (r, g, b) byte tuple for a six-digit hex string without '#'."""
        # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
        return struct.unpack("BBB", binascii.unhexlify(hexstr))

    def min_max_gff(self, gff_file):
        """Return (min, max) of the sixth tab-separated column of a file.

        Lines with fewer than six columns or a non-numeric sixth column are
        skipped. Returns (None, None) when no line carries a number.
        """
        min_val = None
        max_val = None
        with open(gff_file, "r") as handle:
            for line in handle:
                try:
                    value = float(line.split("\t")[5])
                except (IndexError, ValueError):
                    # comment/header lines and "." scores carry no number
                    continue
                # Compare against None explicitly: a score of 0.0 is a real
                # value and must not be mistaken for "nothing seen yet".
                if min_val is None or value < min_val:
                    min_val = value
                if max_val is None or value > max_val:
                    max_val = value
        return min_val, max_val

    def hex_from_rgb(self, r, g, b):
        """Return '#rrggbb' for three integer channel values."""
        return "#%02x%02x%02x" % (r, g, b)

    def _get_colours(self):
        """Return the next palette colour, wrapping around at the end."""
        r, g, b = self.BREWER_COLOUR_SCHEMES[
            self.brewer_colour_idx % len(self.BREWER_COLOUR_SCHEMES)
        ]
        self.brewer_colour_idx += 1
        return r, g, b

    def parse_menus(self, track):
        """Return {"menuTemplate": [...]} built from track["menus"].

        The list starts with four empty dicts; one entry is appended per
        menu found under track["menus"]["menu"] (a single dict or a list).
        """
        trackConfig = {"menuTemplate": [{}, {}, {}, {}]}

        if "menu" in track["menus"]:
            menu_list = [track["menus"]["menu"]]
            if isinstance(track["menus"]["menu"], list):
                menu_list = track["menus"]["menu"]

            for m in menu_list:
                tpl = {
                    "action": m["action"],
                    "label": m.get("label", "{name}"),
                    "iconClass": m.get("iconClass", "dijitIconBookmark"),
                }
                # optional keys are copied only when present
                for optional in ("url", "content", "title"):
                    if optional in m:
                        tpl[optional] = m[optional]

                trackConfig["menuTemplate"].append(tpl)

        return trackConfig

    def parse_colours(self, track, trackFormat, gff3=None):
        """Return a {"style": {...}} config fragment for one track.

        track: dict describing the track ("wiggle" or "scaling" sub-dicts).
        trackFormat: "wiggle", "blast", "gene_calls" or anything else.
        gff3: path scanned for score bounds when gene_calls scaling is
            automatic.
        """
        # Wiggle tracks have a bicolor pallete
        trackConfig = {"style": {}}
        if trackFormat == "wiggle":
            style = trackConfig["style"]
            style["pos_color"] = track["wiggle"]["color_pos"]
            style["neg_color"] = track["wiggle"]["color_neg"]

            if style["pos_color"] == "__auto__":
                style["neg_color"] = self.hex_from_rgb(*self._get_colours())
                style["pos_color"] = self.hex_from_rgb(*self._get_colours())

            # Wiggle tracks can change colour at a specified place
            bc_pivot = track["wiggle"]["bicolor_pivot"]
            if bc_pivot not in ("mean", "zero"):
                # The values are either one of those two strings
                # or a number
                bc_pivot = float(bc_pivot)
            trackConfig["bicolor_pivot"] = bc_pivot
        elif "scaling" in track:
            if track["scaling"]["method"] == "ignore":
                if track["scaling"]["scheme"]["color"] != "__auto__":
                    trackConfig["style"]["color"] = track["scaling"]["scheme"]["color"]
                else:
                    trackConfig["style"]["color"] = self.hex_from_rgb(
                        *self._get_colours()
                    )
            else:
                # Scored method
                algo = track["scaling"]["algo"]
                # linear, logarithmic, blast
                scales = track["scaling"]["scales"]
                # type __auto__, manual (min, max)
                scheme = track["scaling"]["scheme"]
                # scheme -> (type (opacity), color)
                # ==================================
                # GENE CALLS OR BLAST
                # ==================================
                if trackFormat == "blast":
                    red, green, blue = self._get_colours()
                    color_function = self.COLOR_FUNCTION_TEMPLATE.format(
                        **{
                            "score": "feature._parent.get('score')",
                            "opacity": self.OPACITY_MATH["blast"],
                            "red": red,
                            "green": green,
                            "blue": blue,
                        }
                    )
                    trackConfig["style"]["color"] = color_function.replace("\n", "")
                elif trackFormat == "gene_calls":
                    # Default values, based on GFF3 spec
                    min_val = 0
                    max_val = 1000
                    # Get min/max and build a scoring function since JBrowse doesn't
                    if scales["type"] == "automatic" or scales["type"] == "__auto__":
                        min_val, max_val = self.min_max_gff(gff3)
                        if min_val is None or max_val is None:
                            # no scored feature found: keep the defaults rather
                            # than formatting "None" into the JavaScript
                            min_val, max_val = 0, 1000
                    else:
                        min_val = scales.get("min", 0)
                        max_val = scales.get("max", 1000)

                    if scheme["color"] == "__auto__":
                        user_color = "undefined"
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
                    elif scheme["color"].startswith("#"):
                        user_color = "'%s'" % self.hex_from_rgb(
                            *self.rgb_from_hex(scheme["color"][1:])
                        )
                        auto_color = "undefined"
                    else:
                        user_color = "undefined"
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())

                    color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(
                        **{
                            "opacity": self.OPACITY_MATH[algo].format(
                                **{"max": max_val, "min": min_val}
                            ),
                            "user_spec_color": user_color,
                            "auto_gen_color": auto_color,
                        }
                    )

                    trackConfig["style"]["color"] = color_function.replace("\n", "")
        return trackConfig
323
324
def etree_to_dict(t):
    """Convert an ElementTree element into nested dicts.

    The result is {tag: content}. Child elements are grouped by tag (a tag
    seen once maps to its value, a repeated tag to a list), attributes are
    stored under "@name" keys and stripped text under "#text". A leaf
    without attributes maps straight to its stripped text, or to None when
    it has no text. None yields {}.
    """
    if t is None:
        return {}

    kids = list(t)
    attrs = t.attrib

    if kids:
        grouped = defaultdict(list)
        for kid in kids:
            for key, val in etree_to_dict(kid).items():
                grouped[key].append(val)
        content = {
            key: (vals[0] if len(vals) == 1 else vals)
            for key, vals in grouped.items()
        }
    elif attrs:
        content = {}
    else:
        content = None

    if attrs:
        content.update(("@" + key, val) for key, val in attrs.items())

    if t.text:
        stripped = t.text.strip()
        if kids or attrs:
            # mixed content: keep the text only when something is left
            if stripped:
                content["#text"] = stripped
        else:
            content = stripped

    return {t.tag: content}
347
348
# Directory containing this script (symlinks resolved); helper scripts are
# looked up relative to it.
INSTALLED_TO = os.path.split(os.path.realpath(__file__))[0]
350
351
def metadata_from_node(node):
    """Flatten the attributes of a track's metadata XML into one dict.

    node: element searched with findall() for "dataset", "history",
        "metadata" and "tool" children.
    Returns {} unless exactly one "dataset" child exists (or when node
    cannot be searched at all). Otherwise every attribute of the first
    matching child is stored as "<tag>_<attribute>". Some values are then
    replaced by HTML links, and "hist_name" / "tool_tool" are added.

    The link decoration runs once and only for keys that are actually
    present, so a missing history or dataset attribute no longer raises
    KeyError.
    """
    metadata = {}
    try:
        datasets = node.findall("dataset")
    except Exception:
        # deliberately best-effort: anything that cannot be searched
        # (e.g. None) simply yields no metadata
        return {}
    if len(datasets) != 1:
        # exit early
        return metadata

    def collect(tag):
        """Copy the first <tag> child's attributes; report whether one exists."""
        found = node.findall(tag)
        if not found:
            return False
        for key, value in found[0].attrib.items():
            metadata["%s_%s" % (tag, key)] = value
        return True

    for key, value in datasets[0].attrib.items():
        metadata["dataset_%s" % key] = value

    collect("history")

    if collect("metadata"):
        # Additional Mappings applied:
        if "dataset_edam_format" in metadata and "dataset_file_ext" in metadata:
            metadata[
                "dataset_edam_format"
            ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
                metadata["dataset_edam_format"], metadata["dataset_file_ext"]
            )
        if "history_user_email" in metadata:
            metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
                metadata["history_user_email"]
            )
        if "history_display_name" in metadata:
            # keep the plain name before it is wrapped in a link
            metadata["hist_name"] = metadata["history_display_name"]
            metadata[
                "history_display_name"
            ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
                galaxy=GALAXY_INFRASTRUCTURE_URL,
                encoded_hist_id=metadata.get("history_id", "not available"),
                hist_name=metadata["hist_name"],
            )

    if collect("tool"):
        metadata[
            "tool_tool"
        ] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}{tool_version}</a>'.format(
            galaxy=GALAXY_INFRASTRUCTURE_URL,
            encoded_id=metadata.get("dataset_id", ""),
            tool_id=metadata.get("tool_tool_id", ""),
            tool_version=metadata.get("tool_tool_version", ""),
        )
    return metadata
400
401
402 class JbrowseConnector(object):
def __init__(self, outdir, jbrowse2path):
    """Set up empty bookkeeping, create outdir and call clone_jbrowse().

    outdir: output directory; created if missing, config.json lives in it.
    jbrowse2path: stored as-is on the instance.
    """
    # locations
    self.outdir = outdir
    self.jbrowse2path = jbrowse2path
    self.giURL = GALAXY_INFRASTRUCTURE_URL
    self.config_json_file = os.path.join(outdir, "config.json")
    os.makedirs(self.outdir, exist_ok=True)

    # per-run state, filled in as genomes and tracks are added
    self.trackCounter = 0  # to avoid name clashes
    self.assemblies = []  # these require more than a few line diff.
    self.assmeta = {}
    # for default session - these are read as first line of the assembly .fai
    self.ass_first_contigs = []
    self.genome_names = []
    self.trackIdlist = []
    self.tracksToAdd = {}
    self.config_json = {}

    self.clone_jbrowse()
420
def get_cwd(self, cwd):
    """Return the directory commands should run in.

    cwd: truthy selects self.outdir; falsy selects the current working
        directory of this process.
    """
    if cwd:
        return self.outdir
    # os.getcwd() gives the same answer as running `pwd`, without
    # spawning a process or depending on that binary being available.
    return os.getcwd()
427
def subprocess_check_call(self, command, output=None, cwd=True):
    """Log and run an argument-list command, raising if it fails.

    command: list of strings passed to subprocess.check_call.
    output: optional stdout target; when falsy stdout is left alone.
    cwd: forwarded to get_cwd() to choose the working directory.
    """
    workdir = self.get_cwd(cwd)
    rendered = " ".join(command)
    if not output:
        log.debug("cd %s && %s", workdir, rendered)
        subprocess.check_call(command, cwd=workdir)
        return
    log.debug("cd %s && %s > %s", workdir, rendered, output)
    subprocess.check_call(command, cwd=workdir, stdout=output)
435
def subprocess_popen(self, command, cwd=True):
    """Run a shell command string and raise RuntimeError on a non-zero exit.

    command: string handed to the shell (shell=True), so pipes and
        redirections work; callers are responsible for quoting.
    cwd: forwarded to get_cwd() to choose the working directory.
    On failure the command, its stdout and its stderr are logged.
    """
    log.debug(command)
    proc = subprocess.Popen(
        command,
        cwd=self.get_cwd(cwd),
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    captured_out, captured_err = proc.communicate()
    exit_code = proc.returncode
    if exit_code == 0:
        return
    for item in (command, captured_out, captured_err):
        log.error(item)
    raise RuntimeError("Command failed with exit code %s" % (exit_code))
453
def subprocess_check_output(self, command):
    """Log an argument-list command, run it in self.outdir and return its stdout bytes."""
    rendered = " ".join(command)
    log.debug(rendered)
    captured = subprocess.check_output(command, cwd=self.outdir)
    return captured
457
def symlink_or_copy(self, src, dest):
    """Link src to dest with `ln -s`, or copy it with `cp`.

    Linking is chosen when the GALAXY_JBROWSE_SYMLINKS environment
    variable is set to any non-empty value; otherwise the file is copied.
    Returns whatever subprocess_check_call returns.
    """
    wants_symlink = bool(os.environ.get("GALAXY_JBROWSE_SYMLINKS"))
    cmd = ["ln", "-s", src, dest] if wants_symlink else ["cp", src, dest]
    return self.subprocess_check_call(cmd)
467
def _prepare_track_style(self, trackDict):
    """Return {"type", "displayId"} describing the track's display.

    The first entry of trackDict["displays"] is used when there is one;
    otherwise a LinearBasicDisplay named after trackDict["trackId"].
    """
    track_id = trackDict["trackId"]
    displays = trackDict.get("displays", None)
    if not displays:
        return {
            "type": "LinearBasicDisplay",
            "displayId": "%s-LinearBasicDisplay" % track_id,
        }
    # use first if multiple like bed
    first = displays[0]
    return {"type": first["type"], "displayId": first["displayId"]}
479
def getNrow(self, url):
    """Return the number of lines in a local file or an http(s) resource.

    url: a path, or a string starting with "http://" / "https://".
    A remote resource that cannot be fetched counts as 0 lines; a missing
    local file still raises.
    """
    if url.startswith(("https://", "http://")):
        try:
            # NOTE(review): certificate verification is switched off here
            # (CERT_NONE), as before - confirm this is intended.
            scontext = ssl.SSLContext(ssl.PROTOCOL_TLS)
            scontext.verify_mode = ssl.VerifyMode.CERT_NONE
            with urllib.request.urlopen(url, context=scontext) as f:
                nrow = sum(1 for _ in f)
        except Exception:
            # best effort: an unreachable URL is reported as empty
            nrow = 0
    else:
        # count lazily and close the handle instead of leaking it
        with open(url, "r") as handle:
            nrow = sum(1 for _ in handle)
    logging.debug("### getNrow returning %d" % nrow)
    return nrow
496
497 def process_genomes(self, genomes):
# Register the reference genome(s) passed in `genomes` and return a genome name.
# Each entry of `genomes` is read through the keys "useuri", "label" and "path".
# Results are accumulated locally and then merged into self.assemblies,
# self.assmeta, self.tracksToAdd and self.genome_names at the end.
# NOTE(review): this listing has lost its indentation, so the exact nesting of
# the statements below cannot be confirmed from here - check the real file.
498 assembly = []
499 assmeta = []
# useuri is initialised once and only ever set to True below.
500 useuri = False
501 genome_names = []
502 for i, genome_node in enumerate(genomes):
503 this_genome = {}
504 if genome_node["useuri"] == "yes":
505 useuri = True
# The name is the stripped label, or the path's basename without extension when
# the label is empty; only the first whitespace-separated word is kept.
506 genome_name = genome_node["label"].strip()
507 if len(genome_name) == 0:
508 genome_name = os.path.splitext(os.path.basename(genome_node["path"]))[0]
509 if len(genome_name.split()) > 1:
510 genome_name = genome_name.split()[0]
511 # spaces and cruft break scripts when substituted
512 if genome_name not in genome_names:
513 # pafs with shared references
514 fapath = genome_node["path"]
515 if not useuri:
516 fapath = os.path.realpath(fapath)
# make_assembly returns the assembly config plus a list starting with the
# genome name followed by the first two fields of the first .fai line.
517 assem, first_contig = self.make_assembly(fapath, genome_name, useuri)
518 assembly.append(assem)
519 self.ass_first_contigs.append(first_contig)
520 if len(genome_names) == 0:
521 this_genome["genome_name"] = genome_name # first one for all tracks
522 genome_names.append(genome_name)
523 this_genome["genome_sequence_adapter"] = assem["sequence"][
524 "adapter"
525 ]
526 this_genome["genome_firstcontig"] = None
527 if not useuri:
# Local FASTA: the first contig name is the first word after ">" on line one.
# NOTE(review): the handle opened here is never explicitly closed.
528 fl = open(fapath, "r").readline()
529 fls = fl.strip().split(">")
530 if len(fls) > 1:
531 fl = fls[1]
532 if len(fl.split()) > 1:
533 this_genome["genome_firstcontig"] = fl.split()[
534 0
535 ].strip()
536 else:
537 this_genome["genome_firstcontig"] = fl
538 else:
# Remote FASTA: read the first line of "<url>.fai" instead. Certificate
# verification is disabled (CERT_NONE) and any failure leaves fl as None.
539 try:
540 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS)
541 scontext.verify_mode = ssl.VerifyMode.CERT_NONE
542 with urllib.request.urlopen(
543 url=fapath + ".fai", context=scontext
544 ) as f:
545 fl = f.readline()
546 except Exception:
547 fl = None
548 if fl: # is first row of the text fai so the first contig name
549 this_genome["genome_firstcontig"] = (
550 fl.decode("utf8").strip().split()[0]
551 )
552 assmeta.append(this_genome)
# Everything collected is filed under the first genome name seen in this call;
# genome_names[0] raises IndexError if no genome was registered.
553 self.assemblies += assembly
554 self.assmeta[genome_names[0]] = assmeta
555 self.tracksToAdd[genome_names[0]] = []
556 self.genome_names += genome_names
# this_genome is whatever the last loop iteration left behind.
557 return this_genome["genome_name"]
558
def make_assembly(self, fapath, gname, useuri):
    """Build the assembly config for one reference and find its first contig.

    fapath: FASTA location - a URL when useuri is true, else a local path.
    gname: assembly name, also used for the track id and output file names.
    useuri: when true the FASTA is referenced remotely and "<url>.fai" is
        fetched; otherwise the FASTA is bgzip-compressed into self.outdir
        as "<gname>.fa.gz" and indexed with samtools faidx.

    Returns (trackDict, first_contig), where first_contig is
    [gname, <first field>, <second field>] taken from the first line of
    the .fai index, e.g. a line such as "Merlin 172788 8 60 61".
    """
    if useuri:
        faname = fapath
        adapter = {
            "type": "BgzipFastaAdapter",
            "fastaLocation": {"uri": faname, "locationType": "UriLocation"},
            "faiLocation": {"uri": faname + ".fai", "locationType": "UriLocation"},
            "gziLocation": {"uri": faname + ".gzi", "locationType": "UriLocation"},
        }
        # NOTE(review): certificate verification is disabled (CERT_NONE),
        # as before - confirm this is intended.
        scontext = ssl.SSLContext(ssl.PROTOCOL_TLS)
        scontext.verify_mode = ssl.VerifyMode.CERT_NONE
        with urllib.request.urlopen(url=faname + ".fai", context=scontext) as f:
            fl = f.readline()
        contig = fl.decode("utf8").strip()
        # Merlin 172788 8 60 61
    else:
        from shlex import quote

        faname = gname + ".fa.gz"
        fadest = os.path.realpath(os.path.join(self.outdir, faname))
        # The command goes through a shell, so every path is quoted;
        # paths without special characters come out unchanged.
        cmd = "bgzip -i -c %s -I %s > %s && samtools faidx %s" % (
            quote(fapath),
            quote(fadest + ".gzi"),
            quote(fadest),
            quote(fadest),
        )
        self.subprocess_popen(cmd)

        adapter = {
            "type": "BgzipFastaAdapter",
            "fastaLocation": {
                "uri": faname,
            },
            "faiLocation": {
                "uri": faname + ".fai",
            },
            "gziLocation": {
                "uri": faname + ".gzi",
            },
        }
        # close the index promptly instead of leaking the handle
        with open(fadest + ".fai", "r") as fai:
            contig = fai.readline().strip()
    first_contig = contig.split()[:2]
    first_contig.insert(0, gname)
    trackDict = {
        "name": gname,
        "sequence": {
            "type": "ReferenceSequenceTrack",
            "trackId": gname,
            "adapter": adapter,
        },
        "displays": [
            {
                "type": "LinearReferenceSequenceDisplay",
                "displayId": "%s-LinearReferenceSequenceDisplay" % gname,
            },
            {
                "type": "LinearGCContentDisplay",
                "displayId": "%s-LinearGCContentDisplay" % gname,
            },
        ],
    }
    return (trackDict, first_contig)
622
def add_default_view(self):
    """Run `jbrowse set-default-session` over config.json for all known tracks.

    The config file is both the session source (-s) and the --target;
    the track list is self.trackIdlist joined with commas.
    """
    config_path = self.config_json_file
    track_csv = ",".join(self.trackIdlist)
    cmd = ["jbrowse", "set-default-session"]
    cmd += ["-s", config_path]
    cmd += ["-t", track_csv]
    cmd += ["-n", "JBrowse2 in Galaxy"]
    cmd += ["--target", config_path]
    cmd += ["-v", " LinearGenomeView"]
    self.subprocess_check_call(cmd)
639
def write_config(self):
    """Serialise self.config_json to self.config_json_file (2-space indent)."""
    rendered = json.dumps(self.config_json, indent=2)
    with open(self.config_json_file, "w") as handle:
        handle.write(rendered)
643
def text_index(self):
    """Run `jbrowse text-index` on self.outdir for the known assemblies and tracks.

    The assembly argument is self.genome_name when that attribute exists
    and is non-empty; otherwise it is the comma-joined self.genome_names,
    which is the attribute the constructor actually initialises.
    --tracks is only passed when self.trackIdlist is non-empty.
    """
    # Falling back to genome_names avoids an AttributeError when nothing
    # ever assigned genome_name.
    assemblies = getattr(self, "genome_name", None) or ",".join(self.genome_names)
    # Index tracks
    args = [
        "jbrowse",
        "text-index",
        "--target",
        self.outdir,
        "--assemblies",
        assemblies,
    ]

    tracks = ",".join(self.trackIdlist)
    if tracks:
        args += ["--tracks", tracks]

    self.subprocess_check_call(args)
660
def add_hic(self, data, trackData):
    """
    Register a HiC track (HicAdapter).
    https://github.com/aidenlab/hic-format/blob/master/HiCFormatV9.md

    data: URL of the .hic file when trackData["useuri"] is "yes",
        otherwise a local path.
    trackData: dict read via "label", "wasCool", "useuri", "category",
        "name" and "assemblyNames".

    A local file is copied to "<label>.hic" in the output directory unless
    trackData["wasCool"] is truthy or the destination already exists.

    For testing locally, these work:
    HiC data is from https://s3.amazonaws.com/igv.broadinstitute.org/data/hic/intra_nofrag_30.hic
    using hg19 reference track as a
    'BgzipFastaAdapter'
        fastaLocation:
        uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz',
        faiLocation:
        uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai',
        gziLocation:
        uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi',
    Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438
    """
    tId = trackData["label"]
    wasCool = trackData["wasCool"]
    # can be served - if public.
    # dsId = trackData["metadata"]["dataset_id"]
    # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId)
    remote = trackData["useuri"].lower() == "yes"
    logging.debug("wasCool=%s, data=%s, tId=%s" % (wasCool, data, tId))
    if remote:
        uri = data
    else:
        uri = tId + ".hic"
        if not wasCool:
            dest = os.path.join(self.outdir, uri)
            if os.path.exists(dest):
                logging.error("not wasCool but %s exists" % dest)
            else:
                self.subprocess_check_call(["cp", data, dest])
    assembly = trackData["assemblyNames"]
    trackDict = {
        "type": "HicTrack",
        "trackId": tId,
        "name": trackData["name"],
        "assemblyNames": [assembly],
        "category": [trackData["category"]],
        "adapter": {"type": "HicAdapter", "hicLocation": {"uri": uri}},
    }
    self.tracksToAdd[assembly].append(trackDict)
    self.trackIdlist.append(tId)
709
def add_maf(self, data, trackData):
    """
    Convert a MAF alignment and register it as a MafTrack.

    from https://github.com/cmdcolin/maf2bed
    Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and chromosome name
    e.g. hg38.chr1 in the sequence identifiers.
    need the reference id - eg hg18, for maf2bed.pl as the first parameter

    data: path of the MAF file.
    trackData: dict read via "label", "category", "assemblyNames", "name".

    convertMAF.sh (next to this script) is run to produce
    "<label>.sorted.bed.gz" and its .tbi in the output directory. The
    sample list is the sorted set of species names, i.e. the part before
    the first "." of the second field of every "s" line. The MafViewer
    plugin is registered in self.config_json exactly once.
    """
    tId = trackData["label"]
    mafPlugin = {
        "plugins": [
            {
                "name": "MafViewer",
                "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js",
            }
        ]
    }
    categ = trackData["category"]
    fname = tId
    dest = "%s/%s" % (self.outdir, fname)
    gname = trackData["assemblyNames"]

    cmd = [
        "bash",
        os.path.join(INSTALLED_TO, "convertMAF.sh"),
        data,
        gname,
        INSTALLED_TO,
        dest,
    ]
    self.subprocess_check_call(cmd)

    # Stream the file (and close it) instead of loading every line.
    species = set()
    n_seq_lines = 0
    with open(data, "r") as handle:
        for line in handle:
            if not line.startswith(("s\t", "s ")):
                continue
            n_seq_lines += 1
            fields = line.split()
            if len(fields) > 1:
                # "hg38.chr1" -> "hg38"; the set removes repeats of a
                # species that appears on several chromosomes
                species.add(fields[1].split(".")[0])
    samples = sorted(species)
    logging.debug(
        "$$$$ cmd=%s, %d sequence lines, samples=%s"
        % (" ".join(cmd), n_seq_lines, samples)
    )
    trackDict = {
        "type": "MafTrack",
        "trackId": tId,
        "name": trackData["name"],
        "category": [
            categ,
        ],
        "adapter": {
            "type": "MafTabixAdapter",
            "samples": samples,
            "bedGzLocation": {
                "uri": fname + ".sorted.bed.gz",
            },
            "index": {
                "location": {
                    "uri": fname + ".sorted.bed.gz.tbi",
                },
            },
        },
        "assemblyNames": [trackData["assemblyNames"]],
        "displays": [
            {
                "type": "LinearBasicDisplay",
                "displayId": "%s-LinearBasicDisplay" % tId,
            },
            {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
        ],
    }
    style_json = self._prepare_track_style(trackDict)
    trackDict["style"] = style_json
    self.tracksToAdd[gname].append(trackDict)
    self.trackIdlist.append(tId)

    plugin_entry = mafPlugin["plugins"][0]
    plugins = self.config_json.get("plugins", None)
    if plugins:
        # mafPlugin is a dict, so the entry lives under "plugins", not [0];
        # skip it when an earlier MAF track already registered it
        if plugin_entry not in plugins:
            plugins.append(plugin_entry)
    else:
        self.config_json.update(mafPlugin)
786
def _blastxml_to_gff3(self, xml, min_gap=10):
    """Convert BLAST XML to GFF3 and return the path of the new temp file.

    Runs blastxml_to_gapped_gff3.py (located next to this script) in
    self.outdir with its stdout captured in a NamedTemporaryFile that is
    kept on disk (delete=False).
    """
    converter = os.path.join(INSTALLED_TO, "blastxml_to_gapped_gff3.py")
    cmd = ["python", converter]
    cmd += ["--trim", "--trim_end", "--include_seq"]
    cmd += ["--min_gap", str(min_gap)]
    cmd.append(xml)
    with tempfile.NamedTemporaryFile(delete=False) as gff3_unrebased:
        subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
    logging.debug("### blastxml to gff3 cmd = %s" % " ".join(cmd))
    return gff3_unrebased.name
803
def add_blastxml(self, data, trackData, blastOpts, **kwargs):
    """Convert BLAST XML to GFF3, optionally rebase it, and add it as a GFF track.

    data: path of the BLAST XML file.
    trackData: passed through to add_gff.
    blastOpts: dict read via "min_gap", and optionally "parent" (a GFF3
        path, or the string "None") and "protein" ("true"/"false").
    kwargs: forwarded to add_gff unchanged.

    The intermediate GFF3 files are temporary: the rebased one is removed
    as soon as it has been copied over, and the converted one once
    add_gff has finished (or failed).
    """
    gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"])
    try:
        if "parent" in blastOpts and blastOpts["parent"] != "None":
            gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
            try:
                cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")]
                if blastOpts.get("protein", "false") == "true":
                    cmd.append("--protein2dna")
                cmd.extend([os.path.realpath(blastOpts["parent"]), gff3])
                subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
                logging.debug("### gff3rebase cmd = %s" % " ".join(cmd))
                gff3_rebased.close()
                # Replace original gff3 file
                shutil.copy(gff3_rebased.name, gff3)
            finally:
                # runs on failure too, so the temp file never lingers
                gff3_rebased.close()
                os.unlink(gff3_rebased.name)
        self.add_gff(gff3, trackData, **kwargs)
    finally:
        # add_gff has produced its own copy by now
        if os.path.exists(gff3):
            os.unlink(gff3)
819
def add_bigwig(self, data, trackData):
    """Register a bigWig file as a QuantitativeTrack.

    data: URL when trackData["useuri"] is "yes", otherwise a local path
        that is copied into the output directory under the track label.
    trackData: dict read via "label", "useuri", "category", "name" and
        "assemblyNames".
    """
    tId = trackData["label"]
    if trackData["useuri"].lower() == "yes":
        url = data
    else:
        url = tId
        # slashes in names cause path trouble
        self.subprocess_check_call(["cp", data, os.path.join(self.outdir, url)])
    assembly = trackData["assemblyNames"]
    wiggle_display = {
        "type": "LinearWiggleDisplay",
        "displayId": "%s-LinearWiggleDisplay" % tId,
    }
    trackDict = {
        "type": "QuantitativeTrack",
        "trackId": tId,
        "name": trackData["name"],
        "category": [trackData["category"]],
        "assemblyNames": [assembly],
        "adapter": {
            "type": "BigWigAdapter",
            "bigWigLocation": {"uri": url},
        },
        "displays": [wiggle_display],
    }
    trackDict["style"] = self._prepare_track_style(trackDict)
    self.tracksToAdd[assembly].append(trackDict)
    self.trackIdlist.append(tId)
856
def add_bam(self, data, trackData, bam_indexes=None, **kwargs):
    """Register a BAM file as an AlignmentsTrack.

    data: URL when trackData["useuri"] is "yes", otherwise a local path.
    trackData: dict read via "label", "key", "useuri", "category", "name"
        and "assemblyNames".
    bam_indexes: optional whitespace-separated "name,index_path" pairs;
        the pair whose name equals trackData["key"] supplies the .bai.

    For a URL the index is expected at "<url>.bai". A local BAM is copied
    into the output directory as "<label>" and its index as "<label>.bai",
    taken from bam_indexes or, failing that, from a .bai sitting next to
    the real BAM file; if neither exists a warning is logged.
    """
    tId = trackData["label"]
    realFName = trackData["key"]
    useuri = trackData["useuri"].lower() == "yes"
    categ = trackData["category"]
    if useuri:
        url = data
        # previously left undefined for URLs, which raised NameError below
        bindex = url + ".bai"
    else:
        fname = tId
        dest = "%s/%s" % (self.outdir, fname)
        url = fname
        bindex = fname + ".bai"
        self.subprocess_check_call(["cp", data, dest])
        # tolerate a missing index listing instead of failing on None
        bi = bam_indexes.split() if bam_indexes else []
        matches = [
            x.split(",")[1].strip()
            for x in bi
            if "," in x and x.split(",")[0].strip() == realFName
        ]
        bam_index = matches[0] if matches else None
        logging.debug(
            "===realFName=%s got %s as bi, %s for bam_index"
            % (realFName, bi, bam_index)
        )
        if bam_index is not None and os.path.exists(bam_index):
            if not os.path.exists(bindex):
                # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
                self.subprocess_check_call(["cp", bam_index, bindex])
        else:
            # Can happen in exotic condition
            # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam
            #      => no index generated by galaxy, but there might be one next to the symlink target
            #      this trick allows to skip the bam sorting made by galaxy if already done outside
            if os.path.exists(os.path.realpath(data) + ".bai"):
                self.symlink_or_copy(os.path.realpath(data) + ".bai", bindex)
            else:
                log.warning("Could not find a bam index (.bai file) for %s", data)
    trackDict = {
        "type": "AlignmentsTrack",
        "trackId": tId,
        "name": trackData["name"],
        "category": [
            categ,
        ],
        "assemblyNames": [trackData["assemblyNames"]],
        "adapter": {
            "type": "BamAdapter",
            "bamLocation": {"uri": url},
            "index": {
                "location": {
                    "uri": bindex,
                }
            },
        },
        "displays": [
            {
                "type": "LinearAlignmentsDisplay",
                "displayId": "%s-LinearAlignmentsDisplay" % tId,
            },
        ],
    }
    style_json = self._prepare_track_style(trackDict)
    trackDict["style"] = style_json
    self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
    self.trackIdlist.append(tId)
925
def add_cram(self, data, trackData, cram_indexes=None, **kwargs):
    """Register a CRAM file as an AlignmentsTrack.

    data: URL when trackData["useuri"] is "yes", otherwise a local path.
    trackData: dict read via "label", "key", "category", "useuri", "name"
        and "assemblyNames".
    cram_indexes: optional whitespace-separated "name,index_path" pairs;
        the pair whose name equals trackData["key"] supplies the .crai.

    The sequence adapter is looked up in self.assmeta under the track's
    assembly name; when absent the string "Not found" is used and a
    warning is logged. A local CRAM is copied into the output directory;
    its index is copied from cram_indexes when available, otherwise
    built with `samtools index`.
    """
    tId = trackData["label"]
    realFName = trackData["key"]
    categ = trackData["category"]
    useuri = trackData["useuri"].lower() == "yes"
    gsa = self.assmeta.get(trackData["assemblyNames"], None)
    if gsa:
        genseqad = gsa[0]["genome_sequence_adapter"]
    else:
        genseqad = "Not found"
        # logging.warn is a deprecated alias of logging.warning
        logging.warning("No adapter found for cram %s in gsa=%s" % (tId, gsa))
    if useuri:
        url = data
    else:
        fname = tId
        dest = os.path.join(self.outdir, fname)
        url = fname
        self.subprocess_check_call(["cp", data, dest])
        # tolerate a missing index listing instead of failing on None
        ci = cram_indexes.split() if cram_indexes else []
        matches = [
            x.split(",")[1].strip()
            for x in ci
            if "," in x and x.split(",")[0] == realFName
        ]
        cram_index = matches[0] if matches else None
        logging.debug(
            "=== for %s got %s as cram_indexes, %s for cram_index"
            % (realFName, cram_indexes, cram_index)
        )
        if cram_index and os.path.exists(cram_index):
            if not os.path.exists(dest + ".crai"):
                # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
                self.subprocess_check_call(
                    ["cp", os.path.realpath(cram_index), dest + ".crai"]
                )
        else:
            # no usable index supplied: build one next to the copied CRAM
            cpath = os.path.realpath(dest) + ".crai"
            cmd = ["samtools", "index", "-c", "-o", cpath, os.path.realpath(dest)]
            self.subprocess_check_call(cmd)
    trackDict = {
        "type": "AlignmentsTrack",
        "trackId": tId,
        "name": trackData["name"],
        "category": [
            categ,
        ],
        "assemblyNames": [trackData["assemblyNames"]],
        "adapter": {
            "type": "CramAdapter",
            "cramLocation": {"uri": url},
            "craiLocation": {
                "uri": url + ".crai",
            },
            "sequenceAdapter": genseqad,
        },
        "displays": [
            {
                "type": "LinearAlignmentsDisplay",
                "displayId": "%s-LinearAlignmentsDisplay" % tId,
            },
        ],
    }
    style_json = self._prepare_track_style(trackDict)
    trackDict["style"] = style_json
    self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
    self.trackIdlist.append(tId)
995
def add_vcf(self, data, trackData):
    """Register a VCF file as a VariantTrack.

    data: URL when trackData["useuri"] is "yes", otherwise a local path
        that is bgzip-compressed into the output directory under the track
        label and indexed with tabix.
    trackData: dict read via "label", "category", "useuri", "name" and
        "assemblyNames".
    The index is always referenced as "<uri>.tbi".
    """
    tId = trackData["label"]
    # url = "%s/api/datasets/%s/display" % (
    #     self.giURL,
    #     trackData["metadata"]["dataset_id"],
    # )
    categ = trackData["category"]
    useuri = trackData["useuri"].lower() == "yes"
    if useuri:
        url = data
    else:
        from shlex import quote

        url = tId
        dest = "%s/%s" % (self.outdir, url)
        # The redirection needs a shell, so both paths are quoted;
        # paths without special characters come out unchanged.
        cmd = "bgzip -c %s > %s" % (quote(data), quote(dest))
        self.subprocess_popen(cmd)
        cmd = ["tabix", "-f", "-p", "vcf", dest]
        self.subprocess_check_call(cmd)
    trackDict = {
        "type": "VariantTrack",
        "trackId": tId,
        "name": trackData["name"],
        "assemblyNames": [trackData["assemblyNames"]],
        "category": [
            categ,
        ],
        "adapter": {
            "type": "VcfTabixAdapter",
            "vcfGzLocation": {"uri": url},
            "index": {
                "location": {
                    "uri": url + ".tbi",
                }
            },
        },
        "displays": [
            {
                "type": "LinearVariantDisplay",
                "displayId": "%s-LinearVariantDisplay" % tId,
            },
            {
                "type": "ChordVariantDisplay",
                "displayId": "%s-ChordVariantDisplay" % tId,
            },
            {
                "type": "LinearPairedArcDisplay",
                "displayId": "%s-LinearPairedArcDisplay" % tId,
            },
        ],
    }
    style_json = self._prepare_track_style(trackDict)
    trackDict["style"] = style_json
    self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
    self.trackIdlist.append(tId)
1049
def _sort_gff(self, data, dest):
    """Sort and bgzip a GFF into dest, then tabix-index it.

    Nothing is done when dest already exists.
    """
    # Only index if not already done
    if os.path.exists(dest):
        return
    pipeline = "jbrowse sort-gff '%s' | bgzip -c > '%s'" % (data, dest)
    self.subprocess_popen(pipeline)
    tabix_cmd = ["tabix", "-f", "-p", "gff", dest]
    self.subprocess_check_call(tabix_cmd)
1059
def _sort_bed(self, data, dest):
    """Sort a BED by its first column then numerically by its second, bgzip it into dest and tabix-index it.

    Nothing is done when dest already exists.
    """
    # Only index if not already done
    if os.path.exists(dest):
        return
    pipeline = "sort -k1,1 -k2,2n '%s' | bgzip -c > '%s'" % (data, dest)
    self.subprocess_popen(pipeline)
    self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest])
1067
def add_gff(self, data, trackData):
    """Register a GFF dataset as a ``FeatureTrack`` using ``Gff3TabixAdapter``.

    With ``trackData["useuri"]`` equal to "yes" (case-insensitive) the track
    points at ``trackData["path"]``. Otherwise ``data`` is sorted, compressed
    and indexed into ``self.outdir`` as ``<label>.gz`` via ``_sort_gff``.

    The finished track configuration is appended to
    ``self.tracksToAdd[trackData["assemblyNames"]]`` and its id to
    ``self.trackIdlist``.
    """
    track_id = trackData["label"]
    if trackData["useuri"].lower() == "yes":
        # Remote data: reference the supplied location directly.
        location = trackData["path"]
    else:
        location = track_id + ".gz"
        self._sort_gff(data, "%s/%s" % (self.outdir, location))
    category = trackData["category"]
    assembly = trackData["assemblyNames"]
    adapter = {
        "type": "Gff3TabixAdapter",
        "gffGzLocation": {"uri": location},
        "index": {"location": {"uri": location + ".tbi"}},
    }
    displays = [
        {
            "type": "LinearBasicDisplay",
            "displayId": "%s-LinearBasicDisplay" % track_id,
        },
        {
            "type": "LinearArcDisplay",
            "displayId": "%s-LinearArcDisplay" % track_id,
        },
    ]
    track = {
        "type": "FeatureTrack",
        "trackId": track_id,
        "name": trackData["name"],
        "assemblyNames": [assembly],
        "category": [category],
        "adapter": adapter,
        "displays": displays,
    }
    # The style is computed from the track definition, then stored on it.
    track["style"] = self._prepare_track_style(track)
    self.tracksToAdd[assembly].append(track)
    self.trackIdlist.append(track_id)
1112
def add_bed(self, data, ext, trackData):
    """Register a BED dataset as a ``FeatureTrack`` using ``BedTabixAdapter``.

    With ``trackData["useuri"]`` equal to "yes" (case-insensitive) ``data`` is
    used verbatim as the track location. Otherwise ``data`` is sorted,
    compressed and indexed into ``self.outdir`` as ``<label>.gz`` via
    ``_sort_bed``.

    Args:
        data: Path (or URI when ``useuri`` is "yes") of the BED input.
        ext: Dataset extension; accepted for interface compatibility but not
            used by this method.
        trackData: Track settings; the keys read here are ``label``,
            ``category``, ``useuri``, ``name`` and ``assemblyNames``.

    Side effects:
        Appends the track configuration to
        ``self.tracksToAdd[trackData["assemblyNames"]]`` and the track id to
        ``self.trackIdlist``.
    """
    tId = trackData["label"]
    categ = trackData["category"]
    useuri = trackData["useuri"].lower() == "yes"
    if useuri:
        url = data
    else:
        url = tId + ".gz"
        dest = "%s/%s" % (self.outdir, url)
        self._sort_bed(data, dest)
    trackDict = {
        "type": "FeatureTrack",
        "trackId": tId,
        "name": trackData["name"],
        "assemblyNames": [trackData["assemblyNames"]],
        # The category belongs on the track itself (as in add_gff / add_vcf),
        # not inside the adapter block where it was previously nested.
        "category": [
            categ,
        ],
        "adapter": {
            "type": "BedTabixAdapter",
            "bedGzLocation": {
                "uri": url,
            },
            "index": {
                "location": {
                    "uri": url + ".tbi",
                }
            },
        },
        "displays": [
            {
                "type": "LinearBasicDisplay",
                "displayId": "%s-LinearBasicDisplay" % tId,
            },
            {
                "type": "LinearPileupDisplay",
                "displayId": "%s-LinearPileupDisplay" % tId,
            },
            {
                "type": "LinearArcDisplay",
                "displayId": "%s-LinearArcDisplay" % tId,
            },
        ],
    }
    # The style is derived from the track definition before it is attached.
    style_json = self._prepare_track_style(trackDict)
    trackDict["style"] = style_json
    self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
    self.trackIdlist.append(tId)
1161
def add_paf(self, data, trackData, pafOpts, **kwargs):
    """Register a PAF alignment as a ``SyntenyTrack``.

    A local ``data`` path is linked or copied into ``self.outdir`` under the
    track label; an ``http(s)://`` location is referenced directly. Every
    comparison genome listed in ``pafOpts["genome"]`` that is not yet in
    ``self.genome_names`` is turned into an assembly with
    ``self.make_assembly``.

    Args:
        data: Local path or http(s) URL of the PAF file.
        trackData: Track settings; the keys read here are ``name``,
            ``label``, ``category`` and ``assemblyNames``.
        pafOpts: Mapping with comma-separated ``genome`` (paths or URLs) and
            ``genome_label`` (display names, blanks allowed) strings.
        **kwargs: Accepted and ignored.

    Side effects:
        May extend ``self.genome_names``, ``self.assemblies`` and
        ``self.tracksToAdd``; appends the track configuration to
        ``self.tracksToAdd[trackData["assemblyNames"]]`` and the track id to
        ``self.trackIdlist``.
    """
    url_prefixes = ("http://", "https://")
    tname = trackData["name"]
    tId = trackData["label"]
    if data.startswith(url_prefixes):
        url = data
        nrow = self.getNrow(url)
    else:
        url = tId
        dest = "%s/%s" % (self.outdir, url)
        self.symlink_or_copy(os.path.realpath(data), dest)
        nrow = self.getNrow(dest)
    categ = trackData["category"]
    pgnames = [x.strip() for x in pafOpts["genome_label"].split(",")]
    pgpaths = [
        x.strip() for x in pafOpts["genome"].split(",") if len(x.strip()) > 0
    ]
    # Fewer labels than genomes (for example a blank label field with two
    # genomes) used to raise IndexError below; pad with blanks so that the
    # missing names fall back to the genome file name.
    pgnames.extend([""] * (len(pgpaths) - len(pgnames)))
    passnames = [trackData["assemblyNames"]]  # always first
    for i, gp in enumerate(pgpaths):
        if not pgnames[i]:
            # user may have left it blank - cannot make non-optional if want optional tracks.
            pgnames[i] = os.path.splitext(os.path.basename(gp))[0]
    logging.debug(
        "### add_paf got pafOpts=%s, pgnames=%s, pgpaths=%s for %s"
        % (pafOpts, pgnames, pgpaths, tId)
    )
    for gname, gp in zip(pgnames, pgpaths):
        # Keep only the first word: names with spaces cause trouble in
        # command lines.
        if len(gname.split()) > 1:
            gname = gname.split()[0]
        passnames.append(gname)
        useuri = gp.startswith(url_prefixes)
        if gname not in self.genome_names:
            # ignore if already there - eg for duplicates among pafs.
            asstrack, _first_contig = self.make_assembly(gp, gname, useuri)
            self.genome_names.append(gname)
            self.tracksToAdd[gname] = []
            self.assemblies.append(asstrack)
    trackDict = {
        "type": "SyntenyTrack",
        "trackId": tId,
        "assemblyNames": passnames,
        "category": [
            categ,
        ],
        "name": tname,
        "adapter": {
            "type": "PAFAdapter",
            "pafLocation": {"uri": url},
            "assemblyNames": passnames,
        },
        "displays": [
            {
                "type": "LGVSyntenyDisplay",
                "displayId": "%s-LGVSyntenyDisplay" % tId,
            },
            {
                "type": "DotplotDisplay",
                "displayId": "%s-DotplotDisplay" % tId,
            },
            {
                "type": "LinearComparativeDisplay",
                "displayId": "%s-LinearComparativeDisplay" % tId,
            },
            {
                # NOTE(review): type and displayId disagree here
                # (LinearBasicDisplay vs LinearSyntenyDisplay) - confirm which
                # one is intended before changing either.
                "type": "LinearBasicDisplay",
                "displayId": "%s-LinearSyntenyDisplay" % tId,
            },
        ],
    }
    # Files with more than 10000 rows (as reported by getNrow) default to
    # the synteny display, smaller ones to the basic display.
    if nrow > 10000:
        style_json = {
            "type": "LGVSyntenyDisplay",
            "displayId": "%s-LGVSyntenyDisplay" % tId,
        }
    else:
        style_json = {
            "type": "LinearBasicDisplay",
            "displayId": "%s-LinearBasicDisplay" % tId,
        }
    trackDict["style"] = style_json
    self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
    self.trackIdlist.append(tId)
1248
def process_annotations(self, track):
    """Create one JBrowse2 track per entry of ``track["trackfiles"]``.

    This is a generator: nothing happens until it is iterated. For each
    5-tuple ``(path, ext, useuri, label, metadata)`` in
    ``track["trackfiles"]`` it builds a per-track settings dict, dispatches
    on the dataset extension to the matching ``add_*`` method and then
    yields the generated track label.

    Args:
        track: Mapping providing ``category``, ``trackfiles`` and
            ``assemblyNames``; ``trackset`` is optional, and ``conf`` is
            read for bam, cram, blastxml and paf datasets.

    Yields:
        The generated label ``"<label>_<counter>.<ext>"`` of every entry,
        including entries whose extension is not handled (those only
        trigger a warning).
    """
    category = track["category"].replace("__pd__date__pd__", TODAY)
    for (
        dataset_path,
        dataset_ext,
        useuri,
        track_human_label,
        extra_metadata,
    ) in track["trackfiles"]:
        if not dataset_path.strip().startswith("http"):
            # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
            for key, value in mapped_chars.items():
                track_human_label = track_human_label.replace(value, key)
            track_human_label = track_human_label.replace(" ", "_")
        outputTrackConfig = {
            "category": category,
            "style": {},
        }
        outputTrackConfig["assemblyNames"] = track["assemblyNames"]
        outputTrackConfig["key"] = track_human_label
        outputTrackConfig["useuri"] = useuri
        outputTrackConfig["path"] = dataset_path
        outputTrackConfig["ext"] = dataset_ext

        outputTrackConfig["trackset"] = track.get("trackset", {})
        # The running counter keeps labels distinct across tracks.
        outputTrackConfig["label"] = "%s_%d.%s" % (
            track_human_label,
            self.trackCounter,
            dataset_ext,
        )
        self.trackCounter += 1
        outputTrackConfig["metadata"] = extra_metadata
        outputTrackConfig["name"] = track_human_label

        if dataset_ext in ("gff", "gff3"):
            self.add_gff(
                dataset_path,
                outputTrackConfig,
            )
        elif dataset_ext in ("hic", "juicebox_hic"):
            outputTrackConfig["wasCool"] = False
            self.add_hic(
                dataset_path,
                outputTrackConfig,
            )
        elif dataset_ext in ("cool", "mcool", "scool"):
            # Cooler files are converted to .hic with hictk first.
            hic_url = outputTrackConfig["label"]
            hic_path = os.path.join(self.outdir, hic_url) + ".hic"
            outputTrackConfig["wasCool"] = True
            self.subprocess_check_call(
                [
                    "hictk",
                    "convert",
                    "-f",
                    "--output-fmt",
                    "hic",
                    dataset_path,
                    hic_path,
                ]
            )
            logging.debug(
                "### ext=cool: wasCool=%s, hic_path=%s"
                % (outputTrackConfig["wasCool"], hic_path)
            )
            self.add_hic(
                hic_path,
                outputTrackConfig,
            )
        elif dataset_ext in ("bed",):
            self.add_bed(
                dataset_path,
                dataset_ext,
                outputTrackConfig,
            )
        elif dataset_ext in ("maf",):
            self.add_maf(
                dataset_path,
                outputTrackConfig,
            )
        elif dataset_ext == "bigwig":
            self.add_bigwig(
                dataset_path,
                outputTrackConfig,
            )
        elif dataset_ext == "bam":
            real_indexes = track["conf"]["options"]["bam"]["bam_index"]
            logging.debug("**** add bam got %s for indexes" % real_indexes)
            self.add_bam(
                dataset_path,
                outputTrackConfig,
                bam_indexes=real_indexes,
            )
        elif dataset_ext == "cram":
            real_indexes = track["conf"]["options"]["cram"]["cram_index"]
            logging.debug("**** add cram got %s for indexes" % real_indexes)
            self.add_cram(
                dataset_path,
                outputTrackConfig,
                cram_indexes=real_indexes,
            )
        elif dataset_ext == "blastxml":
            self.add_blastxml(
                dataset_path,
                outputTrackConfig,
                track["conf"]["options"]["blast"],
            )
        elif dataset_ext == "vcf":
            self.add_vcf(dataset_path, outputTrackConfig)
        elif dataset_ext == "paf":
            self.add_paf(
                dataset_path,
                outputTrackConfig,
                track["conf"]["options"]["paf"],
            )
        else:
            # logging.warn is a deprecated alias that newer Python releases
            # no longer provide; logging.warning is the supported spelling.
            logging.warning("Do not know how to handle %s", dataset_ext)
        # Return non-human label for use in other fields
        yield outputTrackConfig["label"]
1367
def add_default_session(self, default_data):
    """
    Write a ``defaultSession`` block into the JBrowse2 config file.

    default session settings are hard and fragile.
    .add_default_view() and other configuration code adapted from
    https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py

    One ``LinearGenomeView`` is built per key of ``self.assmeta``. It holds
    the tracks of that assembly listed in
    ``default_data[assembly]["visibility"]["default_on"]``. The displayed
    region is the first contig recorded in ``self.ass_first_contigs`` for
    the assembly, or else the ``contig:start..end`` string in
    ``default_data["defaultLocation"]`` when one is given and parses.

    Args:
        default_data: Mapping of assembly name to a dict with ``visibility``
            and ``style`` entries, plus the optional top-level keys
            ``defaultLocation`` and ``session_name``.

    Side effects:
        Reads ``self.config_json_file``, merges the result into
        ``self.config_json`` and rewrites the file. Style dicts taken from
        ``default_data`` are modified in place.
    """
    # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708
    track_types = {}
    with open(self.config_json_file, "r") as config_file:
        config_json = json.load(config_file)
    if self.config_json:
        config_json.update(self.config_json)
    session_json = config_json.get("defaultSession", {})
    session_views = []
    for gnome in self.assmeta.keys():  # assemblies have their own tracks
        tracks_data = []
        for track_conf in self.tracksToAdd[gnome]:
            tId = track_conf["trackId"]
            if tId in default_data[gnome]["visibility"]["default_on"]:
                track_types[tId] = track_conf["type"]
                style_data = default_data[gnome]["style"].get(tId, None)
                if not style_data:
                    logging.debug(
                        "### No style data for %s in available default data %s"
                        % (tId, default_data)
                    )
                    style_data = {"type": "LinearBasicDisplay"}
                if "displays" in track_conf:
                    # The first configured display decides the display type.
                    disp = track_conf["displays"][0]["type"]
                    style_data["type"] = disp
                if track_conf.get("style_labels", None):
                    # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work
                    # TODO move this to per track displays?
                    style_data["labels"] = track_conf["style_labels"]
                tracks_data.append(
                    {
                        "type": track_types[tId],
                        "configuration": tId,
                        "displays": [style_data],
                    }
                )
        view_json = {
            "type": "LinearGenomeView",
            "offsetPx": 0,
            "minimized": False,
            "tracks": tracks_data,
        }
        logging.debug(
            "Looking for %s in self.ass_ %s" % (gnome, self.ass_first_contigs)
        )
        first = [x for x in self.ass_first_contigs if x[0] == gnome]
        if len(first) > 0:
            _name, refName, end = first[0]
            drdict = {
                "refName": refName,
                "start": 0,
                "end": int(end),
                "reversed": False,
                "assemblyName": gnome,
            }
        else:
            # Start from a fresh region for this assembly. Previously drdict
            # was either unbound here or still held the region of the
            # assembly handled in the previous iteration.
            drdict = {
                "reversed": False,
                "assemblyName": gnome,
            }
            ddl = default_data.get("defaultLocation", None)
            if ddl:
                loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl)
                # allow commas like 100,000 but ignore as integer
                if loc_match:
                    drdict["refName"] = loc_match.group(1)
                    if loc_match.group(2):
                        drdict["start"] = int(loc_match.group(2).replace(",", ""))
                    if loc_match.group(3):
                        drdict["end"] = int(loc_match.group(3).replace(",", ""))
                else:
                    logging.info(
                        "@@@ regexp could not match contig:start..end in the supplied location %s - please fix"
                        % ddl
                    )
        if drdict.get("refName", None):
            # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
            view_json["displayedRegions"] = [
                drdict,
            ]
            logging.info("@@@ defaultlocation %s for default session" % drdict)
        else:
            logging.info(
                "@@@ no track location for default session - please add one!"
            )
        session_views.append(view_json)
    # A missing, None or empty session name falls back to the default.
    session_name = default_data.get("session_name") or "New session"
    for key, value in mapped_chars.items():
        session_name = session_name.replace(value, key)
    session_json["name"] = session_name

    if "views" not in session_json:
        session_json["views"] = session_views
    else:
        session_json["views"] += session_views
    pp = json.dumps(session_views, indent=2)
    config_json["defaultSession"] = session_json
    self.config_json.update(config_json)
    logging.debug("defaultSession=%s" % (pp))
    with open(self.config_json_file, "w") as config_file:
        json.dump(self.config_json, config_file, indent=2)
1477
def add_defsess_to_index(self, data):
    """
    Wrap the JBrowse2 index page in an iframe page carrying a session spec.

    Included on request of the new codeowner, from Anthony's IUC PR.
    Had to be fixed to keep each assembly with the associated tracks for a default view.
    Originally used only the first assembly, putting all tracks there and so breaking some
    when tested with 2 or more.

    ----------------------------------------------------------
    Add some default session settings: set some assemblies/tracks on/off

    This allows to select a default view:
      - jb type (Linear, Circular, etc)
      - default location on an assembly
      - default tracks
      - ...

    Different methods to do that were tested/discussed:
      - using a defaultSession item in config.json: this proved to be difficult:
        forced to write a full session block, including hard-coded/hard-to-guess items,
        no good way to let Jbrowse2 display a scaffold without knowing its size
      - using JBrowse2 as an embedded React component in a tool-generated html file:
        it works but it requires generating js code to actually do what we want = choosing default view, assembly, tracks, ...
      - writing a session-spec inside the config.json file: this is not yet supported as of 2.10.2 (see PR 4148 below)
        a session-spec is a kind of simplified defaultSession where you don't need to specify every aspect of the session
      - passing a session-spec through URL params by embedding the JBrowse2 index.html inside an iframe
        we selected this option

    Xrefs to understand the choices:
      https://github.com/GMOD/jbrowse-components/issues/2708
      https://github.com/GMOD/jbrowse-components/discussions/3568
      https://github.com/GMOD/jbrowse-components/pull/4148

    Args:
        data: Mapping of assembly name to a dict whose ``tracks`` entry lists
            the track ids to show for that assembly.

    Side effects:
        Renames ``index.html`` in ``self.outdir`` to ``index_noview.html``
        and writes a new ``index.html`` built from ``INDEX_TEMPLATE``.
    """
    session_spec = {"views": []}
    # Pre-bind the names used by the final log message so that an empty
    # ass_first_contigs list no longer ends in a NameError after the files
    # have already been rewritten.
    gnome = None
    refName = None
    logging.debug("def ass_first=%s\ndata=%s" % (self.ass_first_contigs, data))
    for first_contig in self.ass_first_contigs:
        logging.debug("first contig=%s" % self.ass_first_contigs)
        [gnome, refName, end] = first_contig
        start = 0
        aview = {
            "assembly": gnome,
            "loc": "{}:{}..{}".format(refName, start, end),
            "type": "LinearGenomeView",
            "tracks": data[gnome]["tracks"],
        }
        session_spec["views"].append(aview)
    sess = json.dumps(session_spec, sort_keys=True, indent=2)
    # NOTE(review): the JSON is placed unescaped inside a single-quoted HTML
    # attribute / URL query; confirm whether it should be URL-encoded.
    new_index = INDEX_TEMPLATE.replace(
        "__SESSION_SPEC__", "&session=spec-{}".format(sess)
    )

    # Keep the original page under the name the iframe template loads.
    os.rename(
        os.path.join(self.outdir, "index.html"),
        os.path.join(self.outdir, "index_noview.html"),
    )

    with open(os.path.join(self.outdir, "index.html"), "w") as nind:
        nind.write(new_index)
    logging.debug(
        "#### add_defsession gnome=%s refname=%s\nsession_spec=%s\nnew_index=%s"
        % (gnome, refName, sess, new_index)
    )
1540
def add_general_configuration(self, data):
    """
    Merge general (analytics / theme) settings into the config.json file.

    The file at ``self.config_json_file`` is loaded when present, overlaid
    with ``self.config_json``, given an updated ``configuration`` section
    built from ``data``, merged back into ``self.config_json`` and written
    out again.

    Args:
        data: Mapping with the optional keys ``analytics``,
            ``primary_color``, ``secondary_color``, ``tertiary_color``,
            ``quaternary_color`` and ``font_size``.
    """
    target = self.config_json_file
    merged = {}
    if os.path.exists(target):
        with open(target, "r") as handle:
            merged = json.load(handle)
    if self.config_json:
        merged.update(self.config_json)

    palette = {
        "primary": {"main": data.get("primary_color", "#0D233F")},
        "secondary": {"main": data.get("secondary_color", "#721E63")},
        "tertiary": {"main": data.get("tertiary_color", "#135560")},
        "quaternary": {"main": data.get("quaternary_color", "#FFB11D")},
    }
    general = {
        # True only when the "analytics" setting is the literal string "true".
        "disableAnalytics": data.get("analytics", "false") == "true",
        "theme": {
            "palette": palette,
            "typography": {"fontSize": int(data.get("font_size", 10))},
        },
    }

    # Reuse an existing non-empty section, otherwise start a new one.
    merged["configuration"] = merged.get("configuration", None) or {}
    merged["configuration"].update(general)
    self.config_json.update(merged)
    with open(target, "w") as handle:
        json.dump(self.config_json, handle, indent=2)
1573
def clone_jbrowse(self, realclone=False):
    """Populate ``self.outdir`` with a JBrowse2 installation.

    With ``realclone`` true, ``jbrowse create`` is run for the version
    pinned in ``JB2VER``; otherwise the tree at ``self.jbrowse2path`` is
    copied. Afterwards a fixed set of files is removed from the output and
    ``jb2_webserver.py`` is copied in from ``INSTALLED_TO``.

    The original note says this also works in Biocontainer testing, and
    recommends leaving ``realclone`` as True between version updates on
    temporary tools (requires a manual conda trigger).
    """
    target = self.outdir
    if not realclone:
        shutil.copytree(self.jbrowse2path, target, dirs_exist_ok=True)
    else:
        create_cmd = ["jbrowse", "create", target, "-f", "--tag", f"{JB2VER}"]
        self.subprocess_check_call(create_cmd)
    # Entries removed from the freshly created output directory.
    unwanted = (
        "asset-manifest.json",
        "favicon.ico",
        "robots.txt",
        "umd_plugin.js",
        "version.txt",
        "test_data",
    )
    for leftover in unwanted:
        self.subprocess_check_call(["rm", "-rf", os.path.join(target, leftover)])
    webserver = os.path.join(INSTALLED_TO, "jb2_webserver.py")
    self.subprocess_check_call(["cp", webserver, target])
1597
1598
def parse_style_conf(item):
    """Convert the text of a style element into a Python value.

    Args:
        item: Object with a ``text`` attribute (an XML element in practice).

    Returns:
        ``True`` for "yes"/"true" and ``False`` for "no"/"false" (compared
        case-insensitively); any other text is returned unchanged, and
        ``None`` is returned when the element has no text.
    """
    text = item.text
    if text is None:
        # Empty element: nothing to interpret.
        return None
    lowered = text.lower()
    if lowered in ("false", "true", "yes", "no"):
        # The method must be called: comparing the bound method object
        # itself against the tuple always produced False.
        return lowered in ("yes", "true")
    return text
1604
1605
if __name__ == "__main__":
    # Command-line entry point: read the track configuration XML given by
    # --xml, register every assembly and track with a JbrowseConnector, then
    # write the resulting config (general settings, tracks, assemblies and a
    # default session).
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument("--xml", help="Track Configuration")
    parser.add_argument(
        "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda"
    )
    parser.add_argument("--outdir", help="Output directory", default="out")
    parser.add_argument("--version", "-V", action="version", version=JB2VER)
    args = parser.parse_args()
    tree = ET.parse(args.xml)
    root = tree.getroot()

    # This should be done ASAP
    GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text
    # Sometimes this comes as `localhost` without a protocol
    if not GALAXY_INFRASTRUCTURE_URL.startswith("http"):
        # so we'll prepend `http://` and hope for the best. Requests *should*
        # be GET and not POST so it should redirect OK
        GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL

    jc = JbrowseConnector(outdir=args.outdir, jbrowse2path=args.jbrowse2path)

    # Per-assembly defaults (tracks, styles, visibility) collected while the
    # tracks are processed; later passed to jc.add_default_session().
    default_session_data = {}
    for ass in root.findall("assembly"):
        genomes = [
            {
                "path": x.attrib["path"],
                "label": x.attrib["label"],
                "useuri": x.attrib["useuri"],
                "meta": metadata_from_node(x.find("metadata")),
            }
            for x in ass.findall("metadata/genomes/genome")
        ]
        # process_genomes returns the name used below to key this assembly.
        assref_name = jc.process_genomes(genomes)
        if not default_session_data.get(assref_name, None):
            default_session_data[assref_name] = {
                "tracks": [],
                "style": {},
                "style_labels": {},
                "visibility": {
                    "default_on": [],
                    "default_off": [],
                },
            }
        for track in ass.find("tracks"):
            track_conf = {}
            track_conf["trackfiles"] = []
            track_conf["assemblyNames"] = assref_name
            is_multi_bigwig = False
            try:
                # NOTE(review): this tests the truth value of an Element; an
                # element without child elements is falsy, so this branch may
                # never be taken - confirm whether "is not None" was meant.
                if track.find("options/wiggle/multibigwig") and (
                    track.find("options/wiggle/multibigwig").text == "True"
                ):
                    is_multi_bigwig = True
                    multi_bigwig_paths = []
            except KeyError:
                pass

            trackfiles = track.findall("files/trackFile")
            if trackfiles:
                for x in trackfiles:
                    track_conf["label"] = x.attrib["label"]
                    track_conf["useuri"] = x.attrib["useuri"]
                    if is_multi_bigwig:
                        multi_bigwig_paths.append(
                            (
                                x.attrib["label"],
                                x.attrib["useuri"],
                                os.path.realpath(x.attrib["path"]),
                            )
                        )
                    else:
                        if trackfiles:
                            metadata = metadata_from_node(x.find("metadata"))
                            track_conf["dataset_id"] = metadata.get(
                                "dataset_id", "None"
                            )
                            # URI tracks keep their path verbatim; local
                            # files are resolved to a real path.
                            if x.attrib["useuri"].lower() == "yes":
                                tfa = (
                                    x.attrib["path"],
                                    x.attrib["ext"],
                                    x.attrib["useuri"],
                                    x.attrib["label"],
                                    metadata,
                                )
                            else:
                                tfa = (
                                    os.path.realpath(x.attrib["path"]),
                                    x.attrib["ext"],
                                    x.attrib["useuri"],
                                    x.attrib["label"],
                                    metadata,
                                )
                            track_conf["trackfiles"].append(tfa)

            if is_multi_bigwig:
                metadata = metadata_from_node(x.find("metadata"))

                # NOTE(review): this entry has 4 items, while
                # process_annotations unpacks 5 per trackfile - verify before
                # relying on the multi-bigwig path.
                track_conf["trackfiles"].append(
                    (
                        multi_bigwig_paths,  # Passing an array of paths to represent as one track
                        "bigwig_multiple",
                        "MultiBigWig",  # Giving an hardcoded name for now
                        {},  # No metadata for multiple bigwig
                    )
                )

            track_conf["category"] = track.attrib["cat"]
            track_conf["format"] = track.attrib["format"]
            track_conf["conf"] = etree_to_dict(track.find("options"))
            # process_annotations is a generator: the tracks are only created
            # while the loop below iterates over it.
            keys = jc.process_annotations(track_conf)

            # A generator object is always truthy, so this check always passes.
            if keys:
                for key in keys:
                    vis = track.attrib.get("visibility", "default_off")
                    if not vis:
                        vis = "default_off"
                    default_session_data[assref_name]["visibility"][vis].append(key)
                    trakdat = jc.tracksToAdd[assref_name]
                    stile = {}
                    # Pick up the style stored on the matching track, if any.
                    for trak in trakdat:
                        if trak["trackId"] == key:
                            stile = trak.get("style", {})
                    # NOTE(review): Element truth-value test; true only when
                    # the style element has child elements.
                    if track.find("options/style"):
                        supdate = {
                            item.tag: parse_style_conf(item)
                            for item in track.find("options/style")
                        }
                        stile.update(supdate)
                    default_session_data[assref_name]["style"][key] = stile
                    logging.debug("@@@ for %s got style=%s" % (key, stile))
                    if track.find("options/style_labels"):
                        default_session_data[assref_name]["style_labels"][key] = {
                            item.tag: parse_style_conf(item)
                            for item in track.find("options/style_labels")
                        }
                    default_session_data[assref_name]["tracks"].append(key)
    # Session-wide settings are stored next to the per-assembly entries.
    default_session_data["defaultLocation"] = root.find(
        "metadata/general/defaultLocation"
    ).text
    default_session_data["session_name"] = root.find(
        "metadata/general/session_name"
    ).text
    logging.debug("default_session=%s" % (default_session_data))
    jc.zipOut = root.find("metadata/general/zipOut").text == "true"
    general_data = {
        "analytics": root.find("metadata/general/analytics").text,
        "primary_color": root.find("metadata/general/primary_color").text,
        "secondary_color": root.find("metadata/general/secondary_color").text,
        "tertiary_color": root.find("metadata/general/tertiary_color").text,
        "quaternary_color": root.find("metadata/general/quaternary_color").text,
        "font_size": root.find("metadata/general/font_size").text,
    }
    jc.add_general_configuration(general_data)
    # Collect the tracks of every known genome and all assemblies into the
    # in-memory config before it is written.
    trackconf = jc.config_json.get("tracks", [])
    for gnome in jc.genome_names:
        trackconf += jc.tracksToAdd[gnome]
    jc.config_json["tracks"] = trackconf
    assconf = jc.config_json.get("assemblies", [])
    assconf += jc.assemblies
    jc.config_json["assemblies"] = assconf
    logging.debug("assemblies=%s, gnames=%s" % (assconf, jc.genome_names))
    jc.write_config()
    jc.add_default_session(default_session_data)
    # note that this can be left in the config.json but has NO EFFECT if add_defsess_to_index is called.
    # jc.add_defsess_to_index(default_session_data)
    # jc.text_index() not sure what broke here.