comparison jbrowse2broken.py @ 57:94264fe60478 draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 4b5df41484f6bdf316edaf95b53c92d328ec1674-dirty
author fubar
date Thu, 21 Mar 2024 08:01:42 +0000
parents
children
comparison
equal deleted inserted replaced
56:c0097a584a8a 57:94264fe60478
1 #!/usr/bin/env python
2 # change to accumulating all configuration for config.json based on the default from the clone
3 import argparse
4 import binascii
5 import datetime
6 import json
7 import logging
8 import os
9 import re
10 import shutil
11 import struct
12 import subprocess
13 import tempfile
14 import urllib.request
15 import xml.etree.ElementTree as ET
16 from collections import defaultdict
17
18 logging.basicConfig(level=logging.INFO)
19 log = logging.getLogger("jbrowse")
20
# JBrowse2 release tag used when cloning the viewer application.
JB2VER = "v2.10.3"
# version pinned for cloning

TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
# Filled in by the caller from the Galaxy environment; used below to build
# links back to the Galaxy server (histories, datasets, tool params).
GALAXY_INFRASTRUCTURE_URL = None

# version pinned for cloning

# Reverse map of Galaxy's element-identifier sanitization: process_annotations
# replaces each __xx__ token (right) with the original character (left) to
# restore human-readable track labels.
# NOTE(review): the last entry maps the empty string to "__cn__", so
# replace() strips "__cn__" tokens rather than substituting a character.
mapped_chars = {
    ">": "__gt__",
    "<": "__lt__",
    "'": "__sq__",
    '"': "__dq__",
    "[": "__ob__",
    "]": "__cb__",
    "{": "__oc__",
    "}": "__cc__",
    "@": "__at__",
    "#": "__pd__",
    "": "__cn__",
}
42
43
class ColorScaling(object):
    """Build client-side (JavaScript) colour callbacks for JBrowse tracks.

    The *_TEMPLATE class attributes are JavaScript source carrying Python
    str.format placeholders; literal JS braces are doubled ({{ }}) so that
    .format() emits single braces.  An instance keeps a cursor into a Brewer
    palette so successive auto-coloured tracks receive distinct colours.
    """

    # JS callback: fixed RGB channels, opacity computed from a score expression.
    COLOR_FUNCTION_TEMPLATE = """
    function(feature, variableName, glyphObject, track) {{
        var score = {score};
        {opacity}
        return 'rgba({red}, {green}, {blue}, ' + opacity + ')';
    }}
    """

    # JS callback that searches the feature hierarchy (parents then children)
    # for 'color'/'score' qualifiers before falling back to supplied colours.
    COLOR_FUNCTION_TEMPLATE_QUAL = r"""
    function(feature, variableName, glyphObject, track) {{
        var search_up = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.parent() === undefined) {{
                return;
            }}else{{
                return self(sf.parent(), attr);
            }}
        }};

        var search_down = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.children() === undefined) {{
                return;
            }}else{{
                var kids = sf.children();
                for(var child_idx in kids){{
                    var x = self(kids[child_idx], attr);
                    if(x !== undefined){{
                        return x;
                    }}
                }}
                return;
            }}
        }};

        var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color});
        var score = (search_up(feature, 'score') || search_down(feature, 'score'));
        {opacity}
        if(score === undefined){{ opacity = 1; }}
        var result = /^#?([a-f\d]{{2}})([a-f\d]{{2}})([a-f\d]{{2}})$/i.exec(color);
        var red = parseInt(result[1], 16);
        var green = parseInt(result[2], 16);
        var blue = parseInt(result[3], 16);
        if(isNaN(opacity) || opacity < 0){{ opacity = 0; }}
        return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')';
    }}
    """

    # JS snippets assigning `opacity` from `score`; {min}/{max} are filled in
    # by .format() in the gene_calls branch of parse_colours().
    # NOTE(review): the "blast" entry is substituted into
    # COLOR_FUNCTION_TEMPLATE without a .format() pass of its own, so its
    # doubled braces reach the emitted JS as "{{" — confirm intended.
    OPACITY_MATH = {
        "linear": """
            var opacity = (score - ({min})) / (({max}) - ({min}));
        """,
        "logarithmic": """
            var opacity = Math.log10(score - ({min})) / Math.log10(({max}) - ({min}));
        """,
        "blast": """
            var opacity = 0;
            if(score == 0.0) {{
                opacity = 1;
            }} else {{
                opacity = (20 - Math.log10(score)) / 180;
            }}
        """,
    }

    # Kept for backward compatibility; _get_colours() uses the instance
    # attribute brewer_colour_idx set in __init__.
    BREWER_COLOUR_IDX = 0
    BREWER_COLOUR_SCHEMES = [
        (166, 206, 227),
        (31, 120, 180),
        (178, 223, 138),
        (51, 160, 44),
        (251, 154, 153),
        (227, 26, 28),
        (253, 191, 111),
        (255, 127, 0),
        (202, 178, 214),
        (106, 61, 154),
        (255, 255, 153),
        (177, 89, 40),
        (228, 26, 28),
        (55, 126, 184),
        (77, 175, 74),
        (152, 78, 163),
        (255, 127, 0),
    ]

    # Diverging palette endpoints (start hex, end hex); not referenced in the
    # code visible here — presumably consumed elsewhere. TODO confirm.
    BREWER_DIVERGING_PALLETES = {
        "BrBg": ("#543005", "#003c30"),
        "PiYg": ("#8e0152", "#276419"),
        "PRGn": ("#40004b", "#00441b"),
        "PuOr": ("#7f3b08", "#2d004b"),
        "RdBu": ("#67001f", "#053061"),
        "RdGy": ("#67001f", "#1a1a1a"),
        "RdYlBu": ("#a50026", "#313695"),
        "RdYlGn": ("#a50026", "#006837"),
        "Spectral": ("#9e0142", "#5e4fa2"),
    }

    def __init__(self):
        # Cursor into BREWER_COLOUR_SCHEMES, advanced by _get_colours().
        self.brewer_colour_idx = 0

    def rgb_from_hex(self, hexstr):
        """Return an (r, g, b) tuple for a 6-digit hex string (no leading #)."""
        # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
        return struct.unpack("BBB", binascii.unhexlify(hexstr))

    def min_max_gff(self, gff_file):
        """Return (min, max) of the score column (field 6) of *gff_file*.

        Lines without a parsable numeric sixth field are skipped.  Returns
        (None, None) when no line carries a score.
        """
        min_val = None
        max_val = None
        with open(gff_file, "r") as handle:
            for line in handle:
                try:
                    value = float(line.split("\t")[5])
                except (ValueError, IndexError):
                    # Comment/short line or non-numeric score: ignore.
                    continue
                # Explicit None checks: the previous `min_val or value`
                # idiom silently discarded a legitimate extreme of 0.0.
                if min_val is None or value < min_val:
                    min_val = value
                if max_val is None or value > max_val:
                    max_val = value
        return min_val, max_val

    def hex_from_rgb(self, r, g, b):
        """Return '#rrggbb' for integer colour channels."""
        return "#%02x%02x%02x" % (r, g, b)

    def _get_colours(self):
        """Return the next (r, g, b) from the Brewer palette, cycling."""
        r, g, b = self.BREWER_COLOUR_SCHEMES[
            self.brewer_colour_idx % len(self.BREWER_COLOUR_SCHEMES)
        ]
        self.brewer_colour_idx += 1
        return r, g, b

    def parse_menus(self, track):
        """Translate track['menus'] into a JBrowse menuTemplate config dict."""
        trackConfig = {"menuTemplate": [{}, {}, {}, {}]}

        if "menu" in track["menus"]:
            # A single menu arrives as a dict, several as a list; normalise.
            menu_list = [track["menus"]["menu"]]
            if isinstance(track["menus"]["menu"], list):
                menu_list = track["menus"]["menu"]

            for m in menu_list:
                tpl = {
                    "action": m["action"],
                    "label": m.get("label", "{name}"),
                    "iconClass": m.get("iconClass", "dijitIconBookmark"),
                }
                if "url" in m:
                    tpl["url"] = m["url"]
                if "content" in m:
                    tpl["content"] = m["content"]
                if "title" in m:
                    tpl["title"] = m["title"]

                trackConfig["menuTemplate"].append(tpl)

        return trackConfig

    def parse_colours(self, track, trackFormat, gff3=None):
        """Build the colour/style configuration for one track.

        Wiggle tracks get pos/neg colours plus a bicolor pivot; other tracks
        get either a flat colour or a generated JS callback whose opacity
        follows the feature score (gff3 is scanned for min/max if needed).
        """
        # Wiggle tracks have a bicolor pallete
        trackConfig = {"style": {}}
        if trackFormat == "wiggle":

            trackConfig["style"]["pos_color"] = track["wiggle"]["color_pos"]
            trackConfig["style"]["neg_color"] = track["wiggle"]["color_neg"]

            if trackConfig["style"]["pos_color"] == "__auto__":
                # Order matters: neg then pos consumes two palette slots.
                trackConfig["style"]["neg_color"] = self.hex_from_rgb(
                    *self._get_colours()
                )
                trackConfig["style"]["pos_color"] = self.hex_from_rgb(
                    *self._get_colours()
                )

            # Wiggle tracks can change colour at a specified place
            bc_pivot = track["wiggle"]["bicolor_pivot"]
            if bc_pivot not in ("mean", "zero"):
                # The values are either one of those two strings
                # or a number
                bc_pivot = float(bc_pivot)
            trackConfig["bicolor_pivot"] = bc_pivot
        elif "scaling" in track:
            if track["scaling"]["method"] == "ignore":
                # Flat colour: user-specified or next auto palette colour.
                if track["scaling"]["scheme"]["color"] != "__auto__":
                    trackConfig["style"]["color"] = track["scaling"]["scheme"]["color"]
                else:
                    trackConfig["style"]["color"] = self.hex_from_rgb(
                        *self._get_colours()
                    )
            else:
                # Scored method
                algo = track["scaling"]["algo"]
                # linear, logarithmic, blast
                scales = track["scaling"]["scales"]
                # type __auto__, manual (min, max)
                scheme = track["scaling"]["scheme"]
                # scheme -> (type (opacity), color)
                # ==================================
                # GENE CALLS OR BLAST
                # ==================================
                if trackFormat == "blast":
                    red, green, blue = self._get_colours()
                    color_function = self.COLOR_FUNCTION_TEMPLATE.format(
                        **{
                            "score": "feature._parent.get('score')",
                            "opacity": self.OPACITY_MATH["blast"],
                            "red": red,
                            "green": green,
                            "blue": blue,
                        }
                    )
                    trackConfig["style"]["color"] = color_function.replace("\n", "")
                elif trackFormat == "gene_calls":
                    # Default values, based on GFF3 spec
                    min_val = 0
                    max_val = 1000
                    # Get min/max and build a scoring function since JBrowse doesn't
                    if scales["type"] == "automatic" or scales["type"] == "__auto__":
                        min_val, max_val = self.min_max_gff(gff3)
                    else:
                        min_val = scales.get("min", 0)
                        max_val = scales.get("max", 1000)

                    if scheme["color"] == "__auto__":
                        user_color = "undefined"
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
                    elif scheme["color"].startswith("#"):
                        # Round-trip through rgb to normalise the hex string.
                        user_color = "'%s'" % self.hex_from_rgb(
                            *self.rgb_from_hex(scheme["color"][1:])
                        )
                        auto_color = "undefined"
                    else:
                        user_color = "undefined"
                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())

                    color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(
                        **{
                            "opacity": self.OPACITY_MATH[algo].format(
                                **{"max": max_val, "min": min_val}
                            ),
                            "user_spec_color": user_color,
                            "auto_gen_color": auto_color,
                        }
                    )

                    trackConfig["style"]["color"] = color_function.replace("\n", "")
        return trackConfig
298
299
def etree_to_dict(t):
    """Recursively convert an ElementTree element into nested dicts.

    Attributes become "@name" keys, text content becomes "#text" (or the
    value itself for leaf elements), and repeated child tags collapse into
    lists.  A None input yields an empty dict.
    """
    if t is None:
        return {}

    node = {t.tag: {} if t.attrib else None}
    kids = list(t)
    if kids:
        grouped = defaultdict(list)
        for converted in map(etree_to_dict, kids):
            for tag, val in converted.items():
                grouped[tag].append(val)
        # Single occurrences stay scalar; repeats become lists.
        node = {
            t.tag: {tag: vals[0] if len(vals) == 1 else vals for tag, vals in grouped.items()}
        }
    if t.attrib:
        node[t.tag].update(("@" + name, val) for name, val in t.attrib.items())
    if t.text:
        stripped = t.text.strip()
        if kids or t.attrib:
            if stripped:
                node[t.tag]["#text"] = stripped
        else:
            node[t.tag] = stripped
    return node
322
323
324 INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
325
326
def metadata_from_node(node):
    """Flatten a Galaxy track-metadata XML node into a flat dict.

    Attributes of the single <dataset> child become "dataset_*" keys, and
    likewise for <history>, <metadata> and <tool>; some values are rewritten
    into HTML links back to the Galaxy server.  Returns {} when *node* is
    not searchable or has no single <dataset> child.
    """
    metadata = {}
    try:
        if len(node.findall("dataset")) != 1:
            # exit early
            return metadata
    except Exception:
        return {}

    for (key, value) in node.findall("dataset")[0].attrib.items():
        metadata["dataset_%s" % key] = value

    if node.findall("history"):
        for (key, value) in node.findall("history")[0].attrib.items():
            metadata["history_%s" % key] = value

    if node.findall("metadata"):
        for (key, value) in node.findall("metadata")[0].attrib.items():
            metadata["metadata_%s" % key] = value
        # Additional Mappings applied:
        # Use .get() throughout (as the <tool> section below already does) so
        # a missing attribute yields an empty link instead of a KeyError.
        metadata[
            "dataset_edam_format"
        ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
            metadata.get("dataset_edam_format", ""),
            metadata.get("dataset_file_ext", ""),
        )
        metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
            metadata.get("history_user_email", "")
        )
        metadata["hist_name"] = metadata.get("history_display_name", "")
        metadata[
            "history_display_name"
        ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
            galaxy=GALAXY_INFRASTRUCTURE_URL,
            encoded_hist_id=metadata.get("history_id", ""),
            hist_name=metadata.get("hist_name", ""),
        )
    if node.findall("tool"):
        for (key, value) in node.findall("tool")[0].attrib.items():
            metadata["tool_%s" % key] = value
        metadata[
            "tool_tool"
        ] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}{tool_version}</a>'.format(
            galaxy=GALAXY_INFRASTRUCTURE_URL,
            encoded_id=metadata.get("dataset_id", ""),
            tool_id=metadata.get("tool_tool_id", ""),
            tool_version=metadata.get("tool_tool_version", ""),
        )
    return metadata
375
376
377 class JbrowseConnector(object):
    def __init__(self, outdir, jbrowse2path, genomes):
        """Prepare the output directory and clone the pinned JBrowse2 build.

        outdir: destination directory for the generated JBrowse2 instance.
        jbrowse2path: path to the JBrowse2 distribution to clone from.
        genomes: list of genome description dicts (path/useuri/meta) later
            consumed by process_genomes().
        """
        self.giURL = GALAXY_INFRASTRUCTURE_URL
        self.outdir = outdir
        self.jbrowse2path = jbrowse2path
        os.makedirs(self.outdir, exist_ok=True)
        self.genome_paths = genomes
        # First registered genome; default assembly for most tracks.
        self.genome_name = None
        self.genome_names = []
        # Track ids and track dicts accumulate here until write_config().
        self.trackIdlist = []
        self.tracksToAdd = []
        self.config_json = {}
        self.config_json_file = os.path.join(outdir, "config.json")
        # clone_jbrowse (defined elsewhere in this file) populates outdir
        # with the JBrowse2 application — TODO confirm its exact effects.
        self.clone_jbrowse()
391
392 def subprocess_check_call(self, command, output=None):
393 if output:
394 log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output)
395 subprocess.check_call(command, cwd=self.outdir, stdout=output)
396 else:
397 log.debug("cd %s && %s", self.outdir, " ".join(command))
398 subprocess.check_call(command, cwd=self.outdir)
399
400 def subprocess_popen(self, command):
401 log.debug(command)
402 p = subprocess.Popen(
403 command,
404 cwd=self.outdir,
405 shell=True,
406 stdin=subprocess.PIPE,
407 stdout=subprocess.PIPE,
408 stderr=subprocess.PIPE,
409 )
410 output, err = p.communicate()
411 retcode = p.returncode
412 if retcode != 0:
413 log.error(command)
414 log.error(output)
415 log.error(err)
416 raise RuntimeError("Command failed with exit code %s" % (retcode))
417
418 def _prepare_track_style(self, trackDict):
419 style_data = {
420 "type": "LinearBasicDisplay",
421 "displayId": "%s-LinearBasicDisplay" % trackDict["trackId"],
422 }
423
424 if trackDict.get("displays", None): # use first if multiple like bed
425 style_data["type"] = trackDict["displays"][0]["type"]
426 style_data["displayId"] = trackDict["displays"][0]["displayId"]
427 return {
428 "displays": [
429 style_data,
430 ]
431 }
432
    def subprocess_check_output(self, command):
        # Run *command* (argv list) in the JBrowse output directory and
        # return its captured stdout (bytes).
        log.debug(" ".join(command))
        return subprocess.check_output(command, cwd=self.outdir)
436
437 def symlink_or_copy(self, src, dest):
438 if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool(
439 os.environ["GALAXY_JBROWSE_SYMLINKS"]
440 ):
441 cmd = ["ln", "-s", src, dest]
442 else:
443 cmd = ["cp", src, dest]
444
445 return self.subprocess_check_call(cmd)
446
    def process_genomes(self):
        """Register every reference genome listed in self.genome_paths.

        Builds one assembly stanza per unique genome name via
        make_assembly(), records the first genome as the default for
        subsequent tracks, and captures its first contig name.
        """
        assemblies = []
        useuri = False
        for i, genome_node in enumerate(self.genome_paths):
            # NOTE(review): useuri is never reset to False inside the loop,
            # so a "yes" on an earlier genome carries over to all later
            # ones — confirm this is intended.
            if genome_node["useuri"].strip().lower() == "yes":
                useuri = True
            genome_name = genome_node["meta"]["dataset_dname"].strip()
            if len(genome_name.split()) > 1:
                genome_name = genome_name.split()[0]
                # spaces and cruft break scripts when substituted
            if genome_name not in self.genome_names:
                # ignore dupes - can have multiple pafs with same references?
                fapath = genome_node["path"]
                if not useuri:
                    fapath = os.path.realpath(fapath)
                assem = self.make_assembly(fapath, genome_name, useuri)
                assemblies.append(assem)
                self.genome_names.append(genome_name)
                if self.genome_name is None:
                    self.genome_name = (
                        genome_name  # first one for all tracks - other than paf
                    )
                    self.genome_sequence_adapter = assem["sequence"]["adapter"]
                    self.genome_firstcontig = None
                    if not useuri:
                        # Local fasta: first contig name comes from the first
                        # header line of the file.
                        fl = open(fapath, "r").readline()
                        fls = fl.strip().split(">")
                        if len(fls) > 1:
                            fl = fls[1]
                            if len(fl.split()) > 1:
                                self.genome_firstcontig = fl.split()[0].strip()
                            else:
                                self.genome_firstcontig = fl
                    else:
                        # Remote fasta: read the first line of its .fai index.
                        fl = urllib.request.urlopen(fapath + ".fai").readline()
                        if fl:  # is first row of the text fai so the first contig name
                            self.genome_firstcontig = (
                                fl.decode("utf8").strip().split()[0]
                            )
        if self.config_json.get("assemblies", None):
            self.config_json["assemblies"] += assemblies
        else:
            self.config_json["assemblies"] = assemblies
490
491 def make_assembly(self, fapath, gname, useuri):
492 if useuri:
493 faname = fapath
494 adapter = {
495 "type": "BgzipFastaAdapter",
496 "fastaLocation": {
497 "uri": faname,
498 "locationType": "UriLocation",
499 },
500 "faiLocation": {
501 "uri": faname + ".fai",
502 "locationType": "UriLocation",
503 },
504 "gziLocation": {
505 "uri": faname + ".gzi",
506 "locationType": "UriLocation",
507 },
508 }
509 else:
510 faname = gname + ".fa.gz"
511 fadest = os.path.realpath(os.path.join(self.outdir, faname))
512 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % (
513 fapath,
514 fadest,
515 fadest,
516 fadest,
517 )
518 self.subprocess_popen(cmd)
519
520 adapter = {
521 "type": "BgzipFastaAdapter",
522 "fastaLocation": {
523 "uri": faname,
524 },
525 "faiLocation": {
526 "uri": faname + ".fai",
527 },
528 "gziLocation": {
529 "uri": faname + ".gzi",
530 },
531 }
532
533 trackDict = {
534 "name": gname,
535 "sequence": {
536 "type": "ReferenceSequenceTrack",
537 "trackId": gname,
538 "adapter": adapter,
539 },
540 "displays": [
541 {
542 "type": "LinearReferenceSequenceDisplay",
543 "displayId": "%s-LinearReferenceSequenceDisplay" % gname,
544 },
545 {
546 "type": "LinearGCContentDisplay",
547 "displayId": "%s-LinearGCContentDisplay" % gname,
548 },
549 ],
550 }
551 return trackDict
552
553 def add_default_view(self):
554 cmd = [
555 "jbrowse",
556 "set-default-session",
557 "-s",
558 self.config_json_file,
559 "-t",
560 ",".join(self.trackIdlist),
561 "-n",
562 "JBrowse2 in Galaxy",
563 "--target",
564 self.config_json_file,
565 "-v",
566 " LinearGenomeView",
567 ]
568 self.subprocess_check_call(cmd)
569
570 def write_config(self):
571 with open(self.config_json_file, "w") as fp:
572 json.dump(self.config_json, fp, indent=2)
573
574 def text_index(self):
575 # Index tracks
576 args = [
577 "jbrowse",
578 "text-index",
579 "--target",
580 os.path.join(self.outdir, "data"),
581 "--assemblies",
582 self.genome_name,
583 ]
584
585 tracks = ",".join(self.trackIdlist)
586 if tracks:
587 args += ["--tracks", tracks]
588
589 self.subprocess_check_call(args)
590
591 def add_hic(self, data, trackData):
592 """
593 HiC adapter.
594 https://github.com/aidenlab/hic-format/blob/master/HiCFormatV9.md
595 for testing locally, these work:
596 HiC data is from https://s3.amazonaws.com/igv.broadinstitute.org/data/hic/intra_nofrag_30.hic
597 using hg19 reference track as a
598 'BgzipFastaAdapter'
599 fastaLocation:
600 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz',
601 faiLocation:
602 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai',
603 gziLocation:
604 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi',
605 Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438
606 """
607 tId = trackData["label"]
608 # can be served - if public.
609 # dsId = trackData["metadata"]["dataset_id"]
610 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId)
611 useuri = trackData["useuri"].lower() == "yes"
612 if useuri:
613 uri = data
614 else:
615 uri = trackData["hic_url"]
616 categ = trackData["category"]
617 trackDict = {
618 "type": "HicTrack",
619 "trackId": tId,
620 "name": uri,
621 "assemblyNames": [self.genome_name],
622 "category": [
623 categ,
624 ],
625 "adapter": {
626 "type": "HicAdapter",
627 "hicLocation": uri,
628 },
629 "displays": [
630 {
631 "type": "LinearHicDisplay",
632 "displayId": "%s-LinearHicDisplay" % tId,
633 },
634 ],
635 }
636 style_json = self._prepare_track_style(trackDict)
637 trackDict["style"] = style_json
638 self.tracksToAdd.append(trackDict)
639 self.trackIdlist.append(tId)
640
641 def add_maf(self, data, trackData):
642 """
643 from https://github.com/cmdcolin/maf2bed
644 Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and chromosome name
645 e.g. hg38.chr1 in the sequence identifiers.
646 need the reference id - eg hg18, for maf2bed.pl as the first parameter
647 """
648 tId = trackData["label"]
649 mafPlugin = {
650 "plugins": [
651 {
652 "name": "MafViewer",
653 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js",
654 }
655 ]
656 }
657 categ = trackData["category"]
658 fname = "%s.bed" % tId
659 dest = "%s/%s" % (self.outdir, fname)
660 gname = self.genome_name
661 cmd = [
662 "bash",
663 os.path.join(INSTALLED_TO, "convertMAF.sh"),
664 data,
665 gname,
666 INSTALLED_TO,
667 dest,
668 ]
669 self.subprocess_check_call(cmd)
670 # Construct samples list
671 # We could get this from galaxy metadata, not sure how easily.
672 ps = subprocess.Popen(["grep", "^s [^ ]*", "-o", data], stdout=subprocess.PIPE)
673 output = subprocess.check_output(("sort", "-u"), stdin=ps.stdout)
674 ps.wait()
675 outp = output.decode("ascii")
676 soutp = outp.split("\n")
677 samp = [x.split("s ")[1] for x in soutp if x.startswith("s ")]
678 samples = [x.split(".")[0] for x in samp]
679 trackDict = {
680 "type": "MafTrack",
681 "trackId": tId,
682 "name": trackData["name"],
683 "category": [
684 categ,
685 ],
686 "adapter": {
687 "type": "MafTabixAdapter",
688 "samples": samples,
689 "bedGzLocation": {
690 "uri": fname + ".sorted.bed.gz",
691 },
692 "index": {
693 "location": {
694 "uri": fname + ".sorted.bed.gz.tbi",
695 },
696 },
697 },
698 "assemblyNames": [self.genome_name],
699 "displays": [
700 {
701 "type": "LinearBasicDisplay",
702 "displayId": "%s-LinearBasicDisplay" % tId,
703 },
704 {
705 "type": "LinearArcDisplay",
706 "displayId": "%s-LinearArcDisplay" % tId,
707 },
708 ],
709 }
710 style_json = self._prepare_track_style(trackDict)
711 trackDict["style"] = style_json
712 self.tracksToAdd.append(trackDict)
713 self.trackIdlist.append(tId)
714 if self.config_json.get("plugins", None):
715 self.config_json["plugins"].append(mafPlugin[0])
716 else:
717 self.config_json.update(mafPlugin)
718
719 def _blastxml_to_gff3(self, xml, min_gap=10):
720 gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
721 cmd = [
722 "python",
723 os.path.join(INSTALLED_TO, "blastxml_to_gapped_gff3.py"),
724 "--trim",
725 "--trim_end",
726 "--include_seq",
727 "--min_gap",
728 str(min_gap),
729 xml,
730 ]
731 subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
732 gff3_unrebased.close()
733 return gff3_unrebased.name
734
    def add_blastxml(self, data, trackData, blastOpts, **kwargs):
        """Convert BLAST XML into a tabix-indexed GFF3 FeatureTrack.

        When blastOpts["parent"] names a GFF3 file, the hits are first
        rebased onto that parent's coordinates with gff3_rebase.py.
        """
        gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"])

        if "parent" in blastOpts and blastOpts["parent"] != "None":
            gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
            cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")]
            if blastOpts.get("protein", "false") == "true":
                # Protein query: scale coordinates from aa to nt space.
                cmd.append("--protein2dna")
            cmd.extend([os.path.realpath(blastOpts["parent"]), gff3])
            subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
            gff3_rebased.close()

            # Replace original gff3 file
            shutil.copy(gff3_rebased.name, gff3)
            os.unlink(gff3_rebased.name)
        url = "%s.gff3" % trackData["label"]
        dest = "%s/%s" % (self.outdir, url)
        # Sort/bgzip/tabix into the output dir; track then points at the .gz.
        self._sort_gff(gff3, dest)
        url = url + ".gz"
        tId = trackData["label"]
        categ = trackData["category"]
        trackDict = {
            "type": "FeatureTrack",
            "trackId": tId,
            "name": trackData["name"],
            "assemblyNames": [self.genome_name],
            "category": [
                categ,
            ],
            "adapter": {
                "type": "Gff3TabixAdapter",
                "gffGzLocation": {
                    "uri": url,
                },
                "index": {
                    "location": {
                        "uri": url + ".tbi",
                    }
                },
            },
            "displays": [
                {
                    "type": "LinearBasicDisplay",
                    "displayId": "%s-LinearBasicDisplay" % tId,
                },
                {
                    "type": "LinearArcDisplay",
                    "displayId": "%s-LinearArcDisplay" % tId,
                },
            ],
        }
        style_json = self._prepare_track_style(trackDict)
        trackDict["style"] = style_json
        self.tracksToAdd.append(trackDict)
        self.trackIdlist.append(tId)
        # The intermediate gff3 temp file is no longer needed.
        os.unlink(gff3)
791
792 def add_bigwig(self, data, trackData):
793 """ "type": "LinearWiggleDisplay",
794 "configuration": {},
795 "selectedRendering": "",
796 "resolution": 1,
797 "posColor": "rgb(228, 26, 28)",
798 "negColor": "rgb(255, 255, 51)",
799 "constraints": {}
800 """
801 useuri = trackData["useuri"].lower() == "yes"
802 if useuri:
803 url = data
804 else:
805 url = "%s.bigwig" % trackData["label"]
806 # slashes in names cause path trouble
807 dest = os.path.join(self.outdir, url)
808 cmd = ["cp", data, dest]
809 self.subprocess_check_call(cmd)
810 bwloc = {"uri": url}
811 tId = trackData["label"]
812 categ = trackData["category"]
813 trackDict = {
814 "type": "QuantitativeTrack",
815 "trackId": tId,
816 "name": trackData["name"],
817 "category": [
818 categ,
819 ],
820 "assemblyNames": [
821 self.genome_name,
822 ],
823 "adapter": {
824 "type": "BigWigAdapter",
825 "bigWigLocation": bwloc,
826 },
827 "displays": [
828 {
829 "type": "LinearWiggleDisplay",
830 "displayId": "%s-LinearWiggleDisplay" % tId,
831 }
832 ],
833 }
834 style_json = self._prepare_track_style(trackDict)
835 trackDict["style"] = style_json
836 self.tracksToAdd.append(trackDict)
837 self.trackIdlist.append(tId)
838
839 def add_bam(self, data, trackData, bam_index=None, **kwargs):
840 tId = trackData["label"]
841 useuri = trackData["useuri"].lower() == "yes"
842 bindex = bam_index
843 categ = trackData["category"]
844 if useuri:
845 url = data
846 else:
847 fname = "%s.bam" % trackData["label"]
848 dest = "%s/%s" % (self.outdir, fname)
849 url = fname
850 bindex = fname + ".bai"
851 self.subprocess_check_call(["cp", data, dest])
852 if bam_index is not None and os.path.exists(bam_index):
853 if not os.path.exists(bindex):
854 # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
855 self.subprocess_check_call(["cp", bam_index, bindex])
856 else:
857 # Can happen in exotic condition
858 # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam
859 # => no index generated by galaxy, but there might be one next to the symlink target
860 # this trick allows to skip the bam sorting made by galaxy if already done outside
861 if os.path.exists(os.path.realpath(data) + ".bai"):
862 self.symlink_or_copy(os.path.realpath(data) + ".bai", bindex)
863 else:
864 log.warn("Could not find a bam index (.bai file) for %s", data)
865 trackDict = {
866 "type": "AlignmentsTrack",
867 "trackId": tId,
868 "name": trackData["name"],
869 "category": [
870 categ,
871 ],
872 "assemblyNames": [self.genome_name],
873 "adapter": {
874 "type": "BamAdapter",
875 "bamLocation": {"uri": url},
876 "index": {
877 "location": {
878 "uri": bindex,
879 }
880 },
881 },
882 "displays": [
883 {
884 "type": "LinearAlignmentsDisplay",
885 "displayId": "%s-LinearAlignmentsDisplay" % tId,
886 }
887 ],
888 }
889 style_json = self._prepare_track_style(trackDict)
890 trackDict["style"] = style_json
891 self.tracksToAdd.append(trackDict)
892 self.trackIdlist.append(tId)
893
    def add_cram(self, data, trackData, cram_index=None, **kwargs):
        """Add a CRAM AlignmentsTrack.

        Local CRAMs are copied into the output directory; a .crai is copied
        from *cram_index* when usable, otherwise generated with samtools.
        """
        tId = trackData["label"]
        categ = trackData["category"]
        useuri = trackData["useuri"].lower() == "yes"
        if useuri:
            url = data
        else:
            fname = "%s.cram" % trackData["label"]
            dest = "%s/%s" % (self.outdir, fname)
            url = fname
            self.subprocess_check_call(["cp", data, dest])
            if cram_index is not None and os.path.exists(cram_index):
                if not os.path.exists(dest + ".crai"):
                    # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
                    self.subprocess_check_call(
                        ["cp", os.path.realpath(cram_index), dest + ".crai"]
                    )
            else:
                # No usable index supplied: build a .crai in place.
                cpath = os.path.realpath(dest) + ".crai"
                cmd = ["samtools", "index", "-c", "-o", cpath, os.path.realpath(dest)]
                logging.debug("executing cmd %s" % " ".join(cmd))
                self.subprocess_check_call(cmd)
        trackDict = {
            "type": "AlignmentsTrack",
            "trackId": tId,
            "name": trackData["name"],
            "category": [
                categ,
            ],
            "assemblyNames": [self.genome_name],
            "adapter": {
                "type": "CramAdapter",
                "cramLocation": {"uri": url},
                "craiLocation": {
                    "uri": url + ".crai",
                },
                # Reference adapter captured in process_genomes().
                "sequenceAdapter": self.genome_sequence_adapter,
            },
            "displays": [
                {
                    "type": "LinearAlignmentsDisplay",
                    "displayId": "%s-LinearAlignmentsDisplay" % tId,
                },
            ],
        }
        style_json = self._prepare_track_style(trackDict)
        trackDict["style"] = style_json
        self.tracksToAdd.append(trackDict)
        self.trackIdlist.append(tId)
943
944 def add_vcf(self, data, trackData):
945 tId = trackData["label"]
946 # url = "%s/api/datasets/%s/display" % (
947 # self.giURL,
948 # trackData["metadata"]["dataset_id"],
949 # )
950 categ = trackData["category"]
951 useuri = trackData["useuri"].lower() == "yes"
952 if useuri:
953 url = data
954 else:
955 url = "%s.vcf.gz" % tId
956 dest = "%s/%s" % (self.outdir, url)
957 cmd = "bgzip -c %s > %s" % (data, dest)
958 self.subprocess_popen(cmd)
959 cmd = ["tabix", "-f", "-p", "vcf", dest]
960 self.subprocess_check_call(cmd)
961 trackDict = {
962 "type": "VariantTrack",
963 "trackId": tId,
964 "name": trackData["name"],
965 "assemblyNames": [self.genome_name],
966 "category": [
967 categ,
968 ],
969 "adapter": {
970 "type": "VcfTabixAdapter",
971 "vcfGzLocation": {"uri": url},
972 "index": {
973 "location": {
974 "uri": url + ".tbi",
975 }
976 },
977 },
978 "displays": [
979 {
980 "type": "LinearVariantDisplay",
981 "displayId": "%s-LinearVariantDisplay" % tId,
982 },
983 {
984 "type": "ChordVariantDisplay",
985 "displayId": "%s-ChordVariantDisplay" % tId,
986 },
987 {
988 "type": "LinearPairedArcDisplay",
989 "displayId": "%s-LinearPairedArcDisplay" % tId,
990 },
991 ],
992 }
993 style_json = self._prepare_track_style(trackDict)
994 trackDict["style"] = style_json
995 self.tracksToAdd.append(trackDict)
996 self.trackIdlist.append(tId)
997
998 def _sort_gff(self, data, dest):
999 # Only index if not already done
1000 if not os.path.exists(dest):
1001 cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s'" % (
1002 data,
1003 dest,
1004 ) # "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'"
1005 self.subprocess_popen(cmd)
1006 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest])
1007
1008 def _sort_bed(self, data, dest):
1009 # Only index if not already done
1010 if not os.path.exists(dest):
1011 cmd = "sort -k1,1 -k2,2n '%s' | bgzip -c > '%s'" % (data, dest)
1012 self.subprocess_popen(cmd)
1013 cmd = ["tabix", "-f", "-p", "bed", dest]
1014 self.subprocess_check_call(cmd)
1015
1016 def add_gff(self, data, ext, trackData):
1017 useuri = trackData["useuri"].lower() == "yes"
1018 if useuri:
1019 url = trackData["path"]
1020 else:
1021 url = "%s.%s.gz" % (trackData["label"], ext)
1022 dest = "%s/%s" % (self.outdir, url)
1023 self._sort_gff(data, dest)
1024 tId = trackData["label"]
1025 categ = trackData["category"]
1026 trackDict = {
1027 "type": "FeatureTrack",
1028 "trackId": tId,
1029 "name": trackData["name"],
1030 "assemblyNames": [self.genome_name],
1031 "category": [
1032 categ,
1033 ],
1034 "adapter": {
1035 "type": "Gff3TabixAdapter",
1036 "gffGzLocation": {
1037 "uri": url,
1038 },
1039 "index": {
1040 "location": {
1041 "uri": url + ".tbi",
1042 }
1043 },
1044 },
1045 "displays": [
1046 {
1047 "type": "LinearBasicDisplay",
1048 "displayId": "%s-LinearBasicDisplay" % tId,
1049 },
1050 {
1051 "type": "LinearArcDisplay",
1052 "displayId": "%s-LinearArcDisplay" % tId,
1053 },
1054 ],
1055 }
1056 style_json = self._prepare_track_style(trackDict)
1057 trackDict["style"] = style_json
1058 self.tracksToAdd.append(trackDict)
1059 self.trackIdlist.append(tId)
1060
1061 def add_bed(self, data, ext, trackData):
1062 tId = trackData["label"]
1063 categ = trackData["category"]
1064 useuri = trackData["useuri"].lower() == "yes"
1065 if useuri:
1066 url = data
1067 else:
1068 url = "%s.%s.gz" % (trackData["label"], ext)
1069 dest = "%s/%s" % (self.outdir, url)
1070 self._sort_bed(data, dest)
1071 trackDict = {
1072 "type": "FeatureTrack",
1073 "trackId": tId,
1074 "name": trackData["name"],
1075 "assemblyNames": [self.genome_name],
1076 "adapter": {
1077 "category": [
1078 categ,
1079 ],
1080 "type": "BedTabixAdapter",
1081 "bedGzLocation": {
1082 "uri": url,
1083 },
1084 "index": {
1085 "location": {
1086 "uri": url + ".tbi",
1087 }
1088 },
1089 },
1090 "displays": [
1091 {
1092 "type": "LinearBasicDisplay",
1093 "displayId": "%s-LinearBasicDisplay" % tId,
1094 },
1095 {
1096 "type": "LinearPileupDisplay",
1097 "displayId": "%s-LinearPileupDisplay" % tId,
1098 },
1099 {
1100 "type": "LinearArcDisplay",
1101 "displayId": "%s-LinearArcDisplay" % tId,
1102 },
1103 ],
1104 }
1105 style_json = self._prepare_track_style(trackDict)
1106 trackDict["style"] = style_json
1107 self.tracksToAdd.append(trackDict)
1108 self.trackIdlist.append(tId)
1109
    def add_paf(self, data, trackData, pafOpts, **kwargs):
        """Add a PAF SyntenyTrack, registering any extra query assemblies.

        pafOpts["genome"] / pafOpts["genome_label"] are parallel
        comma-separated lists of fasta paths (or http(s) URIs) and display
        names for the non-default assemblies in the alignment.
        """
        tname = trackData["name"]
        tId = trackData["label"]
        categ = trackData["category"]
        pgnames = [x.strip() for x in pafOpts["genome_label"].split(",")]
        pgpaths = [x.strip() for x in pafOpts["genome"].split(",")]
        passnames = [self.genome_name]  # always first
        for i, gname in enumerate(pgnames):
            if len(gname.split()) > 1:
                gname = gname.split()[0]
            passnames.append(gname)
            # trouble from spacey names in command lines avoidance
            if gname not in self.genome_names:
                # ignore if already there - eg for duplicates among pafs.
                useuri = pgpaths[i].startswith("http://") or pgpaths[i].startswith(
                    "https://"
                )
                asstrack = self.make_assembly(pgpaths[i], gname, useuri)
                self.genome_names.append(gname)
                if self.config_json.get("assemblies", None):
                    self.config_json["assemblies"].append(asstrack)
                else:
                    self.config_json["assemblies"] = [
                        asstrack,
                    ]
        url = "%s.paf" % (trackData["label"])
        dest = "%s/%s" % (self.outdir, url)
        self.symlink_or_copy(os.path.realpath(data), dest)
        trackDict = {
            "type": "SyntenyTrack",
            "trackId": tId,
            # PAF tracks span several assemblies, not just the default one.
            "assemblyNames": passnames,
            "category": [
                categ,
            ],
            "name": tname,
            "adapter": {
                "type": "PAFAdapter",
                "pafLocation": {"uri": url},
                "assemblyNames": passnames,
            },
            "displays": [
                {
                    "type": "LinearSyntenyDisplay",
                    "displayId": "%s-LinearSyntenyDisplay" % tId,
                },
                {
                    "type": "DotPlotDisplay",
                    "displayId": "%s-DotPlotDisplay" % tId,
                },
            ],
        }
        style_json = self._prepare_track_style(trackDict)
        trackDict["style"] = style_json
        self.tracksToAdd.append(trackDict)
        self.trackIdlist.append(tId)
1166
1167 def process_annotations(self, track):
1168 category = track["category"].replace("__pd__date__pd__", TODAY)
1169 for i, (
1170 dataset_path,
1171 dataset_ext,
1172 useuri,
1173 track_human_label,
1174 extra_metadata,
1175 ) in enumerate(track["trackfiles"]):
1176 if not dataset_path.strip().startswith("http"):
1177 # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
1178 for key, value in mapped_chars.items():
1179 track_human_label = track_human_label.replace(value, key)
1180 track_human_label = track_human_label.replace(" ", "_")
1181 outputTrackConfig = {
1182 "category": category,
1183 "style": track["style"],
1184 }
1185
1186 outputTrackConfig["label"] = "%s_%i_%s" % (
1187 dataset_ext,
1188 i,
1189 track_human_label,
1190 )
1191 outputTrackConfig["useuri"] = useuri
1192 outputTrackConfig["path"] = dataset_path
1193 outputTrackConfig["ext"] = dataset_ext
1194 outputTrackConfig["key"] = track_human_label
1195
1196 outputTrackConfig["trackset"] = track.get("trackset", {})
1197 outputTrackConfig["metadata"] = extra_metadata
1198 outputTrackConfig["name"] = track_human_label
1199
1200 if dataset_ext in ("gff", "gff3"):
1201 self.add_gff(
1202 dataset_path,
1203 dataset_ext,
1204 outputTrackConfig,
1205 )
1206 elif dataset_ext in ("hic", "juicebox_hic"):
1207 self.add_hic(
1208 dataset_path,
1209 outputTrackConfig,
1210 )
1211 elif dataset_ext in ("cool", "mcool", "scool"):
1212 hic_url = "%s_%d.juicebox_hic" % (track_human_label, i)
1213 hic_path = os.path.join(self.outdir, hic_url)
1214 self.subprocess_check_call(
1215 [
1216 "hictk",
1217 "convert",
1218 "-f",
1219 "--output-fmt",
1220 "hic",
1221 dataset_path,
1222 hic_path,
1223 ]
1224 )
1225 outputTrackConfig["hic_url"] = hic_url
1226 self.add_hic(
1227 hic_path,
1228 outputTrackConfig,
1229 )
1230 elif dataset_ext in ("bed",):
1231 self.add_bed(
1232 dataset_path,
1233 dataset_ext,
1234 outputTrackConfig,
1235 )
1236 elif dataset_ext in ("maf",):
1237 self.add_maf(
1238 dataset_path,
1239 outputTrackConfig,
1240 )
1241 elif dataset_ext == "bigwig":
1242 self.add_bigwig(
1243 dataset_path,
1244 outputTrackConfig,
1245 )
1246 elif dataset_ext == "bam":
1247 real_indexes = track["conf"]["options"]["bam"]["bam_index"]
1248 self.add_bam(
1249 dataset_path,
1250 outputTrackConfig,
1251 bam_index=real_indexes,
1252 )
1253 elif dataset_ext == "cram":
1254 real_indexes = track["conf"]["options"]["cram"]["cram_index"]
1255 self.add_cram(
1256 dataset_path,
1257 outputTrackConfig,
1258 cram_index=real_indexes,
1259 )
1260 elif dataset_ext == "blastxml":
1261 self.add_blastxml(
1262 dataset_path,
1263 outputTrackConfig,
1264 track["conf"]["options"]["blast"],
1265 )
1266 elif dataset_ext == "vcf":
1267 self.add_vcf(dataset_path, outputTrackConfig)
1268 elif dataset_ext == "paf":
1269 self.add_paf(
1270 dataset_path,
1271 outputTrackConfig,
1272 track["conf"]["options"]["paf"],
1273 )
1274 else:
1275 logging.warn("Do not know how to handle %s", dataset_ext)
1276 # Return non-human label for use in other fields
1277 yield outputTrackConfig["label"]
1278
1279 def add_default_session(self, default_data):
1280 """
1281 Add some default session settings: set some assemblies/tracks on/off
1282
1283 labels off 1
1284 {
1285 "id": "JJNRSOoj8cPCTR8ZJ7Yne",
1286 "type": "VariantTrack",
1287 "configuration": "vcf_0_merlin.vcf",
1288 "minimized": false,
1289 "displays": [
1290 {
1291 "id": "JOvAkv1bdyz5SAJs3JBby",
1292 "type": "LinearVariantDisplay",
1293 "configuration": {},
1294 "trackShowLabels": false,
1295 "trackShowDescriptions": false
1296 }
1297 ]
1298 },
1299
1300 track labels at end of default view
1301 "hideHeader": false,
1302 "hideHeaderOverview": false,
1303 "hideNoTracksActive": false,
1304 "trackSelectorType": "hierarchical",
1305 "showCenterLine": false,
1306 "showCytobandsSetting": true,
1307 "trackLabels": "hidden",
1308 "showGridlines": true,
1309 "showBookmarkHighlights": true,
1310 "showBookmarkLabels": true
1311 }
1312 ],
1313 "sessionTracks": [],
1314 "sessionAssemblies": [],
1315 "temporaryAssemblies": [],
1316 "connectionInstances": [],
1317 "sessionConnections": [],
1318 "focusedViewId": "n-7YuEPiR5QUtHntU-xcO",
1319 "sessionPlugins": []
1320 }
1321 }
1322
1323
1324 """
1325 tracks_data = []
1326
1327 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708
1328
1329 # We need to know the track type from the config.json generated just before
1330 track_types = {}
1331 with open(self.config_json_file, "r") as config_file:
1332 config_json = json.load(config_file)
1333 if self.config_json:
1334 config_json.update(self.config_json)
1335
1336 for track_conf in self.tracksToAdd:
1337 tId = track_conf["trackId"]
1338 track_types[tId] = track_conf["type"]
1339 style_data = default_data["style"][tId]
1340 logging.warn(
1341 "### defsession for %s got style_data=%s given default_data %s"
1342 % (tId, style_data, default_data)
1343 )
1344 if "displays" in track_conf:
1345 disp = track_conf["displays"][0]["type"]
1346 style_data["type"] = disp
1347
1348 style_data["configuration"] = "%s-%s" % (tId, disp)
1349 if track_conf.get("style_labels", None):
1350 # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work
1351 # TODO move this to per track displays?
1352 style_data["labels"] = track_conf["style_labels"]
1353 tracks_data.append(
1354 {
1355 "type": track_types[tId],
1356 "configuration": tId,
1357 "displays": [style_data],
1358 }
1359 )
1360
1361 # The view for the assembly we're adding
1362 view_json = {"type": "LinearGenomeView", "tracks": tracks_data}
1363
1364 refName = None
1365 drdict = {
1366 "reversed": False,
1367 "assemblyName": self.genome_name,
1368 "start": 1,
1369 "end": 200000,
1370 "refName": "x",
1371 }
1372
1373 if default_data.get("defaultLocation", ""):
1374 ddl = default_data["defaultLocation"]
1375 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl)
1376 # allow commas like 100,000 but ignore as integer
1377 if loc_match:
1378 refName = loc_match.group(1)
1379 drdict["refName"] = refName
1380 if loc_match.group(2) > "":
1381 drdict["start"] = int(loc_match.group(2).replace(",", ""))
1382 if loc_match.group(3) > "":
1383 drdict["end"] = int(loc_match.group(3).replace(",", ""))
1384 else:
1385 logging.info(
1386 "@@@ regexp could not match contig:start..end in the supplied location %s - please fix"
1387 % ddl
1388 )
1389 else:
1390 drdict["refName"] = self.genome_firstcontig
1391 if drdict.get("refName", None):
1392 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
1393 view_json["displayedRegions"] = [
1394 drdict,
1395 ]
1396
1397 logging.info("@@@ defaultlocation %s for default session" % drdict)
1398 else:
1399 logging.info(
1400 "@@@ no contig name found for default session - please add one!"
1401 )
1402 session_name = default_data.get("session_name", "New session")
1403 for key, value in mapped_chars.items():
1404 session_name = session_name.replace(value, key)
1405 # Merge with possibly existing defaultSession (if upgrading a jbrowse instance)
1406 session_json = {}
1407 if "defaultSession" in config_json:
1408 session_json = config_json["defaultSession"]
1409
1410 session_json["name"] = session_name
1411
1412 if "views" not in session_json:
1413 session_json["views"] = []
1414
1415 session_json["views"].append(view_json)
1416
1417 config_json["defaultSession"] = session_json
1418 self.config_json.update(config_json)
1419
1420 with open(self.config_json_file, "w") as config_file:
1421 json.dump(self.config_json, config_file, indent=2)
1422
1423 def add_general_configuration(self, data):
1424 """
1425 Add some general configuration to the config.json file
1426 """
1427
1428 config_path = self.config_json_file
1429 if os.path.exists(config_path):
1430 with open(config_path, "r") as config_file:
1431 config_json = json.load(config_file)
1432 else:
1433 config_json = {}
1434 if self.config_json:
1435 config_json.update(self.config_json)
1436 config_data = {}
1437
1438 config_data["disableAnalytics"] = data.get("analytics", "false") == "true"
1439
1440 config_data["theme"] = {
1441 "palette": {
1442 "primary": {"main": data.get("primary_color", "#0D233F")},
1443 "secondary": {"main": data.get("secondary_color", "#721E63")},
1444 "tertiary": {"main": data.get("tertiary_color", "#135560")},
1445 "quaternary": {"main": data.get("quaternary_color", "#FFB11D")},
1446 },
1447 "typography": {"fontSize": int(data.get("font_size", 10))},
1448 }
1449 if not config_json.get("configuration", None):
1450 config_json["configuration"] = {}
1451 config_json["configuration"].update(config_data)
1452 self.config_json.update(config_json)
1453 with open(config_path, "w") as config_file:
1454 json.dump(self.config_json, config_file, indent=2)
1455
1456 def clone_jbrowse(self, realclone=True):
1457 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now"""
1458 dest = self.outdir
1459 if realclone:
1460 self.subprocess_check_call(
1461 ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"]
1462 )
1463 else:
1464 shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True)
1465 for fn in [
1466 "asset-manifest.json",
1467 "favicon.ico",
1468 "robots.txt",
1469 "umd_plugin.js",
1470 "version.txt",
1471 "test_data",
1472 ]:
1473 cmd = ["rm", "-rf", os.path.join(dest, fn)]
1474 self.subprocess_check_call(cmd)
1475 cmd = ["cp", os.path.join(INSTALLED_TO, "jb2_webserver.py"), dest]
1476 self.subprocess_check_call(cmd)
1477
1478
def parse_style_conf(item):
    """Convert one <style> option element into a Python value.

    An element declaring type="boolean" yields True for the texts
    "yes"/"true"/"True"; type="integer" yields int(text); anything
    else (including no type attribute) returns the raw text.
    """
    declared_type = item.attrib.get("type")
    if declared_type == "boolean":
        return item.text in ("yes", "true", "True")
    if declared_type == "integer":
        return int(item.text)
    return item.text
1490
1491
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument("--xml", help="Track Configuration")
    parser.add_argument(
        "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda"
    )
    parser.add_argument("--outdir", help="Output directory", default="out")
    parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1")
    args = parser.parse_args()
    tree = ET.parse(args.xml)
    root = tree.getroot()

    # This should be done ASAP
    GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text
    # Sometimes this comes as `localhost` without a protocol
    if not GALAXY_INFRASTRUCTURE_URL.startswith("http"):
        # so we'll prepend `http://` and hope for the best. Requests *should*
        # be GET and not POST so it should redirect OK
        GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL
    jc = JbrowseConnector(
        outdir=args.outdir,
        jbrowse2path=args.jbrowse2path,
        genomes=[
            {
                "path": x.attrib["path"],
                "label": x.attrib["label"],
                "useuri": x.attrib["useuri"],
                "meta": metadata_from_node(x.find("metadata")),
            }
            for x in root.findall("metadata/genomes/genome")
        ],
    )
    jc.process_genomes()

    # .add_default_view() replace from https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py
    default_session_data = {
        "visibility": {
            "default_on": [],
            "default_off": [],
        },
        "style": {},
        "style_labels": {},
    }
    for track in root.findall("tracks/track"):
        track_conf = {}
        track_conf["trackfiles"] = []

        is_multi_bigwig = False
        try:
            # NOTE: ElementTree Elements with no children are falsy, so the
            # previous truthiness test never fired for a childless
            # <multibigwig>True</multibigwig> leaf; compare with None instead.
            multibigwig_elem = track.find("options/wiggle/multibigwig")
            if multibigwig_elem is not None and multibigwig_elem.text == "True":
                is_multi_bigwig = True
                multi_bigwig_paths = []
        except KeyError:
            pass

        trackfiles = track.findall("files/trackFile")
        if trackfiles:
            for x in track.findall("files/trackFile"):
                track_conf["label"] = x.attrib["label"]
                track_conf["useuri"] = x.attrib["useuri"]
                if is_multi_bigwig:
                    multi_bigwig_paths.append(
                        (
                            x.attrib["label"],
                            x.attrib["useuri"],
                            os.path.realpath(x.attrib["path"]),
                        )
                    )
                else:
                    if trackfiles:
                        metadata = metadata_from_node(x.find("metadata"))
                        track_conf["dataset_id"] = metadata["dataset_id"]
                        if x.attrib["useuri"].lower() == "yes":
                            tfa = (
                                x.attrib["path"],
                                x.attrib["ext"],
                                x.attrib["useuri"],
                                x.attrib["label"],
                                metadata,
                            )
                        else:
                            tfa = (
                                os.path.realpath(x.attrib["path"]),
                                x.attrib["ext"],
                                x.attrib["useuri"],
                                x.attrib["label"],
                                metadata,
                            )
                        track_conf["trackfiles"].append(tfa)

            if is_multi_bigwig:
                metadata = metadata_from_node(x.find("metadata"))

                track_conf["trackfiles"].append(
                    (
                        multi_bigwig_paths,  # Passing an array of paths to represent as one track
                        "bigwig_multiple",
                        "MultiBigWig",  # Giving an hardcoded name for now
                        {},  # No metadata for multiple bigwig
                    )
                )
        track_conf["category"] = track.attrib["cat"]
        track_conf["format"] = track.attrib["format"]
        # `is not None` rather than truthiness: an Element is falsy when empty
        if track.find("options/style") is not None:
            track_conf["style"] = {
                item.tag: parse_style_conf(item) for item in track.find("options/style")
            }
        else:
            track_conf["style"] = {}
        tst = track_conf["style"].get("type", None)
        if tst:
            track_conf["style"]["configuration"] = "%s-%s" % (track_conf["label"], tst)
        # logging.warn is a deprecated alias; use warning()
        logging.warning("### got %s for track style" % track_conf["style"])
        if track.find("options/style_labels") is not None:
            track_conf["style_labels"] = {
                item.tag: parse_style_conf(item)
                for item in track.find("options/style_labels")
            }
        track_conf["conf"] = etree_to_dict(track.find("options"))
        track_conf["category"] = track.attrib["cat"]
        track_conf["format"] = track.attrib["format"]
        keys = jc.process_annotations(track_conf)

        if keys:
            for key in keys:
                default_session_data["visibility"][
                    track.attrib.get("visibility", "default_off")
                ].append(key)
                if track_conf.get("style", None):
                    default_session_data["style"][key] = track_conf["style"]
                if track_conf.get("style_labels", None):
                    default_session_data["style_labels"][key] = track_conf.get(
                        "style_labels", None
                    )
                logging.warning(
                    "# after process, key=%s def session style = %s"
                    % (key, default_session_data["style"][key])
                )
    default_session_data["defaultLocation"] = root.find(
        "metadata/general/defaultLocation"
    ).text
    default_session_data["session_name"] = root.find(
        "metadata/general/session_name"
    ).text
    jc.zipOut = root.find("metadata/general/zipOut").text == "true"
    general_data = {
        "analytics": root.find("metadata/general/analytics").text,
        "primary_color": root.find("metadata/general/primary_color").text,
        "secondary_color": root.find("metadata/general/secondary_color").text,
        "tertiary_color": root.find("metadata/general/tertiary_color").text,
        "quaternary_color": root.find("metadata/general/quaternary_color").text,
        "font_size": root.find("metadata/general/font_size").text,
    }
    jc.add_general_configuration(general_data)
    trackconf = jc.config_json.get("tracks", None)
    if trackconf:
        # "tracks" is a JSON array and tracksToAdd is a list; the previous
        # .update() call would raise AttributeError on a list - extend it.
        jc.config_json["tracks"].extend(jc.tracksToAdd)
    else:
        jc.config_json["tracks"] = jc.tracksToAdd
    jc.write_config()
    jc.add_default_session(default_session_data)
    logging.warning("### got default_session_data=%s" % default_session_data)
    # jc.text_index() not sure what broke here.