comparison jbrowse2.py @ 73:3b2815efa5d9 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit b361309b05a861da9b64e1324157a8c32767e0bf
author fubar
date Mon, 01 Apr 2024 03:41:42 +0000
parents 2bdb748df098
children 13ede71c3a4b
comparison
equal deleted inserted replaced
72:2bdb748df098 73:3b2815efa5d9
6 import json 6 import json
7 import logging 7 import logging
8 import os 8 import os
9 import re 9 import re
10 import shutil 10 import shutil
11 import ssl
11 import struct 12 import struct
12 import subprocess 13 import subprocess
13 import tempfile 14 import tempfile
14 import urllib.request 15 import urllib.request
15 import xml.etree.ElementTree as ET 16 import xml.etree.ElementTree as ET
35 "}": "__cc__", 36 "}": "__cc__",
36 "@": "__at__", 37 "@": "__at__",
37 "#": "__pd__", 38 "#": "__pd__",
38 "": "__cn__", 39 "": "__cn__",
39 } 40 }
41
42
# Wrapper page written as index.html: it embeds the real JBrowse2 page
# (index_noview.html) in a full-size iframe. The __SESSION_SPEC__ placeholder
# is replaced at runtime with extra URL parameters for the iframe src.
# A stray </script> closing tag (no matching <script>) was removed from <head>.
INDEX_TEMPLATE = """<!doctype html>
<html lang="en" style="height:100%">
<head>
<meta charset="utf-8"/>
<link rel="shortcut icon" href="./favicon.ico"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<meta name="theme-color" content="#000000"/>
<meta name="description" content="A fast and flexible genome browser"/>
<link rel="manifest" href="./manifest.json"/>
<title>JBrowse</title>
</head>
<body style="overscroll-behavior:none; height:100%; margin: 0;">
<iframe
id="jbframe"
title="JBrowse2"
frameborder="0"
width="100%"
height="100%"
src='index_noview.html?config=config.json__SESSION_SPEC__'>
</iframe>
</body>
</html>
"""
40 67
41 68
42 class ColorScaling(object): 69 class ColorScaling(object):
43 70
44 COLOR_FUNCTION_TEMPLATE = """ 71 COLOR_FUNCTION_TEMPLATE = """
374 401
375 class JbrowseConnector(object): 402 class JbrowseConnector(object):
376 def __init__(self, outdir, jbrowse2path): 403 def __init__(self, outdir, jbrowse2path):
377 self.assemblies = [] # these require more than a few line diff. 404 self.assemblies = [] # these require more than a few line diff.
378 self.assmeta = {} 405 self.assmeta = {}
406 self.ass_first_contigs = (
407 []
408 ) # for default session - these are read as first line of the assembly .fai
379 self.giURL = GALAXY_INFRASTRUCTURE_URL 409 self.giURL = GALAXY_INFRASTRUCTURE_URL
380 self.outdir = outdir 410 self.outdir = outdir
381 self.jbrowse2path = jbrowse2path 411 self.jbrowse2path = jbrowse2path
382 os.makedirs(self.outdir, exist_ok=True) 412 os.makedirs(self.outdir, exist_ok=True)
383 self.genome_names = [] 413 self.genome_names = []
448 "displays": [ 478 "displays": [
449 style_data, 479 style_data,
450 ] 480 ]
451 } 481 }
452 return wstyle 482 return wstyle
483
def urllib_get_2018(url="https://localhost:6667/my-endpoint"):
    """Fetch ``url`` with TLS certificate checks disabled and print the body.

    The response body is decoded as UTF-8 and written to stdout.

    Certificate verification is switched off only for this one request.
    The previous implementation rebound the private
    ``ssl._create_default_https_context`` hook, which silently disabled
    verification for every later HTTPS request made by the process.

    :param url: address to fetch; defaults to the original hard-coded endpoint.
    """
    unverified = ssl.create_default_context()
    # check_hostname has to be cleared first: the ssl module refuses
    # CERT_NONE while hostname checking is still enabled.
    unverified.check_hostname = False
    unverified.verify_mode = ssl.CERT_NONE
    with urllib.request.urlopen(url=url, context=unverified) as f:
        print(f.read().decode("utf-8"))
491
def urllib_get_2022(url="https://localhost:6667/my-endpoint"):
    """Fetch ``url`` without TLS certificate checks, using only the public ssl API.

    The response body is decoded as UTF-8 and written to stdout.

    ``ssl.PROTOCOL_TLS`` is deprecated (it raises DeprecationWarning on
    Python 3.10+), so the context is built with ``PROTOCOL_TLS_CLIENT``.
    That protocol enables hostname checking and certificate validation by
    default, so both are turned off explicitly to keep the original
    "accept any certificate" behaviour.

    :param url: address to fetch; defaults to the original hard-coded endpoint.
    """
    scontext = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    # Order matters: CERT_NONE is rejected while check_hostname is still True.
    scontext.check_hostname = False
    scontext.verify_mode = ssl.CERT_NONE
    with urllib.request.urlopen(url=url, context=scontext) as f:
        print(f.read().decode("utf-8"))
453 499
454 def process_genomes(self, genomes): 500 def process_genomes(self, genomes):
455 assembly = [] 501 assembly = []
456 assmeta = [] 502 assmeta = []
457 useuri = False 503 useuri = False
467 if genome_name not in genome_names: 513 if genome_name not in genome_names:
468 # pafs with shared references 514 # pafs with shared references
469 fapath = genome_node["path"] 515 fapath = genome_node["path"]
470 if not useuri: 516 if not useuri:
471 fapath = os.path.realpath(fapath) 517 fapath = os.path.realpath(fapath)
472 assem = self.make_assembly(fapath, genome_name, useuri) 518 assem, first_contig = self.make_assembly(fapath, genome_name, useuri)
473 assembly.append(assem) 519 assembly.append(assem)
520 self.ass_first_contigs.append(first_contig)
474 if len(genome_names) == 0: 521 if len(genome_names) == 0:
475 this_genome["genome_name"] = genome_name # first one for all tracks 522 this_genome["genome_name"] = genome_name # first one for all tracks
476 genome_names.append(genome_name) 523 genome_names.append(genome_name)
477 this_genome["genome_sequence_adapter"] = assem["sequence"][ 524 this_genome["genome_sequence_adapter"] = assem["sequence"][
478 "adapter" 525 "adapter"
489 ].strip() 536 ].strip()
490 else: 537 else:
491 this_genome["genome_firstcontig"] = fl 538 this_genome["genome_firstcontig"] = fl
492 else: 539 else:
493 try: 540 try:
494 fl = urllib.request.urlopen(fapath + ".fai").readline() 541 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS)
542 scontext.verify_mode = ssl.VerifyMode.CERT_NONE
543 with urllib.request.urlopen(
544 url=fapath + ".fai", context=scontext
545 ) as f:
546 fl = f.readline()
495 except Exception: 547 except Exception:
496 fl = None 548 fl = None
497 if fl: # is first row of the text fai so the first contig name 549 if fl: # is first row of the text fai so the first contig name
498 this_genome["genome_firstcontig"] = ( 550 this_genome["genome_firstcontig"] = (
499 fl.decode("utf8").strip().split()[0] 551 fl.decode("utf8").strip().split()[0]
504 self.tracksToAdd[genome_names[0]] = [] 556 self.tracksToAdd[genome_names[0]] = []
505 self.genome_names += genome_names 557 self.genome_names += genome_names
506 return this_genome["genome_name"] 558 return this_genome["genome_name"]
507 559
508 def make_assembly(self, fapath, gname, useuri): 560 def make_assembly(self, fapath, gname, useuri):
561 """added code to grab the first contig name and length for broken default session from Anthony and Helena's code
562 that poor Bjoern is trying to figure out.
563 """
509 if useuri: 564 if useuri:
510 faname = fapath 565 faname = fapath
511 adapter = { 566 adapter = {
512 "type": "BgzipFastaAdapter", 567 "type": "BgzipFastaAdapter",
513 "fastaLocation": {"uri": faname, "locationType": "UriLocation"}, 568 "fastaLocation": {"uri": faname, "locationType": "UriLocation"},
514 "faiLocation": {"uri": faname + ".fai", "locationType": "UriLocation"}, 569 "faiLocation": {"uri": faname + ".fai", "locationType": "UriLocation"},
515 "gziLocation": {"uri": faname + ".gzi", "locationType": "UriLocation"}, 570 "gziLocation": {"uri": faname + ".gzi", "locationType": "UriLocation"},
516 } 571 }
572 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS)
573 scontext.verify_mode = ssl.VerifyMode.CERT_NONE
574 with urllib.request.urlopen(url=faname + ".fai", context=scontext) as f:
575 fl = f.readline()
576 contig = fl.decode("utf8").strip()
577 # Merlin 172788 8 60 61
517 else: 578 else:
518 faname = gname + ".fa.gz" 579 faname = gname + ".fa.gz"
519 fadest = os.path.realpath(os.path.join(self.outdir, faname)) 580 fadest = os.path.realpath(os.path.join(self.outdir, faname))
520 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( 581 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % (
521 fapath, 582 fapath,
535 }, 596 },
536 "gziLocation": { 597 "gziLocation": {
537 "uri": faname + ".gzi", 598 "uri": faname + ".gzi",
538 }, 599 },
539 } 600 }
540 601 contig = open(fadest + ".fai", "r").readline().strip()
602 first_contig = contig.split()[:2]
603 first_contig.insert(0, gname)
541 trackDict = { 604 trackDict = {
542 "name": gname, 605 "name": gname,
543 "sequence": { 606 "sequence": {
544 "type": "ReferenceSequenceTrack", 607 "type": "ReferenceSequenceTrack",
545 "trackId": gname, 608 "trackId": gname,
554 "type": "LinearGCContentDisplay", 617 "type": "LinearGCContentDisplay",
555 "displayId": "%s-LinearGCContentDisplay" % gname, 618 "displayId": "%s-LinearGCContentDisplay" % gname,
556 }, 619 },
557 ], 620 ],
558 } 621 }
559 return trackDict 622 return (trackDict, first_contig)
560 623
561 def add_default_view(self): 624 def add_default_view(self):
562 cmd = [ 625 cmd = [
563 "jbrowse", 626 "jbrowse",
564 "set-default-session", 627 "set-default-session",
905 ["cp", os.path.realpath(cram_index), dest + ".crai"] 968 ["cp", os.path.realpath(cram_index), dest + ".crai"]
906 ) 969 )
907 else: 970 else:
908 cpath = os.path.realpath(dest) + ".crai" 971 cpath = os.path.realpath(dest) + ".crai"
909 cmd = ["samtools", "index", "-c", "-o", cpath, os.path.realpath(dest)] 972 cmd = ["samtools", "index", "-c", "-o", cpath, os.path.realpath(dest)]
910 logging.debug("executing cmd %s" % " ".join(cmd))
911 self.subprocess_check_call(cmd) 973 self.subprocess_check_call(cmd)
912 trackDict = { 974 trackDict = {
913 "type": "AlignmentsTrack", 975 "type": "AlignmentsTrack",
914 "trackId": tId, 976 "trackId": tId,
915 "name": trackData["name"], 977 "name": trackData["name"],
1134 "https://" 1196 "https://"
1135 ) 1197 )
1136 1198
1137 if gname not in self.genome_names: 1199 if gname not in self.genome_names:
1138 # ignore if already there - eg for duplicates among pafs. 1200 # ignore if already there - eg for duplicates among pafs.
1139 asstrack = self.make_assembly(pgpaths[i], gname, useuri) 1201 asstrack, first_contig = self.make_assembly(pgpaths[i], gname, useuri)
1140 self.genome_names.append(gname) 1202 self.genome_names.append(gname)
1141 self.tracksToAdd[gname] = [] 1203 self.tracksToAdd[gname] = []
1142 self.assemblies.append(asstrack) 1204 self.assemblies.append(asstrack)
1143 trackDict = { 1205 trackDict = {
1144 "type": "SyntenyTrack", 1206 "type": "SyntenyTrack",
1316 for gnome in self.genome_names: 1378 for gnome in self.genome_names:
1317 tracks_data = [] 1379 tracks_data = []
1318 for track_conf in self.tracksToAdd[gnome]: 1380 for track_conf in self.tracksToAdd[gnome]:
1319 tId = track_conf["trackId"] 1381 tId = track_conf["trackId"]
1320 track_types[tId] = track_conf["type"] 1382 track_types[tId] = track_conf["type"]
1321 style_data = default_data["style"].get(tId, None) 1383 style_data = default_data[gnome]["style"].get(tId, None)
1322 if not style_data: 1384 if not style_data:
1323 logging.debug( 1385 logging.debug(
1324 "### No style data in default data %s for %s" 1386 "### No style data in default data %s for %s"
1325 % (default_data, tId) 1387 % (default_data, tId)
1326 ) 1388 )
1391 self.config_json.update(config_json) 1453 self.config_json.update(config_json)
1392 logging.debug("defaultSession=%s" % (pp)) 1454 logging.debug("defaultSession=%s" % (pp))
1393 with open(self.config_json_file, "w") as config_file: 1455 with open(self.config_json_file, "w") as config_file:
1394 json.dump(self.config_json, config_file, indent=2) 1456 json.dump(self.config_json, config_file, indent=2)
1395 1457
def add_defsess_to_index(self, data):
    """
    Write a wrapper index.html that opens JBrowse2 with a default session.

    One LinearGenomeView is added per entry in self.ass_first_contigs
    (each entry is unpacked as assembly name, first contig name, contig end).
    The existing index.html in self.outdir is renamed to index_noview.html and
    a new index.html built from INDEX_TEMPLATE is written in its place, with
    the session spec passed to the iframe as a URL parameter.

    Broken in Anthony's PR because only ever dealt with the first assembly.

    Add some default session settings: set some assemblies/tracks on/off

    This allows to select a default view:
    - jb type (Linear, Circular, etc)
    - default location on an assembly
    - default tracks
    - ...

    Different methods to do that were tested/discussed:
    - using a defaultSession item in config.json: this proved to be difficult:
    forced to write a full session block, including hard-coded/hard-to-guess items,
    no good way to let Jbrowse2 display a scaffold without knowing its size
    - using JBrowse2 as an embedded React component in a tool-generated html file:
    it works but it requires generating js code to actually do what we want = choosing default view, assembly, tracks, ...
    - writing a session-spec inside the config.json file: this is not yet supported as of 2.10.2 (see PR 4148 below)
    a session-spec is a kind of simplified defaultSession where you don't need to specify every aspect of the session
    - passing a session-spec through URL params by embedding the JBrowse2 index.html inside an iframe
    we selected this option

    Xrefs to understand the choices:
    https://github.com/GMOD/jbrowse-components/issues/2708
    https://github.com/GMOD/jbrowse-components/discussions/3568
    https://github.com/GMOD/jbrowse-components/pull/4148

    :param data: default-session dict. Reads the optional "defaultLocation"
        string ("contig:start..end", commas allowed in the numbers) and
        data[<assembly name>]["tracks"] for every assembly in
        self.ass_first_contigs.

    NOTE(review): data carries a single defaultLocation, so when it parses it
    is applied to the view of every assembly - confirm this is what is wanted
    for multi-assembly sessions.
    """
    session_spec = {"views": []}
    logging.debug("def data=%s" % data)

    # Resolve the user-requested location once; it does not depend on the
    # assembly. It is only used when contig, start and end are all present,
    # otherwise every view falls back to its assembly's first contig.
    requested_loc = None
    default_location = data.get("defaultLocation", "")
    if default_location:
        loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", default_location)
        if loc_match:
            # The pattern accepts thousands separators and empty coordinates,
            # neither of which int() can parse directly.
            raw_start = loc_match.group(2).replace(",", "")
            raw_end = loc_match.group(3).replace(",", "")
            if raw_start and raw_end:
                requested_loc = (loc_match.group(1), int(raw_start), int(raw_end))

    # Pre-bound so the debug message at the end cannot hit an unbound name
    # when there are no assemblies to iterate over.
    gnome = None
    refName = None
    for gnome, refName, end in self.ass_first_contigs:
        start = 0
        if requested_loc is not None:
            refName, start, end = requested_loc
        if refName:
            view = {
                "assembly": gnome,
                "loc": "{}:{}-{}".format(refName, start, end),
                "type": "LinearGenomeView",
                "tracks": data[gnome]["tracks"],
            }
            session_spec["views"].append(view)

    sess = json.dumps(session_spec, sort_keys=True, indent=2)
    new_index = INDEX_TEMPLATE.replace(
        "__SESSION_SPEC__", "&session=spec-{}".format(sess)
    )

    # Keep the stock JBrowse2 page under a new name; the wrapper's iframe
    # loads it from there.
    os.rename(
        os.path.join(self.outdir, "index.html"),
        os.path.join(self.outdir, "index_noview.html"),
    )

    with open(os.path.join(self.outdir, "index.html"), "w") as nind:
        nind.write(new_index)
    logging.debug(
        "#### add_defsession gnome=%s refname=%s\nsession_spec=%s\nnew_index=%s"
        % (gnome, refName, sess, new_index)
    )
1526
1396 def add_general_configuration(self, data): 1527 def add_general_configuration(self, data):
1397 """ 1528 """
1398 Add some general configuration to the config.json file 1529 Add some general configuration to the config.json file
1399 """ 1530 """
1400 1531
1424 config_json["configuration"].update(config_data) 1555 config_json["configuration"].update(config_data)
1425 self.config_json.update(config_json) 1556 self.config_json.update(config_json)
1426 with open(config_path, "w") as config_file: 1557 with open(config_path, "w") as config_file:
1427 json.dump(self.config_json, config_file, indent=2) 1558 json.dump(self.config_json, config_file, indent=2)
1428 1559
1429 def clone_jbrowse(self, realclone=True): 1560 def clone_jbrowse(self, realclone=False):
1430 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now 1561 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now
1431 Leave as True between version updates on temporary tools - requires manual conda trigger :( 1562 Leave as True between version updates on temporary tools - requires manual conda trigger :(
1432 """ 1563 """
1433 dest = self.outdir 1564 dest = self.outdir
1434 if realclone: 1565 if realclone:
1478 # be GET and not POST so it should redirect OK 1609 # be GET and not POST so it should redirect OK
1479 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL 1610 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL
1480 1611
1481 jc = JbrowseConnector(outdir=args.outdir, jbrowse2path=args.jbrowse2path) 1612 jc = JbrowseConnector(outdir=args.outdir, jbrowse2path=args.jbrowse2path)
1482 1613
1483 default_session_data = { 1614 default_session_data = {}
1484 "visibility": {
1485 "default_on": [],
1486 "default_off": [],
1487 },
1488 "style": {},
1489 "style_labels": {},
1490 }
1491 1615
1492 for ass in root.findall("assembly"): 1616 for ass in root.findall("assembly"):
1493 genomes = [ 1617 genomes = [
1494 { 1618 {
1495 "path": x.attrib["path"], 1619 "path": x.attrib["path"],
1498 "meta": metadata_from_node(x.find("metadata")), 1622 "meta": metadata_from_node(x.find("metadata")),
1499 } 1623 }
1500 for x in ass.findall("metadata/genomes/genome") 1624 for x in ass.findall("metadata/genomes/genome")
1501 ] 1625 ]
1502 assref_name = jc.process_genomes(genomes) 1626 assref_name = jc.process_genomes(genomes)
1503 1627 if not default_session_data.get(assref_name, None):
1628 default_session_data[assref_name] = {
1629 "tracks": [],
1630 "style": {},
1631 "style_labels": {},
1632 "visibility": {
1633 "default_on": [],
1634 "default_off": [],
1635 },
1636 }
1504 for track in ass.find("tracks"): 1637 for track in ass.find("tracks"):
1505 track_conf = {} 1638 track_conf = {}
1506 track_conf["trackfiles"] = [] 1639 track_conf["trackfiles"] = []
1507 track_conf["assemblyNames"] = assref_name 1640 track_conf["assemblyNames"] = assref_name
1508 is_multi_bigwig = False 1641 is_multi_bigwig = False
1575 if keys: 1708 if keys:
1576 for key in keys: 1709 for key in keys:
1577 vis = track.attrib.get("visibility", "default_off") 1710 vis = track.attrib.get("visibility", "default_off")
1578 if not vis: 1711 if not vis:
1579 vis = "default_off" 1712 vis = "default_off"
1580 default_session_data["visibility"][vis].append(key) 1713 default_session_data[assref_name]["visibility"][vis].append(key)
1581 if track.find("options/style"): 1714 if track.find("options/style"):
1582 default_session_data["style"][key] = { 1715 default_session_data[assref_name]["style"][key] = {
1583 item.tag: parse_style_conf(item) 1716 item.tag: parse_style_conf(item)
1584 for item in track.find("options/style") 1717 for item in track.find("options/style")
1585 } 1718 }
1586 else: 1719 else:
1587 default_session_data["style"][key] = {} 1720 default_session_data[assref_name]["style"][key] = {}
1588 logging.debug("@@@@ no options/style found for %s" % (key)) 1721 logging.debug("no options/style found for %s" % (key))
1589 1722
1590 if track.find("options/style_labels"): 1723 if track.find("options/style_labels"):
1591 default_session_data["style_labels"][key] = { 1724 default_session_data[assref_name]["style_labels"][key] = {
1592 item.tag: parse_style_conf(item) 1725 item.tag: parse_style_conf(item)
1593 for item in track.find("options/style_labels") 1726 for item in track.find("options/style_labels")
1594 } 1727 }
1728 default_session_data[assref_name]["tracks"].append(key)
1595 default_session_data["defaultLocation"] = root.find( 1729 default_session_data["defaultLocation"] = root.find(
1596 "metadata/general/defaultLocation" 1730 "metadata/general/defaultLocation"
1597 ).text 1731 ).text
1598 default_session_data["session_name"] = root.find( 1732 default_session_data["session_name"] = root.find(
1599 "metadata/general/session_name" 1733 "metadata/general/session_name"
1614 trackconf += jc.tracksToAdd[gnome] 1748 trackconf += jc.tracksToAdd[gnome]
1615 jc.config_json["tracks"] = trackconf 1749 jc.config_json["tracks"] = trackconf
1616 assconf = jc.config_json.get("assemblies", []) 1750 assconf = jc.config_json.get("assemblies", [])
1617 assconf += jc.assemblies 1751 assconf += jc.assemblies
1618 jc.config_json["assemblies"] = assconf 1752 jc.config_json["assemblies"] = assconf
1619 logging.debug("&&&assemblies=%s, gnames=%s" % (assconf, jc.genome_names)) 1753 logging.debug("assemblies=%s, gnames=%s" % (assconf, jc.genome_names))
1620 jc.write_config() 1754 jc.write_config()
1621 jc.add_default_session(default_session_data) 1755 jc.add_default_session(default_session_data)
1756 jc.add_defsess_to_index(default_session_data)
1622 # jc.text_index() not sure what broke here. 1757 # jc.text_index() not sure what broke here.