comparison jbrowse2.py @ 60:81d535970196 draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 403a35e1245fa5e62f4be6116a725b9e4d9c353a
author fubar
date Mon, 25 Mar 2024 02:10:05 +0000
parents f807e219cec3
children e7a6f7a7148d
comparison
equal deleted inserted replaced
59:f807e219cec3 60:81d535970196
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # change to accumulating all configuration for config.json based on the default from the clone 2
3 import argparse 3 import argparse
4 import binascii 4 import binascii
5 import datetime 5 import datetime
6 import json 6 import json
7 import logging 7 import logging
21 JB2VER = "v2.10.3" 21 JB2VER = "v2.10.3"
22 # version pinned for cloning 22 # version pinned for cloning
23 23
24 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") 24 TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
25 GALAXY_INFRASTRUCTURE_URL = None 25 GALAXY_INFRASTRUCTURE_URL = None
26
27 # version pinned for cloning
28 26
29 mapped_chars = { 27 mapped_chars = {
30 ">": "__gt__", 28 ">": "__gt__",
31 "<": "__lt__", 29 "<": "__lt__",
32 "'": "__sq__", 30 "'": "__sq__",
456 genome_name = genome_node["meta"]["dataset_dname"].strip() 454 genome_name = genome_node["meta"]["dataset_dname"].strip()
457 if len(genome_name.split()) > 1: 455 if len(genome_name.split()) > 1:
458 genome_name = genome_name.split()[0] 456 genome_name = genome_name.split()[0]
459 # spaces and cruft break scripts when substituted 457 # spaces and cruft break scripts when substituted
460 if genome_name not in self.genome_names: 458 if genome_name not in self.genome_names:
461 # ignore dupes - can have multiple pafs with same references? 459 # pafs with shared references
462 fapath = genome_node["path"] 460 fapath = genome_node["path"]
463 if not useuri: 461 if not useuri:
464 fapath = os.path.realpath(fapath) 462 fapath = os.path.realpath(fapath)
465 assem = self.make_assembly(fapath, genome_name, useuri) 463 assem = self.make_assembly(fapath, genome_name, useuri)
466 assemblies.append(assem) 464 assemblies.append(assem)
467 self.genome_names.append(genome_name) 465 self.genome_names.append(genome_name)
468 if self.genome_name is None: 466 if self.genome_name is None:
469 self.genome_name = ( 467 self.genome_name = (
470 genome_name # first one for all tracks - other than paf 468 genome_name # first one for all tracks
471 ) 469 )
472 self.genome_sequence_adapter = assem["sequence"]["adapter"] 470 self.genome_sequence_adapter = assem["sequence"]["adapter"]
473 self.genome_firstcontig = None 471 self.genome_firstcontig = None
474 if not useuri: 472 if not useuri:
475 fl = open(fapath, "r").readline() 473 fl = open(fapath, "r").readline()
479 if len(fl.split()) > 1: 477 if len(fl.split()) > 1:
480 self.genome_firstcontig = fl.split()[0].strip() 478 self.genome_firstcontig = fl.split()[0].strip()
481 else: 479 else:
482 self.genome_firstcontig = fl 480 self.genome_firstcontig = fl
483 else: 481 else:
484 fl = urllib.request.urlopen(fapath + ".fai").readline() 482 try:
483 fl = urllib.request.urlopen(fapath + ".fai").readline()
484 except:
485 fl = None
485 if fl: # is first row of the text fai so the first contig name 486 if fl: # is first row of the text fai so the first contig name
486 self.genome_firstcontig = ( 487 self.genome_firstcontig = (
487 fl.decode("utf8").strip().split()[0] 488 fl.decode("utf8").strip().split()[0]
488 ) 489 )
490 else:
491 self.genome_firstcontig = None
489 if self.config_json.get("assemblies", None): 492 if self.config_json.get("assemblies", None):
490 self.config_json["assemblies"] += assemblies 493 self.config_json["assemblies"] += assemblies
491 else: 494 else:
492 self.config_json["assemblies"] = assemblies 495 self.config_json["assemblies"] = assemblies
493 496
604 faiLocation: 607 faiLocation:
605 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai', 608 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai',
606 gziLocation: 609 gziLocation:
607 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi', 610 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi',
608 Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438 611 Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438
612
613
609 """ 614 """
610 tId = trackData["label"] 615 tId = trackData["label"]
611 # can be served - if public. 616 # can be served - if public.
612 # dsId = trackData["metadata"]["dataset_id"] 617 # dsId = trackData["metadata"]["dataset_id"]
613 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId) 618 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId)
614 useuri = trackData["useuri"].lower() == "yes" 619 useuri = trackData["useuri"].lower() == "yes"
615 if useuri: 620 if useuri:
616 uri = data 621 uri = data
617 else: 622 else:
618 uri = trackData["hic_url"] 623 uri = "%s.hic" % trackData["label"]
624 # slashes in names cause path trouble
625 dest = os.path.join(self.outdir, uri)
626 cmd = ["cp", data, dest]
627 self.subprocess_check_call(cmd)
619 categ = trackData["category"] 628 categ = trackData["category"]
620 trackDict = { 629 trackDict = {
621 "type": "HicTrack", 630 "type": "HicTrack",
622 "trackId": tId, 631 "trackId": tId,
623 "name": uri, 632 "name": trackData["name"],
624 "assemblyNames": [self.genome_name], 633 "assemblyNames": [self.genome_name],
625 "category": [ 634 "category": [
626 categ, 635 categ,
627 ], 636 ],
628 "adapter": { 637 "adapter": {
629 "type": "HicAdapter", 638 "type": "HicAdapter",
630 "hicLocation": uri, 639 "hicLocation": { "uri": uri }
631 }, 640 }
632 "displays": [ 641 }
633 {
634 "type": "LinearHicDisplay",
635 "displayId": "%s-LinearHicDisplay" % tId,
636 },
637 ],
638 }
639 style_json = self._prepare_track_style(trackDict)
640 trackDict["style"] = style_json
641 self.tracksToAdd.append(trackDict) 642 self.tracksToAdd.append(trackDict)
642 self.trackIdlist.append(tId) 643 self.trackIdlist.append(tId)
643 644
644 def add_maf(self, data, trackData): 645 def add_maf(self, data, trackData):
645 """ 646 """
791 self.tracksToAdd.append(trackDict) 792 self.tracksToAdd.append(trackDict)
792 self.trackIdlist.append(tId) 793 self.trackIdlist.append(tId)
793 os.unlink(gff3) 794 os.unlink(gff3)
794 795
795 def add_bigwig(self, data, trackData): 796 def add_bigwig(self, data, trackData):
796 """ "type": "LinearWiggleDisplay",
797 "configuration": {},
798 "selectedRendering": "",
799 "resolution": 1,
800 "posColor": "rgb(228, 26, 28)",
801 "negColor": "rgb(255, 255, 51)",
802 "constraints": {}
803 """
804 useuri = trackData["useuri"].lower() == "yes" 797 useuri = trackData["useuri"].lower() == "yes"
805 if useuri: 798 if useuri:
806 url = data 799 url = data
807 else: 800 else:
808 url = "%s.bigwig" % trackData["label"] 801 url = "%s.bigwig" % trackData["label"]
1276 # Return non-human label for use in other fields 1269 # Return non-human label for use in other fields
1277 yield outputTrackConfig["label"] 1270 yield outputTrackConfig["label"]
1278 1271
1279 def add_default_session(self, default_data): 1272 def add_default_session(self, default_data):
1280 """ 1273 """
1281 Add some default session settings: set some assemblies/tracks on/off 1274 default session settings are hard and fragile.
1275 .add_default_view() and other configuration code adapted from
1276 https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py
1282 """ 1277 """
1283 tracks_data = [] 1278 tracks_data = []
1284
1285 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708 1279 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708
1286
1287 # We need to know the track type from the config.json generated just before
1288 track_types = {} 1280 track_types = {}
1289 with open(self.config_json_file, "r") as config_file: 1281 with open(self.config_json_file, "r") as config_file:
1290 config_json = json.load(config_file) 1282 config_json = json.load(config_file)
1291 if self.config_json: 1283 if self.config_json:
1292 config_json.update(self.config_json) 1284 config_json.update(self.config_json)
1293
1294 for track_conf in self.tracksToAdd: 1285 for track_conf in self.tracksToAdd:
1295 track_types[track_conf["trackId"]] = track_conf["type"]
1296 tId = track_conf["trackId"] 1286 tId = track_conf["trackId"]
1297 #if tId in data["visibility"]["default_on"]: 1287 track_types[tId] = track_conf["type"]
1298 style_data = default_data["style"].get(tId, None) 1288 style_data = default_data["style"].get(tId, None)
1299 if not style_data: 1289 if not style_data:
1300 logging.warn("### No style data in default data for %s" % tId) 1290 logging.warn("### No style data in default data %s for %s" % (default_data, tId))
1301 style_data = {"type": "LinearBasicDisplay"} 1291 style_data = {"type": "LinearBasicDisplay"}
1302 if "displays" in track_conf: 1292 if "displays" in track_conf:
1303 disp = track_conf["displays"][0]["type"] 1293 disp = track_conf["displays"][0]["type"]
1304 style_data["type"] = disp 1294 style_data["type"] = disp
1305 if track_conf.get("style_labels", None): 1295 if track_conf.get("style_labels", None):
1313 "displays": [style_data], 1303 "displays": [style_data],
1314 } 1304 }
1315 ) 1305 )
1316 # The view for the assembly we're adding 1306 # The view for the assembly we're adding
1317 view_json = {"type": "LinearGenomeView", "tracks": tracks_data} 1307 view_json = {"type": "LinearGenomeView", "tracks": tracks_data}
1318 1308 logging.warn("### view_json=%s" % view_json)
1319 refName = None 1309 refName = None
1320 drdict = { 1310 drdict = {
1321 "reversed": False, 1311 "reversed": False,
1322 "assemblyName": self.genome_name, 1312 "assemblyName": self.genome_name,
1323 "start": 1, 1313 "start": 1,
1406 config_json["configuration"].update(config_data) 1396 config_json["configuration"].update(config_data)
1407 self.config_json.update(config_json) 1397 self.config_json.update(config_json)
1408 with open(config_path, "w") as config_file: 1398 with open(config_path, "w") as config_file:
1409 json.dump(self.config_json, config_file, indent=2) 1399 json.dump(self.config_json, config_file, indent=2)
1410 1400
1411 def clone_jbrowse(self, realclone=True): 1401 def clone_jbrowse(self, realclone=False):
1412 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now""" 1402 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now
1403 Leave as True between version updates on temporary tools - requires manual conda trigger :(
1404 """
1413 dest = self.outdir 1405 dest = self.outdir
1414 if realclone: 1406 if realclone:
1415 self.subprocess_check_call( 1407 self.subprocess_check_call(
1416 ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"] 1408 ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"]
1417 ) 1409 )
1470 for x in root.findall("metadata/genomes/genome") 1462 for x in root.findall("metadata/genomes/genome")
1471 ], 1463 ],
1472 ) 1464 )
1473 jc.process_genomes() 1465 jc.process_genomes()
1474 1466
1475 # .add_default_view() replace from https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py
1476 default_session_data = { 1467 default_session_data = {
1477 "visibility": { 1468 "visibility": {
1478 "default_on": [], 1469 "default_on": [],
1479 "default_off": [], 1470 "default_off": [],
1480 }, 1471 },
1498 1489
1499 trackfiles = track.findall("files/trackFile") 1490 trackfiles = track.findall("files/trackFile")
1500 if trackfiles: 1491 if trackfiles:
1501 for x in track.findall("files/trackFile"): 1492 for x in track.findall("files/trackFile"):
1502 track_conf["label"] = x.attrib["label"] 1493 track_conf["label"] = x.attrib["label"]
1494 trackkey = track_conf["label"]
1503 track_conf["useuri"] = x.attrib["useuri"] 1495 track_conf["useuri"] = x.attrib["useuri"]
1504 if is_multi_bigwig: 1496 if is_multi_bigwig:
1505 multi_bigwig_paths.append( 1497 multi_bigwig_paths.append(
1506 ( 1498 (
1507 x.attrib["label"], 1499 x.attrib["label"],
1542 {}, # No metadata for multiple bigwig 1534 {}, # No metadata for multiple bigwig
1543 ) 1535 )
1544 ) 1536 )
1545 track_conf["category"] = track.attrib["cat"] 1537 track_conf["category"] = track.attrib["cat"]
1546 track_conf["format"] = track.attrib["format"] 1538 track_conf["format"] = track.attrib["format"]
1547 if track.find("options/style"):
1548 track_conf["style"] = {
1549 item.tag: parse_style_conf(item) for item in track.find("options/style")
1550 }
1551 else:
1552 track_conf["style"] = {}
1553 if track.find("options/style_labels"):
1554 track_conf["style_labels"] = {
1555 item.tag: parse_style_conf(item)
1556 for item in track.find("options/style_labels")
1557 }
1558
1559 track_conf["conf"] = etree_to_dict(track.find("options")) 1539 track_conf["conf"] = etree_to_dict(track.find("options"))
1560 track_conf["category"] = track.attrib["cat"] 1540 track_conf["category"] = track.attrib["cat"]
1561 track_conf["format"] = track.attrib["format"] 1541 track_conf["format"] = track.attrib["format"]
1562 keys = jc.process_annotations(track_conf) 1542 keys = jc.process_annotations(track_conf)
1563 1543
1564 if keys: 1544 if keys:
1565 for key in keys: 1545 for key in keys:
1566 default_session_data["visibility"][ 1546 default_session_data["visibility"][
1567 track.attrib.get("visibility", "default_off") 1547 track.attrib.get("visibility", "default_off")
1568 ].append(key) 1548 ].append(key)
1569 if track_conf.get("style", None): 1549 if track.find("options/style"):
1570 default_session_data["style"][key] = track_conf["style"] 1550 default_session_data["style"][key] = {
1571 if track_conf.get("style_lables", None): 1551 item.tag: parse_style_conf(item) for item in track.find("options/style")
1572 default_session_data["style_labels"][key] = track_conf.get( 1552 }
1573 "style_labels", None 1553 logging.warn("### added %s to defsess %s for %s" % (trackkey, default_session_data, key ))
1574 ) 1554 else:
1555 default_session_data["style"][key] = {}
1556 logging.warn("@@@@ no options/style found for %s" % (key))
1557
1558 if track.find("options/style_labels"):
1559 default_session_data["style_labels"][key] = {
1560 item.tag: parse_style_conf(item)
1561 for item in track.find("options/style_labels")
1562 }
1575 default_session_data["defaultLocation"] = root.find( 1563 default_session_data["defaultLocation"] = root.find(
1576 "metadata/general/defaultLocation" 1564 "metadata/general/defaultLocation"
1577 ).text 1565 ).text
1578 default_session_data["session_name"] = root.find( 1566 default_session_data["session_name"] = root.find(
1579 "metadata/general/session_name" 1567 "metadata/general/session_name"