comparison jbrowse2.py @ 7:b04fd993b31e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 53a108d8153c955044ae7eb8cb06bdcfd0036717
author fubar
date Wed, 17 Jan 2024 07:50:52 +0000
parents 79f7265f90bd
children a26c41e304c3
comparison
equal deleted inserted replaced
6:79f7265f90bd 7:b04fd993b31e
15 import xml.etree.ElementTree as ET 15 import xml.etree.ElementTree as ET
16 from collections import defaultdict 16 from collections import defaultdict
17 17
18 logging.basicConfig(level=logging.INFO) 18 logging.basicConfig(level=logging.INFO)
19 log = logging.getLogger("jbrowse") 19 log = logging.getLogger("jbrowse")
20
21 JB2VER = "v2.10.0"
22 # version pinned for cloning
23
20 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") 24 TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
21 GALAXY_INFRASTRUCTURE_URL = None 25 GALAXY_INFRASTRUCTURE_URL = None
22 JB2REL = "v2.10.0" 26 JB2REL = "v2.10.0"
23 # version pinned for cloning 27 # version pinned for cloning
24 28
369 return metadata 373 return metadata
370 374
371 375
372 class JbrowseConnector(object): 376 class JbrowseConnector(object):
373 def __init__(self, outdir, genomes): 377 def __init__(self, outdir, genomes):
374 self.debug = False
375 self.usejson = True 378 self.usejson = True
376 self.giURL = GALAXY_INFRASTRUCTURE_URL 379 self.giURL = GALAXY_INFRASTRUCTURE_URL
377 self.outdir = outdir 380 self.outdir = outdir
378 os.makedirs(self.outdir, exist_ok=True) 381 os.makedirs(self.outdir, exist_ok=True)
379 self.genome_paths = genomes 382 self.genome_paths = genomes
385 self.config_json_file = os.path.join(outdir, "config.json") 388 self.config_json_file = os.path.join(outdir, "config.json")
386 self.clone_jbrowse() 389 self.clone_jbrowse()
387 390
388 def subprocess_check_call(self, command, output=None): 391 def subprocess_check_call(self, command, output=None):
389 if output: 392 if output:
390 if self.debug: 393 log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output)
391 log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output)
392 subprocess.check_call(command, cwd=self.outdir, stdout=output) 394 subprocess.check_call(command, cwd=self.outdir, stdout=output)
393 else: 395 else:
394 log.debug("cd %s && %s", self.outdir, " ".join(command)) 396 log.debug("cd %s && %s", self.outdir, " ".join(command))
395 subprocess.check_call(command, cwd=self.outdir) 397 subprocess.check_call(command, cwd=self.outdir)
396 398
397 def subprocess_popen(self, command): 399 def subprocess_popen(self, command):
398 if self.debug: 400 log.debug(command)
399 log.debug(command)
400 p = subprocess.Popen( 401 p = subprocess.Popen(
401 command, 402 command,
402 cwd=self.outdir, 403 cwd=self.outdir,
403 shell=True, 404 shell=True,
404 stdin=subprocess.PIPE, 405 stdin=subprocess.PIPE,
412 log.error(output) 413 log.error(output)
413 log.error(err) 414 log.error(err)
414 raise RuntimeError("Command failed with exit code %s" % (retcode)) 415 raise RuntimeError("Command failed with exit code %s" % (retcode))
415 416
416 def subprocess_check_output(self, command): 417 def subprocess_check_output(self, command):
417 if self.debug: 418 log.debug(" ".join(command))
418 log.debug(" ".join(command))
419 return subprocess.check_output(command, cwd=self.outdir) 419 return subprocess.check_output(command, cwd=self.outdir)
420 420
421 def symlink_or_copy(self, src, dest): 421 def symlink_or_copy(self, src, dest):
422 if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool( 422 if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool(
423 os.environ["GALAXY_JBROWSE_SYMLINKS"] 423 os.environ["GALAXY_JBROWSE_SYMLINKS"]
463 self.config_json["assemblies"] += assemblies 463 self.config_json["assemblies"] += assemblies
464 else: 464 else:
465 self.config_json["assemblies"] = assemblies 465 self.config_json["assemblies"] = assemblies
466 466
467 def make_assembly(self, fapath, gname): 467 def make_assembly(self, fapath, gname):
468 faname = gname + ".fa.gz" 468 hashData = [
469 fapath,
470 gname,
471 ]
472 hashData = "|".join(hashData).encode("utf-8")
473 ghash = hashlib.md5(hashData).hexdigest()
474 faname = ghash + ".fa.gz"
469 fadest = os.path.join(self.outdir, faname) 475 fadest = os.path.join(self.outdir, faname)
470 # fadest = os.path.realpath(os.path.join(self.outdir, faname))
471 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( 476 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % (
472 fapath, 477 fapath,
473 fadest, 478 fadest,
474 fadest, 479 fadest,
475 fadest, 480 fadest,
554 """ 559 """
555 tId = trackData["label"] 560 tId = trackData["label"]
556 # can be served - if public. 561 # can be served - if public.
557 # dsId = trackData["metadata"]["dataset_id"] 562 # dsId = trackData["metadata"]["dataset_id"]
558 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId) 563 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId)
559 hname = trackData["name"] 564 hname = trackData["label"]
560 dest = os.path.join(self.outdir, hname) 565 dest = os.path.join(self.outdir, hname)
561 cmd = ["cp", data, dest] 566 cmd = ["cp", data, dest]
562 # these can be very big. 567 # these can be very big.
563 self.subprocess_check_call(cmd) 568 self.subprocess_check_call(cmd)
564 floc = { 569 floc = {
601 ] 606 ]
602 } 607 }
603 tId = trackData["label"] 608 tId = trackData["label"]
604 fname = "%s.bed" % tId 609 fname = "%s.bed" % tId
605 dest = "%s/%s" % (self.outdir, fname) 610 dest = "%s/%s" % (self.outdir, fname)
606 # self.symlink_or_copy(data, dest)
607 # Process MAF to bed-like. Need build to munge chromosomes
608 gname = self.genome_name 611 gname = self.genome_name
609 cmd = [ 612 cmd = [
610 "bash", 613 "bash",
611 os.path.join(INSTALLED_TO, "convertMAF.sh"), 614 os.path.join(INSTALLED_TO, "convertMAF.sh"),
612 data, 615 data,
720 } 723 }
721 style_json = self._prepare_track_style(trackDict) 724 style_json = self._prepare_track_style(trackDict)
722 trackDict["style"] = style_json 725 trackDict["style"] = style_json
723 self.tracksToAdd.append(trackDict) 726 self.tracksToAdd.append(trackDict)
724 self.trackIdlist.append(tId) 727 self.trackIdlist.append(tId)
725
726 os.unlink(gff3) 728 os.unlink(gff3)
727 729
728 def add_bigwig(self, data, trackData): 730 def add_bigwig(self, data, trackData):
729 url = "%s.bw" % trackData["name"] 731 url = "%s.bigwig" % trackData["label"]
732 # slashes in names cause path trouble
730 dest = os.path.join(self.outdir, url) 733 dest = os.path.join(self.outdir, url)
731 cmd = ["cp", data, dest] 734 cmd = ["cp", data, dest]
732 self.subprocess_check_call(cmd) 735 self.subprocess_check_call(cmd)
733 bwloc = {"uri": url} 736 bwloc = {"uri": url}
734 tId = trackData["label"] 737 tId = trackData["label"]
735 trackDict = { 738 trackDict = {
736 "type": "QuantitativeTrack", 739 "type": "QuantitativeTrack",
737 "trackId": tId, 740 "trackId": tId,
738 "name": url, 741 "name": trackData["name"],
739 "assemblyNames": [ 742 "assemblyNames": [
740 self.genome_name, 743 self.genome_name,
741 ], 744 ],
742 "adapter": { 745 "adapter": {
743 "type": "BigWigAdapter", 746 "type": "BigWigAdapter",
752 } 755 }
753 style_json = self._prepare_track_style(trackDict) 756 style_json = self._prepare_track_style(trackDict)
754 trackDict["style"] = style_json 757 trackDict["style"] = style_json
755 self.tracksToAdd.append(trackDict) 758 self.tracksToAdd.append(trackDict)
756 self.trackIdlist.append(tId) 759 self.trackIdlist.append(tId)
760 logging.debug("#### wig trackData=%s" % str(trackData))
757 761
758 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): 762 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
759 tId = trackData["label"] 763 tId = trackData["label"]
760 fname = "%s.bam" % trackData["label"] 764 fname = "%s.bam" % trackData["label"]
761 dest = "%s/%s" % (self.outdir, fname) 765 dest = "%s/%s" % (self.outdir, fname)
957 else: 961 else:
958 self.config_json["assemblies"] = [ 962 self.config_json["assemblies"] = [
959 asstrack, 963 asstrack,
960 ] 964 ]
961 965
962 style_json = self._prepare_track_style(trackData)
963 url = "%s.paf" % (trackData["label"]) 966 url = "%s.paf" % (trackData["label"])
964 dest = "%s/%s" % (self.outdir, url) 967 dest = "%s/%s" % (self.outdir, url)
965 self.symlink_or_copy(os.path.realpath(data), dest) 968 self.symlink_or_copy(os.path.realpath(data), dest)
966 969 trackDict = {
967 if self.usejson: 970 "type": "SyntenyTrack",
968 trackDict = { 971 "trackId": tId,
969 "type": "SyntenyTrack", 972 "assemblyNames": [self.genome_name, pgname],
970 "trackId": tId, 973 "name": tname,
974 "adapter": {
975 "type": "PAFAdapter",
976 "pafLocation": {"uri": url},
971 "assemblyNames": [self.genome_name, pgname], 977 "assemblyNames": [self.genome_name, pgname],
972 "name": tname, 978 },
973 "adapter": { 979 }
974 "type": "PAFAdapter", 980 style_json = self._prepare_track_style(trackDict)
975 "pafLocation": {"uri": url}, 981 trackDict["style"] = style_json
976 "assemblyNames": [self.genome_name, pgname], 982 self.tracksToAdd.append(trackDict)
977 }, 983 self.trackIdlist.append(tId)
978 "config": style_json,
979 }
980 self.tracksToAdd.append(trackDict)
981 self.trackIdlist.append(tId)
982 else:
983 self._add_track(
984 trackData["label"],
985 trackData["key"],
986 trackData["category"],
987 dest,
988 assemblies=[self.genome_name, pgname],
989 config=style_json,
990 )
991 984
992 def add_hicab(self, data, trackData, hicOpts, **kwargs): 985 def add_hicab(self, data, trackData, hicOpts, **kwargs):
993 rel_dest = os.path.join("data", trackData["label"] + ".hic") 986 rel_dest = os.path.join("data", trackData["label"] + ".hic")
994 dest = os.path.join(self.outdir, rel_dest) 987 dest = os.path.join(self.outdir, rel_dest)
995 988
996 self.symlink_or_copy(os.path.realpath(data), dest) 989 self.symlink_or_copy(os.path.realpath(data), dest)
997
998 style_json = self._prepare_track_style(trackData)
999 990
1000 self._add_track( 991 self._add_track(
1001 trackData["label"], 992 trackData["label"],
1002 trackData["key"], 993 trackData["key"],
1003 trackData["category"], 994 trackData["category"],
1004 rel_dest, 995 rel_dest,
1005 config=style_json, 996 config={},
1006 ) 997 )
1007 998
1008 def add_sparql(self, url, query, query_refnames, trackData): 999 def add_sparql(self, url, query, query_refnames, trackData):
1009 1000
1010 json_track_data = { 1001 json_track_data = {
1059 "category": category, 1050 "category": category,
1060 "style": {}, 1051 "style": {},
1061 } 1052 }
1062 1053
1063 outputTrackConfig["key"] = track_human_label 1054 outputTrackConfig["key"] = track_human_label
1064 if self.debug: 1055
1065 log.info(
1066 "Processing category = %s, track_human_label = %s",
1067 category,
1068 track_human_label,
1069 )
1070 # We add extra data to hash for the case of REST + SPARQL. 1056 # We add extra data to hash for the case of REST + SPARQL.
1071 if ( 1057 if (
1072 "conf" in track 1058 "conf" in track
1073 and "options" in track["conf"] 1059 and "options" in track["conf"]
1074 and "url" in track["conf"]["options"] 1060 and "url" in track["conf"]["options"]
1075 ): 1061 ):
1076 rest_url = track["conf"]["options"]["url"] 1062 rest_url = track["conf"]["options"]["url"]
1077 else: 1063 else:
1078 rest_url = "" 1064 rest_url = ""
1079 1065 outputTrackConfig["trackset"] = track.get("trackset", {})
1080 # I chose to use track['category'] instead of 'category' here. This 1066 # I chose to use track['category'] instead of 'category' here. This
1081 # is intentional. This way re-running the tool on a different date 1067 # is intentional. This way re-running the tool on a different date
1082 # will not generate different hashes and make comparison of outputs 1068 # will not generate different hashes and make comparison of outputs
1083 # much simpler. 1069 # much simpler.
1084 hashData = [ 1070 hashData = [
1163 with open(self.config_json_file, "r") as config_file: 1149 with open(self.config_json_file, "r") as config_file:
1164 config_json = json.load(config_file) 1150 config_json = json.load(config_file)
1165 1151
1166 for track_conf in self.tracksToAdd: 1152 for track_conf in self.tracksToAdd:
1167 track_types[track_conf["trackId"]] = track_conf["type"] 1153 track_types[track_conf["trackId"]] = track_conf["type"]
1168 1154 tId = track_conf["trackId"]
1169 for on_track in data["visibility"]["default_on"]: 1155 if tId in data["visibility"]["default_on"]:
1170 style_data = {"type": "LinearBasicDisplay", "height": 100} 1156 style_data = {"type": "LinearBasicDisplay"}
1171 if on_track in data["style"]: 1157 if "displays" in track_conf:
1172 if "display" in data["style"][on_track]: 1158 style_data["type"] = track_conf["displays"][0]["type"]
1173 style_data["type"] = data["style"][on_track]["display"] 1159 if track_conf.get("style_labels", None):
1174 del data["style"][on_track]["display"] 1160 # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work
1175 style_data.update(data["style"][on_track]) 1161 # TODO move this to per track displays?
1176 if on_track in data["style_labels"]: 1162 style_data["labels"] = track_conf["style_labels"]
1177 # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work 1163 tracks_data.append(
1178 # TODO move this to per track displays? 1164 {
1179 style_data["labels"] = data["style_labels"][on_track] 1165 "type": track_types[tId],
1180 1166 "configuration": tId,
1181 tracks_data.append( 1167 "displays": [style_data],
1182 { 1168 }
1183 "type": track_types[on_track], 1169 )
1184 "configuration": on_track,
1185 "displays": [style_data],
1186 }
1187 )
1188 1170
1189 # The view for the assembly we're adding 1171 # The view for the assembly we're adding
1190 view_json = {"type": "LinearGenomeView", "tracks": tracks_data} 1172 view_json = {"type": "LinearGenomeView", "tracks": tracks_data}
1191 1173
1192 refName = None 1174 refName = None
1197 start = int(loc_match.group(2)) 1179 start = int(loc_match.group(2))
1198 end = int(loc_match.group(3)) 1180 end = int(loc_match.group(3))
1199 elif self.genome_name is not None: 1181 elif self.genome_name is not None:
1200 refName = self.genome_name 1182 refName = self.genome_name
1201 start = 0 1183 start = 0
1202 end = 100000 # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708 1184 end = 10000 # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708
1203 1185
1204 if refName is not None: 1186 if refName is not None:
1205 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome 1187 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
1206 view_json["displayedRegions"] = [ 1188 view_json["displayedRegions"] = [
1207 { 1189 {
1263 with open(config_path, "w") as config_file: 1245 with open(config_path, "w") as config_file:
1264 json.dump(config_json, config_file, indent=2) 1246 json.dump(config_json, config_file, indent=2)
1265 1247
1266 def clone_jbrowse(self): 1248 def clone_jbrowse(self):
1267 """Clone a JBrowse directory into a destination directory.""" 1249 """Clone a JBrowse directory into a destination directory."""
1250 # dest = os.path.realpath(self.outdir)
1268 dest = self.outdir 1251 dest = self.outdir
1269 cmd = ["jbrowse", "create", "-t", JB2REL, "-f", dest] 1252 cmd = ["rm", "-rf", dest + "/*"]
1253 self.subprocess_check_call(cmd)
1254 cmd = ["jbrowse", "create", dest, "-t", JB2VER, "-f"]
1270 self.subprocess_check_call(cmd) 1255 self.subprocess_check_call(cmd)
1271 for fn in [ 1256 for fn in [
1272 "asset-manifest.json", 1257 "asset-manifest.json",
1273 "favicon.ico", 1258 "favicon.ico",
1274 "robots.txt", 1259 "robots.txt",
1276 "version.txt", 1261 "version.txt",
1277 "test_data", 1262 "test_data",
1278 ]: 1263 ]:
1279 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] 1264 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)]
1280 self.subprocess_check_call(cmd) 1265 self.subprocess_check_call(cmd)
1281 cmd = ["cp", os.path.join(INSTALLED_TO, "webserver.py"), self.outdir] 1266 cmd = ["cp", os.path.join(INSTALLED_TO, "jb2_webserver.py"), self.outdir]
1282 self.subprocess_check_call(cmd) 1267 self.subprocess_check_call(cmd)
1283 1268
1284 1269
1285 def parse_style_conf(item): 1270 def parse_style_conf(item):
1286 if "type" in item.attrib and item.attrib["type"] in ["boolean", "integer"]: 1271 if "type" in item.attrib and item.attrib["type"] in ["boolean", "integer"]:
1384 "bigwig_multiple", 1369 "bigwig_multiple",
1385 "MultiBigWig", # Giving an hardcoded name for now 1370 "MultiBigWig", # Giving an hardcoded name for now
1386 {}, # No metadata for multiple bigwig 1371 {}, # No metadata for multiple bigwig
1387 ) 1372 )
1388 ) 1373 )
1389
1390 track_conf["category"] = track.attrib["cat"] 1374 track_conf["category"] = track.attrib["cat"]
1391 track_conf["format"] = track.attrib["format"] 1375 track_conf["format"] = track.attrib["format"]
1392 track_conf["style"] = { 1376 if track.find("options/style"):
1393 item.tag: parse_style_conf(item) for item in track.find("options/style") 1377 track_conf["style"] = {
1394 } 1378 item.tag: parse_style_conf(item) for item in track.find("options/style")
1395 1379 }
1396 track_conf["style"] = { 1380 if track.find("options/style_labels"):
1397 item.tag: parse_style_conf(item) for item in track.find("options/style") 1381 track_conf["style_labels"] = {
1398 } 1382 item.tag: parse_style_conf(item)
1399 1383 for item in track.find("options/style_labels")
1400 track_conf["style_labels"] = { 1384 }
1401 item.tag: parse_style_conf(item)
1402 for item in track.find("options/style_labels")
1403 }
1404 1385
1405 track_conf["conf"] = etree_to_dict(track.find("options")) 1386 track_conf["conf"] = etree_to_dict(track.find("options"))
1406 keys = jc.process_annotations(track_conf) 1387 keys = jc.process_annotations(track_conf)
1407 1388
1408 if keys: 1389 if keys:
1409 for key in keys: 1390 for key in keys:
1410 default_session_data["visibility"][ 1391 default_session_data["visibility"][
1411 track.attrib.get("visibility", "default_off") 1392 track.attrib.get("visibility", "default_off")
1412 ].append(key) 1393 ].append(key)
1413 default_session_data["style"][key] = track_conf[ 1394 if track_conf.get("style", None):
1414 "style" 1395 default_session_data["style"][key] = track_conf[
1415 ] # TODO do we need this anymore? 1396 "style"
1416 default_session_data["style_labels"][key] = track_conf["style_labels"] 1397 ] # TODO do we need this anymore?
1398 if track_conf.get("style_lables", None):
1399 default_session_data["style_labels"][key] = track_conf.get(
1400 "style_labels", None
1401 )
1417 1402
1418 default_session_data["defaultLocation"] = root.find( 1403 default_session_data["defaultLocation"] = root.find(
1419 "metadata/general/defaultLocation" 1404 "metadata/general/defaultLocation"
1420 ).text 1405 ).text
1421 default_session_data["session_name"] = root.find( 1406 default_session_data["session_name"] = root.find(
1442 jc.add_general_configuration(general_data) 1427 jc.add_general_configuration(general_data)
1443 x = open(args.xml, "r").read() 1428 x = open(args.xml, "r").read()
1444 jc.config_json["tracks"] = jc.tracksToAdd 1429 jc.config_json["tracks"] = jc.tracksToAdd
1445 if jc.usejson: 1430 if jc.usejson:
1446 jc.write_config() 1431 jc.write_config()
1447 # jc.add_default_view()
1448 jc.add_default_session(default_session_data) 1432 jc.add_default_session(default_session_data)
1449 1433
1450 # jc.text_index() not sure what broke here. 1434 # jc.text_index() not sure what broke here.