Mercurial > repos > fubar > jbrowse2
comparison jbrowse2.py @ 7:b04fd993b31e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 53a108d8153c955044ae7eb8cb06bdcfd0036717
author | fubar |
---|---|
date | Wed, 17 Jan 2024 07:50:52 +0000 |
parents | 79f7265f90bd |
children | a26c41e304c3 |
comparison
legend: equal, deleted, inserted, replaced
6:79f7265f90bd | 7:b04fd993b31e |
---|---|
15 import xml.etree.ElementTree as ET | 15 import xml.etree.ElementTree as ET |
16 from collections import defaultdict | 16 from collections import defaultdict |
17 | 17 |
18 logging.basicConfig(level=logging.INFO) | 18 logging.basicConfig(level=logging.INFO) |
19 log = logging.getLogger("jbrowse") | 19 log = logging.getLogger("jbrowse") |
20 | |
21 JB2VER = "v2.10.0" | |
22 # version pinned for cloning | |
23 | |
20 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") | 24 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") |
21 GALAXY_INFRASTRUCTURE_URL = None | 25 GALAXY_INFRASTRUCTURE_URL = None |
22 JB2REL = "v2.10.0" | 26 JB2REL = "v2.10.0" |
23 # version pinned for cloning | 27 # version pinned for cloning |
24 | 28 |
369 return metadata | 373 return metadata |
370 | 374 |
371 | 375 |
372 class JbrowseConnector(object): | 376 class JbrowseConnector(object): |
373 def __init__(self, outdir, genomes): | 377 def __init__(self, outdir, genomes): |
374 self.debug = False | |
375 self.usejson = True | 378 self.usejson = True |
376 self.giURL = GALAXY_INFRASTRUCTURE_URL | 379 self.giURL = GALAXY_INFRASTRUCTURE_URL |
377 self.outdir = outdir | 380 self.outdir = outdir |
378 os.makedirs(self.outdir, exist_ok=True) | 381 os.makedirs(self.outdir, exist_ok=True) |
379 self.genome_paths = genomes | 382 self.genome_paths = genomes |
385 self.config_json_file = os.path.join(outdir, "config.json") | 388 self.config_json_file = os.path.join(outdir, "config.json") |
386 self.clone_jbrowse() | 389 self.clone_jbrowse() |
387 | 390 |
388 def subprocess_check_call(self, command, output=None): | 391 def subprocess_check_call(self, command, output=None): |
389 if output: | 392 if output: |
390 if self.debug: | 393 log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output) |
391 log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output) | |
392 subprocess.check_call(command, cwd=self.outdir, stdout=output) | 394 subprocess.check_call(command, cwd=self.outdir, stdout=output) |
393 else: | 395 else: |
394 log.debug("cd %s && %s", self.outdir, " ".join(command)) | 396 log.debug("cd %s && %s", self.outdir, " ".join(command)) |
395 subprocess.check_call(command, cwd=self.outdir) | 397 subprocess.check_call(command, cwd=self.outdir) |
396 | 398 |
397 def subprocess_popen(self, command): | 399 def subprocess_popen(self, command): |
398 if self.debug: | 400 log.debug(command) |
399 log.debug(command) | |
400 p = subprocess.Popen( | 401 p = subprocess.Popen( |
401 command, | 402 command, |
402 cwd=self.outdir, | 403 cwd=self.outdir, |
403 shell=True, | 404 shell=True, |
404 stdin=subprocess.PIPE, | 405 stdin=subprocess.PIPE, |
412 log.error(output) | 413 log.error(output) |
413 log.error(err) | 414 log.error(err) |
414 raise RuntimeError("Command failed with exit code %s" % (retcode)) | 415 raise RuntimeError("Command failed with exit code %s" % (retcode)) |
415 | 416 |
416 def subprocess_check_output(self, command): | 417 def subprocess_check_output(self, command): |
417 if self.debug: | 418 log.debug(" ".join(command)) |
418 log.debug(" ".join(command)) | |
419 return subprocess.check_output(command, cwd=self.outdir) | 419 return subprocess.check_output(command, cwd=self.outdir) |
420 | 420 |
421 def symlink_or_copy(self, src, dest): | 421 def symlink_or_copy(self, src, dest): |
422 if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool( | 422 if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool( |
423 os.environ["GALAXY_JBROWSE_SYMLINKS"] | 423 os.environ["GALAXY_JBROWSE_SYMLINKS"] |
463 self.config_json["assemblies"] += assemblies | 463 self.config_json["assemblies"] += assemblies |
464 else: | 464 else: |
465 self.config_json["assemblies"] = assemblies | 465 self.config_json["assemblies"] = assemblies |
466 | 466 |
467 def make_assembly(self, fapath, gname): | 467 def make_assembly(self, fapath, gname): |
468 faname = gname + ".fa.gz" | 468 hashData = [ |
469 fapath, | |
470 gname, | |
471 ] | |
472 hashData = "|".join(hashData).encode("utf-8") | |
473 ghash = hashlib.md5(hashData).hexdigest() | |
474 faname = ghash + ".fa.gz" | |
469 fadest = os.path.join(self.outdir, faname) | 475 fadest = os.path.join(self.outdir, faname) |
470 # fadest = os.path.realpath(os.path.join(self.outdir, faname)) | |
471 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( | 476 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( |
472 fapath, | 477 fapath, |
473 fadest, | 478 fadest, |
474 fadest, | 479 fadest, |
475 fadest, | 480 fadest, |
554 """ | 559 """ |
555 tId = trackData["label"] | 560 tId = trackData["label"] |
556 # can be served - if public. | 561 # can be served - if public. |
557 # dsId = trackData["metadata"]["dataset_id"] | 562 # dsId = trackData["metadata"]["dataset_id"] |
558 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId) | 563 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId) |
559 hname = trackData["name"] | 564 hname = trackData["label"] |
560 dest = os.path.join(self.outdir, hname) | 565 dest = os.path.join(self.outdir, hname) |
561 cmd = ["cp", data, dest] | 566 cmd = ["cp", data, dest] |
562 # these can be very big. | 567 # these can be very big. |
563 self.subprocess_check_call(cmd) | 568 self.subprocess_check_call(cmd) |
564 floc = { | 569 floc = { |
601 ] | 606 ] |
602 } | 607 } |
603 tId = trackData["label"] | 608 tId = trackData["label"] |
604 fname = "%s.bed" % tId | 609 fname = "%s.bed" % tId |
605 dest = "%s/%s" % (self.outdir, fname) | 610 dest = "%s/%s" % (self.outdir, fname) |
606 # self.symlink_or_copy(data, dest) | |
607 # Process MAF to bed-like. Need build to munge chromosomes | |
608 gname = self.genome_name | 611 gname = self.genome_name |
609 cmd = [ | 612 cmd = [ |
610 "bash", | 613 "bash", |
611 os.path.join(INSTALLED_TO, "convertMAF.sh"), | 614 os.path.join(INSTALLED_TO, "convertMAF.sh"), |
612 data, | 615 data, |
720 } | 723 } |
721 style_json = self._prepare_track_style(trackDict) | 724 style_json = self._prepare_track_style(trackDict) |
722 trackDict["style"] = style_json | 725 trackDict["style"] = style_json |
723 self.tracksToAdd.append(trackDict) | 726 self.tracksToAdd.append(trackDict) |
724 self.trackIdlist.append(tId) | 727 self.trackIdlist.append(tId) |
725 | |
726 os.unlink(gff3) | 728 os.unlink(gff3) |
727 | 729 |
728 def add_bigwig(self, data, trackData): | 730 def add_bigwig(self, data, trackData): |
729 url = "%s.bw" % trackData["name"] | 731 url = "%s.bigwig" % trackData["label"] |
732 # slashes in names cause path trouble | |
730 dest = os.path.join(self.outdir, url) | 733 dest = os.path.join(self.outdir, url) |
731 cmd = ["cp", data, dest] | 734 cmd = ["cp", data, dest] |
732 self.subprocess_check_call(cmd) | 735 self.subprocess_check_call(cmd) |
733 bwloc = {"uri": url} | 736 bwloc = {"uri": url} |
734 tId = trackData["label"] | 737 tId = trackData["label"] |
735 trackDict = { | 738 trackDict = { |
736 "type": "QuantitativeTrack", | 739 "type": "QuantitativeTrack", |
737 "trackId": tId, | 740 "trackId": tId, |
738 "name": url, | 741 "name": trackData["name"], |
739 "assemblyNames": [ | 742 "assemblyNames": [ |
740 self.genome_name, | 743 self.genome_name, |
741 ], | 744 ], |
742 "adapter": { | 745 "adapter": { |
743 "type": "BigWigAdapter", | 746 "type": "BigWigAdapter", |
752 } | 755 } |
753 style_json = self._prepare_track_style(trackDict) | 756 style_json = self._prepare_track_style(trackDict) |
754 trackDict["style"] = style_json | 757 trackDict["style"] = style_json |
755 self.tracksToAdd.append(trackDict) | 758 self.tracksToAdd.append(trackDict) |
756 self.trackIdlist.append(tId) | 759 self.trackIdlist.append(tId) |
760 logging.debug("#### wig trackData=%s" % str(trackData)) | |
757 | 761 |
758 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): | 762 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): |
759 tId = trackData["label"] | 763 tId = trackData["label"] |
760 fname = "%s.bam" % trackData["label"] | 764 fname = "%s.bam" % trackData["label"] |
761 dest = "%s/%s" % (self.outdir, fname) | 765 dest = "%s/%s" % (self.outdir, fname) |
957 else: | 961 else: |
958 self.config_json["assemblies"] = [ | 962 self.config_json["assemblies"] = [ |
959 asstrack, | 963 asstrack, |
960 ] | 964 ] |
961 | 965 |
962 style_json = self._prepare_track_style(trackData) | |
963 url = "%s.paf" % (trackData["label"]) | 966 url = "%s.paf" % (trackData["label"]) |
964 dest = "%s/%s" % (self.outdir, url) | 967 dest = "%s/%s" % (self.outdir, url) |
965 self.symlink_or_copy(os.path.realpath(data), dest) | 968 self.symlink_or_copy(os.path.realpath(data), dest) |
966 | 969 trackDict = { |
967 if self.usejson: | 970 "type": "SyntenyTrack", |
968 trackDict = { | 971 "trackId": tId, |
969 "type": "SyntenyTrack", | 972 "assemblyNames": [self.genome_name, pgname], |
970 "trackId": tId, | 973 "name": tname, |
974 "adapter": { | |
975 "type": "PAFAdapter", | |
976 "pafLocation": {"uri": url}, | |
971 "assemblyNames": [self.genome_name, pgname], | 977 "assemblyNames": [self.genome_name, pgname], |
972 "name": tname, | 978 }, |
973 "adapter": { | 979 } |
974 "type": "PAFAdapter", | 980 style_json = self._prepare_track_style(trackDict) |
975 "pafLocation": {"uri": url}, | 981 trackDict["style"] = style_json |
976 "assemblyNames": [self.genome_name, pgname], | 982 self.tracksToAdd.append(trackDict) |
977 }, | 983 self.trackIdlist.append(tId) |
978 "config": style_json, | |
979 } | |
980 self.tracksToAdd.append(trackDict) | |
981 self.trackIdlist.append(tId) | |
982 else: | |
983 self._add_track( | |
984 trackData["label"], | |
985 trackData["key"], | |
986 trackData["category"], | |
987 dest, | |
988 assemblies=[self.genome_name, pgname], | |
989 config=style_json, | |
990 ) | |
991 | 984 |
992 def add_hicab(self, data, trackData, hicOpts, **kwargs): | 985 def add_hicab(self, data, trackData, hicOpts, **kwargs): |
993 rel_dest = os.path.join("data", trackData["label"] + ".hic") | 986 rel_dest = os.path.join("data", trackData["label"] + ".hic") |
994 dest = os.path.join(self.outdir, rel_dest) | 987 dest = os.path.join(self.outdir, rel_dest) |
995 | 988 |
996 self.symlink_or_copy(os.path.realpath(data), dest) | 989 self.symlink_or_copy(os.path.realpath(data), dest) |
997 | |
998 style_json = self._prepare_track_style(trackData) | |
999 | 990 |
1000 self._add_track( | 991 self._add_track( |
1001 trackData["label"], | 992 trackData["label"], |
1002 trackData["key"], | 993 trackData["key"], |
1003 trackData["category"], | 994 trackData["category"], |
1004 rel_dest, | 995 rel_dest, |
1005 config=style_json, | 996 config={}, |
1006 ) | 997 ) |
1007 | 998 |
1008 def add_sparql(self, url, query, query_refnames, trackData): | 999 def add_sparql(self, url, query, query_refnames, trackData): |
1009 | 1000 |
1010 json_track_data = { | 1001 json_track_data = { |
1059 "category": category, | 1050 "category": category, |
1060 "style": {}, | 1051 "style": {}, |
1061 } | 1052 } |
1062 | 1053 |
1063 outputTrackConfig["key"] = track_human_label | 1054 outputTrackConfig["key"] = track_human_label |
1064 if self.debug: | 1055 |
1065 log.info( | |
1066 "Processing category = %s, track_human_label = %s", | |
1067 category, | |
1068 track_human_label, | |
1069 ) | |
1070 # We add extra data to hash for the case of REST + SPARQL. | 1056 # We add extra data to hash for the case of REST + SPARQL. |
1071 if ( | 1057 if ( |
1072 "conf" in track | 1058 "conf" in track |
1073 and "options" in track["conf"] | 1059 and "options" in track["conf"] |
1074 and "url" in track["conf"]["options"] | 1060 and "url" in track["conf"]["options"] |
1075 ): | 1061 ): |
1076 rest_url = track["conf"]["options"]["url"] | 1062 rest_url = track["conf"]["options"]["url"] |
1077 else: | 1063 else: |
1078 rest_url = "" | 1064 rest_url = "" |
1079 | 1065 outputTrackConfig["trackset"] = track.get("trackset", {}) |
1080 # I chose to use track['category'] instead of 'category' here. This | 1066 # I chose to use track['category'] instead of 'category' here. This |
1081 # is intentional. This way re-running the tool on a different date | 1067 # is intentional. This way re-running the tool on a different date |
1082 # will not generate different hashes and make comparison of outputs | 1068 # will not generate different hashes and make comparison of outputs |
1083 # much simpler. | 1069 # much simpler. |
1084 hashData = [ | 1070 hashData = [ |
1163 with open(self.config_json_file, "r") as config_file: | 1149 with open(self.config_json_file, "r") as config_file: |
1164 config_json = json.load(config_file) | 1150 config_json = json.load(config_file) |
1165 | 1151 |
1166 for track_conf in self.tracksToAdd: | 1152 for track_conf in self.tracksToAdd: |
1167 track_types[track_conf["trackId"]] = track_conf["type"] | 1153 track_types[track_conf["trackId"]] = track_conf["type"] |
1168 | 1154 tId = track_conf["trackId"] |
1169 for on_track in data["visibility"]["default_on"]: | 1155 if tId in data["visibility"]["default_on"]: |
1170 style_data = {"type": "LinearBasicDisplay", "height": 100} | 1156 style_data = {"type": "LinearBasicDisplay"} |
1171 if on_track in data["style"]: | 1157 if "displays" in track_conf: |
1172 if "display" in data["style"][on_track]: | 1158 style_data["type"] = track_conf["displays"][0]["type"] |
1173 style_data["type"] = data["style"][on_track]["display"] | 1159 if track_conf.get("style_labels", None): |
1174 del data["style"][on_track]["display"] | 1160 # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work |
1175 style_data.update(data["style"][on_track]) | 1161 # TODO move this to per track displays? |
1176 if on_track in data["style_labels"]: | 1162 style_data["labels"] = track_conf["style_labels"] |
1177 # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work | 1163 tracks_data.append( |
1178 # TODO move this to per track displays? | 1164 { |
1179 style_data["labels"] = data["style_labels"][on_track] | 1165 "type": track_types[tId], |
1180 | 1166 "configuration": tId, |
1181 tracks_data.append( | 1167 "displays": [style_data], |
1182 { | 1168 } |
1183 "type": track_types[on_track], | 1169 ) |
1184 "configuration": on_track, | |
1185 "displays": [style_data], | |
1186 } | |
1187 ) | |
1188 | 1170 |
1189 # The view for the assembly we're adding | 1171 # The view for the assembly we're adding |
1190 view_json = {"type": "LinearGenomeView", "tracks": tracks_data} | 1172 view_json = {"type": "LinearGenomeView", "tracks": tracks_data} |
1191 | 1173 |
1192 refName = None | 1174 refName = None |
1197 start = int(loc_match.group(2)) | 1179 start = int(loc_match.group(2)) |
1198 end = int(loc_match.group(3)) | 1180 end = int(loc_match.group(3)) |
1199 elif self.genome_name is not None: | 1181 elif self.genome_name is not None: |
1200 refName = self.genome_name | 1182 refName = self.genome_name |
1201 start = 0 | 1183 start = 0 |
1202 end = 100000 # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708 | 1184 end = 10000 # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708 |
1203 | 1185 |
1204 if refName is not None: | 1186 if refName is not None: |
1205 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome | 1187 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome |
1206 view_json["displayedRegions"] = [ | 1188 view_json["displayedRegions"] = [ |
1207 { | 1189 { |
1263 with open(config_path, "w") as config_file: | 1245 with open(config_path, "w") as config_file: |
1264 json.dump(config_json, config_file, indent=2) | 1246 json.dump(config_json, config_file, indent=2) |
1265 | 1247 |
1266 def clone_jbrowse(self): | 1248 def clone_jbrowse(self): |
1267 """Clone a JBrowse directory into a destination directory.""" | 1249 """Clone a JBrowse directory into a destination directory.""" |
1250 # dest = os.path.realpath(self.outdir) | |
1268 dest = self.outdir | 1251 dest = self.outdir |
1269 cmd = ["jbrowse", "create", "-t", JB2REL, "-f", dest] | 1252 cmd = ["rm", "-rf", dest + "/*"] |
1253 self.subprocess_check_call(cmd) | |
1254 cmd = ["jbrowse", "create", dest, "-t", JB2VER, "-f"] | |
1270 self.subprocess_check_call(cmd) | 1255 self.subprocess_check_call(cmd) |
1271 for fn in [ | 1256 for fn in [ |
1272 "asset-manifest.json", | 1257 "asset-manifest.json", |
1273 "favicon.ico", | 1258 "favicon.ico", |
1274 "robots.txt", | 1259 "robots.txt", |
1276 "version.txt", | 1261 "version.txt", |
1277 "test_data", | 1262 "test_data", |
1278 ]: | 1263 ]: |
1279 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] | 1264 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] |
1280 self.subprocess_check_call(cmd) | 1265 self.subprocess_check_call(cmd) |
1281 cmd = ["cp", os.path.join(INSTALLED_TO, "webserver.py"), self.outdir] | 1266 cmd = ["cp", os.path.join(INSTALLED_TO, "jb2_webserver.py"), self.outdir] |
1282 self.subprocess_check_call(cmd) | 1267 self.subprocess_check_call(cmd) |
1283 | 1268 |
1284 | 1269 |
1285 def parse_style_conf(item): | 1270 def parse_style_conf(item): |
1286 if "type" in item.attrib and item.attrib["type"] in ["boolean", "integer"]: | 1271 if "type" in item.attrib and item.attrib["type"] in ["boolean", "integer"]: |
1384 "bigwig_multiple", | 1369 "bigwig_multiple", |
1385 "MultiBigWig", # Giving an hardcoded name for now | 1370 "MultiBigWig", # Giving an hardcoded name for now |
1386 {}, # No metadata for multiple bigwig | 1371 {}, # No metadata for multiple bigwig |
1387 ) | 1372 ) |
1388 ) | 1373 ) |
1389 | |
1390 track_conf["category"] = track.attrib["cat"] | 1374 track_conf["category"] = track.attrib["cat"] |
1391 track_conf["format"] = track.attrib["format"] | 1375 track_conf["format"] = track.attrib["format"] |
1392 track_conf["style"] = { | 1376 if track.find("options/style"): |
1393 item.tag: parse_style_conf(item) for item in track.find("options/style") | 1377 track_conf["style"] = { |
1394 } | 1378 item.tag: parse_style_conf(item) for item in track.find("options/style") |
1395 | 1379 } |
1396 track_conf["style"] = { | 1380 if track.find("options/style_labels"): |
1397 item.tag: parse_style_conf(item) for item in track.find("options/style") | 1381 track_conf["style_labels"] = { |
1398 } | 1382 item.tag: parse_style_conf(item) |
1399 | 1383 for item in track.find("options/style_labels") |
1400 track_conf["style_labels"] = { | 1384 } |
1401 item.tag: parse_style_conf(item) | |
1402 for item in track.find("options/style_labels") | |
1403 } | |
1404 | 1385 |
1405 track_conf["conf"] = etree_to_dict(track.find("options")) | 1386 track_conf["conf"] = etree_to_dict(track.find("options")) |
1406 keys = jc.process_annotations(track_conf) | 1387 keys = jc.process_annotations(track_conf) |
1407 | 1388 |
1408 if keys: | 1389 if keys: |
1409 for key in keys: | 1390 for key in keys: |
1410 default_session_data["visibility"][ | 1391 default_session_data["visibility"][ |
1411 track.attrib.get("visibility", "default_off") | 1392 track.attrib.get("visibility", "default_off") |
1412 ].append(key) | 1393 ].append(key) |
1413 default_session_data["style"][key] = track_conf[ | 1394 if track_conf.get("style", None): |
1414 "style" | 1395 default_session_data["style"][key] = track_conf[ |
1415 ] # TODO do we need this anymore? | 1396 "style" |
1416 default_session_data["style_labels"][key] = track_conf["style_labels"] | 1397 ] # TODO do we need this anymore? |
1398 if track_conf.get("style_lables", None): | |
1399 default_session_data["style_labels"][key] = track_conf.get( | |
1400 "style_labels", None | |
1401 ) | |
1417 | 1402 |
1418 default_session_data["defaultLocation"] = root.find( | 1403 default_session_data["defaultLocation"] = root.find( |
1419 "metadata/general/defaultLocation" | 1404 "metadata/general/defaultLocation" |
1420 ).text | 1405 ).text |
1421 default_session_data["session_name"] = root.find( | 1406 default_session_data["session_name"] = root.find( |
1442 jc.add_general_configuration(general_data) | 1427 jc.add_general_configuration(general_data) |
1443 x = open(args.xml, "r").read() | 1428 x = open(args.xml, "r").read() |
1444 jc.config_json["tracks"] = jc.tracksToAdd | 1429 jc.config_json["tracks"] = jc.tracksToAdd |
1445 if jc.usejson: | 1430 if jc.usejson: |
1446 jc.write_config() | 1431 jc.write_config() |
1447 # jc.add_default_view() | |
1448 jc.add_default_session(default_session_data) | 1432 jc.add_default_session(default_session_data) |
1449 | 1433 |
1450 # jc.text_index() not sure what broke here. | 1434 # jc.text_index() not sure what broke here. |