Mercurial > repos > fubar > jbrowse2
diff jbrowse2.py @ 7:b04fd993b31e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 53a108d8153c955044ae7eb8cb06bdcfd0036717
author | fubar |
---|---|
date | Wed, 17 Jan 2024 07:50:52 +0000 |
parents | 79f7265f90bd |
children | a26c41e304c3 |
line wrap: on
line diff
--- a/jbrowse2.py Tue Jan 09 06:55:34 2024 +0000 +++ b/jbrowse2.py Wed Jan 17 07:50:52 2024 +0000 @@ -17,6 +17,10 @@ logging.basicConfig(level=logging.INFO) log = logging.getLogger("jbrowse") + +JB2VER = "v2.10.0" +# version pinned for cloning + TODAY = datetime.datetime.now().strftime("%Y-%m-%d") GALAXY_INFRASTRUCTURE_URL = None JB2REL = "v2.10.0" @@ -371,7 +375,6 @@ class JbrowseConnector(object): def __init__(self, outdir, genomes): - self.debug = False self.usejson = True self.giURL = GALAXY_INFRASTRUCTURE_URL self.outdir = outdir @@ -387,16 +390,14 @@ def subprocess_check_call(self, command, output=None): if output: - if self.debug: - log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output) + log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output) subprocess.check_call(command, cwd=self.outdir, stdout=output) else: log.debug("cd %s && %s", self.outdir, " ".join(command)) subprocess.check_call(command, cwd=self.outdir) def subprocess_popen(self, command): - if self.debug: - log.debug(command) + log.debug(command) p = subprocess.Popen( command, cwd=self.outdir, @@ -414,8 +415,7 @@ raise RuntimeError("Command failed with exit code %s" % (retcode)) def subprocess_check_output(self, command): - if self.debug: - log.debug(" ".join(command)) + log.debug(" ".join(command)) return subprocess.check_output(command, cwd=self.outdir) def symlink_or_copy(self, src, dest): @@ -465,9 +465,14 @@ self.config_json["assemblies"] = assemblies def make_assembly(self, fapath, gname): - faname = gname + ".fa.gz" + hashData = [ + fapath, + gname, + ] + hashData = "|".join(hashData).encode("utf-8") + ghash = hashlib.md5(hashData).hexdigest() + faname = ghash + ".fa.gz" fadest = os.path.join(self.outdir, faname) - # fadest = os.path.realpath(os.path.join(self.outdir, faname)) cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( fapath, fadest, @@ -556,7 +561,7 @@ # can be served - if public. # dsId = trackData["metadata"]["dataset_id"] # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId) - hname = trackData["name"] + hname = trackData["label"] dest = os.path.join(self.outdir, hname) cmd = ["cp", data, dest] # these can be very big. @@ -603,8 +608,6 @@ tId = trackData["label"] fname = "%s.bed" % tId dest = "%s/%s" % (self.outdir, fname) - # self.symlink_or_copy(data, dest) - # Process MAF to bed-like. Need build to munge chromosomes gname = self.genome_name cmd = [ "bash", @@ -722,11 +725,11 @@ trackDict["style"] = style_json self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) - os.unlink(gff3) def add_bigwig(self, data, trackData): - url = "%s.bw" % trackData["name"] + url = "%s.bigwig" % trackData["label"] + # slashes in names cause path trouble dest = os.path.join(self.outdir, url) cmd = ["cp", data, dest] self.subprocess_check_call(cmd) @@ -735,7 +738,7 @@ trackDict = { "type": "QuantitativeTrack", "trackId": tId, - "name": url, + "name": trackData["name"], "assemblyNames": [ self.genome_name, ], @@ -754,6 +757,7 @@ trackDict["style"] = style_json self.tracksToAdd.append(trackDict) self.trackIdlist.append(tId) + logging.debug("#### wig trackData=%s" % str(trackData)) def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): tId = trackData["label"] @@ -959,35 +963,24 @@ asstrack, ] - style_json = self._prepare_track_style(trackData) url = "%s.paf" % (trackData["label"]) dest = "%s/%s" % (self.outdir, url) self.symlink_or_copy(os.path.realpath(data), dest) - - if self.usejson: - trackDict = { - "type": "SyntenyTrack", - "trackId": tId, + trackDict = { + "type": "SyntenyTrack", + "trackId": tId, + "assemblyNames": [self.genome_name, pgname], + "name": tname, + "adapter": { + "type": "PAFAdapter", + "pafLocation": {"uri": url}, "assemblyNames": [self.genome_name, pgname], - "name": tname, - "adapter": { - "type": "PAFAdapter", - "pafLocation": {"uri": url}, - "assemblyNames": [self.genome_name, pgname], - }, - "config": style_json, - } - self.tracksToAdd.append(trackDict) - self.trackIdlist.append(tId) - else: - self._add_track( - trackData["label"], - trackData["key"], - trackData["category"], - dest, - assemblies=[self.genome_name, pgname], - config=style_json, - ) + }, + } + style_json = self._prepare_track_style(trackDict) + trackDict["style"] = style_json + self.tracksToAdd.append(trackDict) + self.trackIdlist.append(tId) def add_hicab(self, data, trackData, hicOpts, **kwargs): rel_dest = os.path.join("data", trackData["label"] + ".hic") @@ -995,14 +988,12 @@ self.symlink_or_copy(os.path.realpath(data), dest) - style_json = self._prepare_track_style(trackData) - self._add_track( trackData["label"], trackData["key"], trackData["category"], rel_dest, - config=style_json, + config={}, ) def add_sparql(self, url, query, query_refnames, trackData): @@ -1061,12 +1052,7 @@ } outputTrackConfig["key"] = track_human_label - if self.debug: - log.info( - "Processing category = %s, track_human_label = %s", - category, - track_human_label, - ) + # We add extra data to hash for the case of REST + SPARQL. if ( "conf" in track @@ -1076,7 +1062,7 @@ rest_url = track["conf"]["options"]["url"] else: rest_url = "" - + outputTrackConfig["trackset"] = track.get("trackset", {}) # I chose to use track['category'] instead of 'category' here. This # is intentional. This way re-running the tool on a different date # will not generate different hashes and make comparison of outputs @@ -1165,26 +1151,22 @@ for track_conf in self.tracksToAdd: track_types[track_conf["trackId"]] = track_conf["type"] - - for on_track in data["visibility"]["default_on"]: - style_data = {"type": "LinearBasicDisplay", "height": 100} - if on_track in data["style"]: - if "display" in data["style"][on_track]: - style_data["type"] = data["style"][on_track]["display"] - del data["style"][on_track]["display"] - style_data.update(data["style"][on_track]) - if on_track in data["style_labels"]: - # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work - # TODO move this to per track displays? - style_data["labels"] = data["style_labels"][on_track] - - tracks_data.append( - { - "type": track_types[on_track], - "configuration": on_track, - "displays": [style_data], - } - ) + tId = track_conf["trackId"] + if tId in data["visibility"]["default_on"]: + style_data = {"type": "LinearBasicDisplay"} + if "displays" in track_conf: + style_data["type"] = track_conf["displays"][0]["type"] + if track_conf.get("style_labels", None): + # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work + # TODO move this to per track displays? + style_data["labels"] = track_conf["style_labels"] + tracks_data.append( + { + "type": track_types[tId], + "configuration": tId, + "displays": [style_data], + } + ) # The view for the assembly we're adding view_json = {"type": "LinearGenomeView", "tracks": tracks_data} @@ -1199,7 +1181,7 @@ elif self.genome_name is not None: refName = self.genome_name start = 0 - end = 100000 # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708 + end = 10000 # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708 if refName is not None: # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome @@ -1265,8 +1247,11 @@ def clone_jbrowse(self): """Clone a JBrowse directory into a destination directory.""" + # dest = os.path.realpath(self.outdir) dest = self.outdir - cmd = ["jbrowse", "create", "-t", JB2REL, "-f", dest] + cmd = ["rm", "-rf", dest + "/*"] + self.subprocess_check_call(cmd) + cmd = ["jbrowse", "create", dest, "-t", JB2VER, "-f"] self.subprocess_check_call(cmd) for fn in [ "asset-manifest.json", @@ -1278,7 +1263,7 @@ ]: cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] self.subprocess_check_call(cmd) - cmd = ["cp", os.path.join(INSTALLED_TO, "webserver.py"), self.outdir] + cmd = ["cp", os.path.join(INSTALLED_TO, "jb2_webserver.py"), self.outdir] self.subprocess_check_call(cmd) @@ -1386,21 +1371,17 @@ {}, # No metadata for multiple bigwig ) ) - track_conf["category"] = track.attrib["cat"] track_conf["format"] = track.attrib["format"] - track_conf["style"] = { - item.tag: parse_style_conf(item) for item in track.find("options/style") - } - - track_conf["style"] = { - item.tag: parse_style_conf(item) for item in track.find("options/style") - } - - track_conf["style_labels"] = { - item.tag: parse_style_conf(item) - for item in track.find("options/style_labels") - } + if track.find("options/style"): + track_conf["style"] = { + item.tag: parse_style_conf(item) for item in track.find("options/style") + } + if track.find("options/style_labels"): + track_conf["style_labels"] = { + item.tag: parse_style_conf(item) + for item in track.find("options/style_labels") + } track_conf["conf"] = etree_to_dict(track.find("options")) keys = jc.process_annotations(track_conf) @@ -1410,10 +1391,14 @@ default_session_data["visibility"][ track.attrib.get("visibility", "default_off") ].append(key) - default_session_data["style"][key] = track_conf[ - "style" - ] # TODO do we need this anymore? - default_session_data["style_labels"][key] = track_conf["style_labels"] + if track_conf.get("style", None): + default_session_data["style"][key] = track_conf[ + "style" + ] # TODO do we need this anymore? + if track_conf.get("style_lables", None): + default_session_data["style_labels"][key] = track_conf.get( + "style_labels", None + ) default_session_data["defaultLocation"] = root.find( "metadata/general/defaultLocation" @@ -1444,7 +1429,6 @@ jc.config_json["tracks"] = jc.tracksToAdd if jc.usejson: jc.write_config() - # jc.add_default_view() jc.add_default_session(default_session_data) # jc.text_index() not sure what broke here.