Mercurial > repos > fubar > jbrowse2
diff jbrowse2.py @ 80:dff27c9f6d72 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 7bb0fa234bdbc42768b30e426472a47b2523297f
author | fubar |
---|---|
date | Wed, 03 Apr 2024 01:00:21 +0000 |
parents | 14ecbe46ae9f |
children | e9bcbed05108 |
line wrap: on
line diff
--- a/jbrowse2.py Mon Apr 01 09:36:07 2024 +0000 +++ b/jbrowse2.py Wed Apr 03 01:00:21 2024 +0000 @@ -20,7 +20,7 @@ log = logging.getLogger("jbrowse") JB2VER = "v2.10.3" -# version pinned for cloning +# version pinned if cloning - but not cloning now TODAY = datetime.datetime.now().strftime("%Y-%m-%d") SELF_LOCATION = os.path.dirname(os.path.realpath(__file__)) @@ -401,6 +401,7 @@ class JbrowseConnector(object): def __init__(self, outdir, jbrowse2path): + self.trackCounter = 0 # to avoid name clashes self.assemblies = [] # these require more than a few line diff. self.assmeta = {} self.ass_first_contigs = ( @@ -481,21 +482,22 @@ } return wstyle - def urllib_get_2018(): - # Using a protected member like this is not any more fragile - # than extending the class and using it. I would use it. - url = "https://localhost:6667/my-endpoint" - ssl._create_default_https_context = ssl._create_unverified_context - with urllib.request.urlopen(url=url) as f: - print(f.read().decode("utf-8")) - - def urllib_get_2022(): - # Finally! Able to use the publice API. Happy happy! - url = "https://localhost:6667/my-endpoint" - scontext = ssl.SSLContext(ssl.PROTOCOL_TLS) - scontext.verify_mode = ssl.VerifyMode.CERT_NONE - with urllib.request.urlopen(url=url, context=scontext) as f: - print(f.read().decode("utf-8")) + def getNrow(self, url): + useuri = url.startswith("https://") or url.startswith("http://") + if not useuri: + fl = open(url, "r").readlines() + nrow = len(fl) + else: + try: + scontext = ssl.SSLContext(ssl.PROTOCOL_TLS) + scontext.verify_mode = ssl.VerifyMode.CERT_NONE + with urllib.request.urlopen(url, context=scontext) as f: + fl = f.readlines() + nrow = len(fl) + except Exception: + nrow = 0 + logging.debug("### getNrow returning %d" % nrow) + return nrow def process_genomes(self, genomes): assembly = [] @@ -507,6 +509,8 @@ if genome_node["useuri"] == "yes": useuri = True genome_name = genome_node["label"].strip() + if len(genome_name) == 0: + genome_name = os.path.splitext(os.path.basename(genome_node["path"]))[0] if len(genome_name.split()) > 1: genome_name = genome_name.split()[0] # spaces and cruft break scripts when substituted @@ -684,7 +688,7 @@ if useuri: uri = data else: - uri = "%s.hic" % trackData["label"] + uri = tId # slashes in names cause path trouble dest = os.path.join(self.outdir, uri) cmd = ["cp", data, dest] @@ -720,7 +724,7 @@ ] } categ = trackData["category"] - fname = "%s" % tId + fname = tId dest = "%s/%s" % (self.outdir, fname) gname = trackData["assemblyNames"] @@ -811,59 +815,20 @@ # Replace original gff3 file shutil.copy(gff3_rebased.name, gff3) os.unlink(gff3_rebased.name) - url = "%s.gff3.gz" % trackData["label"] - dest = "%s/%s" % (self.outdir, url) - self._sort_gff(gff3, dest) - tId = trackData["label"] - categ = trackData["category"] - trackDict = { - "type": "FeatureTrack", - "trackId": tId, - "name": trackData["name"], - "assemblyNames": [trackData["assemblyNames"]], - "category": [ - categ, - ], - "adapter": { - "type": "Gff3TabixAdapter", - "gffGzLocation": { - "uri": url, - }, - "index": { - "location": { - "uri": url + ".tbi", - } - }, - }, - "displays": [ - { - "type": "LinearBasicDisplay", - "displayId": "%s-LinearBasicDisplay" % tId, - }, - { - "type": "LinearArcDisplay", - "displayId": "%s-LinearArcDisplay" % tId, - }, - ], - } - style_json = self._prepare_track_style(trackDict) - trackDict["style"] = style_json - self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) - self.trackIdlist.append(tId) - os.unlink(gff3) + self.add_gff(gff3, trackData, **kwargs) def add_bigwig(self, data, trackData): + tId = trackData["label"] useuri = trackData["useuri"].lower() == "yes" if useuri: url = data else: - url = "%s.bigwig" % trackData["label"] + url = tId # slashes in names cause path trouble dest = os.path.join(self.outdir, url) cmd = ["cp", data, dest] self.subprocess_check_call(cmd) bwloc = {"uri": url} - tId = trackData["label"] categ = trackData["category"] trackDict = { "type": "QuantitativeTrack", @@ -897,7 +862,7 @@ if useuri: url = data else: - fname = "%s.bam" % trackData["label"] + fname = tId dest = "%s/%s" % (self.outdir, fname) url = fname bindex = fname + ".bai" @@ -957,8 +922,8 @@ if useuri: url = data else: - fname = "%s.cram" % trackData["label"] - dest = "%s/%s" % (self.outdir, fname) + fname = tId + dest = os.path.join(self.outdir, fname) url = fname self.subprocess_check_call(["cp", data, dest]) if cram_index is not None and os.path.exists(cram_index): @@ -1010,7 +975,7 @@ if useuri: url = data else: - url = "%s.vcf.gz" % tId + url = tId dest = "%s/%s" % (self.outdir, url) cmd = "bgzip -c %s > %s" % (data, dest) self.subprocess_popen(cmd) @@ -1059,7 +1024,7 @@ cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s'" % ( data, dest, - ) # "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" + ) self.subprocess_popen(cmd) self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest]) @@ -1071,15 +1036,15 @@ cmd = ["tabix", "-f", "-p", "bed", dest] self.subprocess_check_call(cmd) - def add_gff(self, data, ext, trackData): + def add_gff(self, data, trackData): + tId = trackData["label"] useuri = trackData["useuri"].lower() == "yes" if useuri: url = trackData["path"] else: - url = "%s.%s.gz" % (trackData["label"], ext) + url = tId + ".gz" dest = "%s/%s" % (self.outdir, url) self._sort_gff(data, dest) - tId = trackData["label"] categ = trackData["category"] trackDict = { "type": "FeatureTrack", @@ -1123,7 +1088,7 @@ if useuri: url = data else: - url = "%s.%s.gz" % (trackData["label"], ext) + url = tId dest = "%s/%s" % (self.outdir, url) self._sort_bed(data, dest) trackDict = { @@ -1168,37 +1133,41 @@ def add_paf(self, data, trackData, pafOpts, **kwargs): tname = trackData["name"] tId = trackData["label"] - url = "%s.paf" % tId + url = tId useuri = data.startswith("http://") or data.startswith("https://") if not useuri: dest = "%s/%s" % (self.outdir, url) self.symlink_or_copy(os.path.realpath(data), dest) + nrow = self.getNrow(dest) else: url = data + nrow = self.getNrow(url) categ = trackData["category"] - pgnames = [ - x.strip() for x in pafOpts["genome_label"].split(",") if len(x.strip()) > 0 - ] + pgnames = [x.strip() for x in pafOpts["genome_label"].split(",")] pgpaths = [ x.strip() for x in pafOpts["genome"].split(",") if len(x.strip()) > 0 ] passnames = [trackData["assemblyNames"]] # always first + for i, gp in enumerate(pgpaths): + if len(pgnames[i].strip()) == 0: + # user may have left it blank - cannot make non-optional if want optional tracks. + gn = os.path.basename(gp) + pgnames[i] = os.path.splitext(gn)[0] logging.debug( "### add_paf got pafOpts=%s, pgnames=%s, pgpaths=%s for %s" % (pafOpts, pgnames, pgpaths, tId) ) - for i, gname in enumerate(pgnames): + for i, gp in enumerate(pgpaths): + gname = pgnames[i] if len(gname.split()) > 1: gname = gname.split()[0] passnames.append(gname) # trouble from spacey names in command lines avoidance - useuri = pgpaths[i].startswith("http://") or pgpaths[i].startswith( - "https://" - ) + useuri = gp.startswith("http://") or gp.startswith("https://") if gname not in self.genome_names: # ignore if already there - eg for duplicates among pafs. - asstrack, first_contig = self.make_assembly(pgpaths[i], gname, useuri) + asstrack, first_contig = self.make_assembly(gp, gname, useuri) self.genome_names.append(gname) self.tracksToAdd[gname] = [] self.assemblies.append(asstrack) @@ -1229,26 +1198,37 @@ "displayId": "%s-LinearComparativeDisplay" % tId, }, { - "type": "LinearSyntenyDisplay", + "type": "LinearBasicDisplay", "displayId": "%s-LinearSyntenyDisplay" % tId, }, ], } - style_json = { - "displays": [ - { - "type": "LGVSyntenyDisplay", - "displayId": "%s-LGVSyntenyDisplay" % tId, - } - ] - } + if nrow > 50000: + style_json = { + "displays": [ + { + "type": "LGVSyntenyDisplay", + "displayId": "%s-LGVSyntenyDisplay" % tId, + } + ] + } + else: + style_json = { + "displays": [ + { + "type": "LinearBasicDisplay", + "displayId": "%s-LinearBasicDisplay" % tId, + } + ] + } + trackDict["style"] = style_json self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) self.trackIdlist.append(tId) def process_annotations(self, track): category = track["category"].replace("__pd__date__pd__", TODAY) - for i, ( + for trackIndex, ( dataset_path, dataset_ext, useuri, @@ -1264,7 +1244,6 @@ "category": category, "style": {}, } - outputTrackConfig["assemblyNames"] = track["assemblyNames"] outputTrackConfig["key"] = track_human_label outputTrackConfig["useuri"] = useuri @@ -1272,18 +1251,18 @@ outputTrackConfig["ext"] = dataset_ext outputTrackConfig["trackset"] = track.get("trackset", {}) - outputTrackConfig["label"] = "%s_%i_%s" % ( + outputTrackConfig["label"] = "%s_%d.%s" % ( + track_human_label, + self.trackCounter, dataset_ext, - i, - track_human_label, ) + self.trackCounter += 1 outputTrackConfig["metadata"] = extra_metadata outputTrackConfig["name"] = track_human_label if dataset_ext in ("gff", "gff3"): self.add_gff( dataset_path, - dataset_ext, outputTrackConfig, ) elif dataset_ext in ("hic", "juicebox_hic"): @@ -1292,7 +1271,7 @@ outputTrackConfig, ) elif dataset_ext in ("cool", "mcool", "scool"): - hic_url = "%s_%d.hic" % (track_human_label, i) + hic_url = outputTrackConfig["label"] hic_path = os.path.join(self.outdir, hic_url) self.subprocess_check_call( [ @@ -1388,9 +1367,9 @@ % (default_data, tId) ) style_data = {"type": "LinearBasicDisplay"} - if "displays" in track_conf: - disp = track_conf["displays"][0]["type"] - style_data["type"] = disp + if "displays" in track_conf: + disp = track_conf["displays"][0]["type"] + style_data["type"] = disp if track_conf.get("style_labels", None): # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work # TODO move this to per track displays? @@ -1418,7 +1397,6 @@ [gnome, refName, end] = first[0] start = 0 end = int(end) - refName = self.assmeta[gnome][0].get("genome_firstcontig", None) drdict = { "refName": refName, "start": start, @@ -1474,13 +1452,20 @@ def add_defsess_to_index(self, data): """ - This was included on request of the new codeowner from Anthony's IUC PR. - Now fixed to deal with each assembly and tracks separately. - Originally used only the first assembly, putting all tracks there and - generally falling apart when tested with 2 or more. Seems ironic that - this vital feature was never tested given the rejection of my original IUC PR - because it was not there. And no, reviewer, I do not want this important piece of history - removed. I prefer that it remain here since it has caused me considerable discomfort. + Included on request of the new codeowner, from Anthony's IUC PR. + Had to be fixed to keep each assembly with the associated tracks for a default view. + Originally used only the first assembly, putting all tracks there and so breaking some + when tested with 2 or more. Seems ironic that this vital feature could not have ever been tested + given that my declining to add it was the basis for a reviewer's rejection of my original IUC PR. + A simple 2 line diff apparently. + + The technical problem is that this index.html hack breaks the promise of all the form fields + for track controls such as visibility default that were working mostly. They need to be removed from the form by whoever + thought this method was a good solution to the JB2 bug breaking config.json style default + view coordinates. + + And no, dear reviewer of this code, please leave this piece of history. + It is true and I prefer that it remain here to document my considerable discomfort at this unfair treatment. ---------------------------------------------------------- Add some default session settings: set some assemblies/tracks on/off @@ -1626,7 +1611,6 @@ jc = JbrowseConnector(outdir=args.outdir, jbrowse2path=args.jbrowse2path) default_session_data = {} - for ass in root.findall("assembly"): genomes = [ { @@ -1666,7 +1650,6 @@ if trackfiles: for x in track.findall("files/trackFile"): track_conf["label"] = x.attrib["label"] - trackkey = track_conf["label"] track_conf["useuri"] = x.attrib["useuri"] if is_multi_bigwig: multi_bigwig_paths.append( @@ -1766,7 +1749,7 @@ jc.config_json["assemblies"] = assconf logging.debug("assemblies=%s, gnames=%s" % (assconf, jc.genome_names)) jc.write_config() - # jc.add_default_session(default_session_data) + jc.add_default_session(default_session_data) # note that this can be left in the config.json but has NO EFFECT if add_defsess_to_index is called. - jc.add_defsess_to_index(default_session_data) + # jc.add_defsess_to_index(default_session_data) # jc.text_index() not sure what broke here.