diff jbrowse2.py @ 80:dff27c9f6d72 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 7bb0fa234bdbc42768b30e426472a47b2523297f
author fubar
date Wed, 03 Apr 2024 01:00:21 +0000
parents 14ecbe46ae9f
children e9bcbed05108
line wrap: on
line diff
--- a/jbrowse2.py	Mon Apr 01 09:36:07 2024 +0000
+++ b/jbrowse2.py	Wed Apr 03 01:00:21 2024 +0000
@@ -20,7 +20,7 @@
 log = logging.getLogger("jbrowse")
 
 JB2VER = "v2.10.3"
-# version pinned for cloning
+# version to pin if cloning - but cloning is not currently done
 
 TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
 SELF_LOCATION = os.path.dirname(os.path.realpath(__file__))
@@ -401,6 +401,7 @@
 
 class JbrowseConnector(object):
     def __init__(self, outdir, jbrowse2path):
+        self.trackCounter = 0  # to avoid name clashes
         self.assemblies = []  # these require more than a few line diff.
         self.assmeta = {}
         self.ass_first_contigs = (
@@ -481,21 +482,22 @@
         }
         return wstyle
 
-    def urllib_get_2018():
-        # Using a protected member like this is not any more fragile
-        # than extending the class and using it. I would use it.
-        url = "https://localhost:6667/my-endpoint"
-        ssl._create_default_https_context = ssl._create_unverified_context
-        with urllib.request.urlopen(url=url) as f:
-            print(f.read().decode("utf-8"))
-
-    def urllib_get_2022():
-        # Finally! Able to use the publice API. Happy happy!
-        url = "https://localhost:6667/my-endpoint"
-        scontext = ssl.SSLContext(ssl.PROTOCOL_TLS)
-        scontext.verify_mode = ssl.VerifyMode.CERT_NONE
-        with urllib.request.urlopen(url=url, context=scontext) as f:
-            print(f.read().decode("utf-8"))
+    def getNrow(self, url):
+        """Count lines in a local file or http(s) URL (certificates unchecked); 0 if a URL read fails."""
+        if not url.startswith(("http://", "https://")):
+            with open(url, "r") as fh:  # context manager guarantees the handle is closed
+                nrow = sum(1 for _ in fh)  # stream the lines instead of holding them all in memory
+        else:
+            try:
+                scontext = ssl.SSLContext(ssl.PROTOCOL_TLS)
+                scontext.verify_mode = ssl.VerifyMode.CERT_NONE
+                with urllib.request.urlopen(url, context=scontext) as f:
+                    nrow = sum(1 for _ in f)
+            except Exception as e:  # best effort: an unreadable URL is treated as empty
+                logging.warning("### getNrow could not read %s: %s" % (url, e))
+                nrow = 0
+        logging.debug("### getNrow returning %d" % nrow)
+        return nrow
 
     def process_genomes(self, genomes):
         assembly = []
@@ -507,6 +509,8 @@
             if genome_node["useuri"] == "yes":
                 useuri = True
             genome_name = genome_node["label"].strip()
+            if len(genome_name) == 0:
+                genome_name = os.path.splitext(os.path.basename(genome_node["path"]))[0]
             if len(genome_name.split()) > 1:
                 genome_name = genome_name.split()[0]
                 # spaces and cruft break scripts when substituted
@@ -684,7 +688,7 @@
         if useuri:
             uri = data
         else:
-            uri = "%s.hic" % trackData["label"]
+            uri = tId
             # slashes in names cause path trouble
             dest = os.path.join(self.outdir, uri)
             cmd = ["cp", data, dest]
@@ -720,7 +724,7 @@
             ]
         }
         categ = trackData["category"]
-        fname = "%s" % tId
+        fname = tId
         dest = "%s/%s" % (self.outdir, fname)
         gname = trackData["assemblyNames"]
 
@@ -811,59 +815,20 @@
             # Replace original gff3 file
             shutil.copy(gff3_rebased.name, gff3)
             os.unlink(gff3_rebased.name)
-        url = "%s.gff3.gz" % trackData["label"]
-        dest = "%s/%s" % (self.outdir, url)
-        self._sort_gff(gff3, dest)
-        tId = trackData["label"]
-        categ = trackData["category"]
-        trackDict = {
-            "type": "FeatureTrack",
-            "trackId": tId,
-            "name": trackData["name"],
-            "assemblyNames": [trackData["assemblyNames"]],
-            "category": [
-                categ,
-            ],
-            "adapter": {
-                "type": "Gff3TabixAdapter",
-                "gffGzLocation": {
-                    "uri": url,
-                },
-                "index": {
-                    "location": {
-                        "uri": url + ".tbi",
-                    }
-                },
-            },
-            "displays": [
-                {
-                    "type": "LinearBasicDisplay",
-                    "displayId": "%s-LinearBasicDisplay" % tId,
-                },
-                {
-                    "type": "LinearArcDisplay",
-                    "displayId": "%s-LinearArcDisplay" % tId,
-                },
-            ],
-        }
-        style_json = self._prepare_track_style(trackDict)
-        trackDict["style"] = style_json
-        self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
-        self.trackIdlist.append(tId)
-        os.unlink(gff3)
+        self.add_gff(gff3, trackData, **kwargs)
 
     def add_bigwig(self, data, trackData):
+        tId = trackData["label"]
         useuri = trackData["useuri"].lower() == "yes"
         if useuri:
             url = data
         else:
-            url = "%s.bigwig" % trackData["label"]
+            url = tId
             # slashes in names cause path trouble
             dest = os.path.join(self.outdir, url)
             cmd = ["cp", data, dest]
             self.subprocess_check_call(cmd)
         bwloc = {"uri": url}
-        tId = trackData["label"]
         categ = trackData["category"]
         trackDict = {
             "type": "QuantitativeTrack",
@@ -897,7 +862,7 @@
         if useuri:
             url = data
         else:
-            fname = "%s.bam" % trackData["label"]
+            fname = tId
             dest = "%s/%s" % (self.outdir, fname)
             url = fname
             bindex = fname + ".bai"
@@ -957,8 +922,8 @@
         if useuri:
             url = data
         else:
-            fname = "%s.cram" % trackData["label"]
-            dest = "%s/%s" % (self.outdir, fname)
+            fname = tId
+            dest = os.path.join(self.outdir, fname)
             url = fname
             self.subprocess_check_call(["cp", data, dest])
             if cram_index is not None and os.path.exists(cram_index):
@@ -1010,7 +975,7 @@
         if useuri:
             url = data
         else:
-            url = "%s.vcf.gz" % tId
+            url = tId
             dest = "%s/%s" % (self.outdir, url)
             cmd = "bgzip -c %s  > %s" % (data, dest)
             self.subprocess_popen(cmd)
@@ -1059,7 +1024,7 @@
             cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s'" % (
                 data,
                 dest,
-            )  # "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'"
+            )
             self.subprocess_popen(cmd)
             self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest])
 
@@ -1071,15 +1036,15 @@
             cmd = ["tabix", "-f", "-p", "bed", dest]
             self.subprocess_check_call(cmd)
 
-    def add_gff(self, data, ext, trackData):
+    def add_gff(self, data, trackData):
+        tId = trackData["label"]
         useuri = trackData["useuri"].lower() == "yes"
         if useuri:
             url = trackData["path"]
         else:
-            url = "%s.%s.gz" % (trackData["label"], ext)
+            url = tId + ".gz"
             dest = "%s/%s" % (self.outdir, url)
             self._sort_gff(data, dest)
-        tId = trackData["label"]
         categ = trackData["category"]
         trackDict = {
             "type": "FeatureTrack",
@@ -1123,7 +1088,7 @@
         if useuri:
             url = data
         else:
-            url = "%s.%s.gz" % (trackData["label"], ext)
+            url = tId
             dest = "%s/%s" % (self.outdir, url)
             self._sort_bed(data, dest)
         trackDict = {
@@ -1168,37 +1133,41 @@
     def add_paf(self, data, trackData, pafOpts, **kwargs):
         tname = trackData["name"]
         tId = trackData["label"]
-        url = "%s.paf" % tId
+        url = tId
         useuri = data.startswith("http://") or data.startswith("https://")
         if not useuri:
             dest = "%s/%s" % (self.outdir, url)
             self.symlink_or_copy(os.path.realpath(data), dest)
+            nrow = self.getNrow(dest)
         else:
             url = data
+            nrow = self.getNrow(url)
         categ = trackData["category"]
-        pgnames = [
-            x.strip() for x in pafOpts["genome_label"].split(",") if len(x.strip()) > 0
-        ]
+        pgnames = [x.strip() for x in pafOpts["genome_label"].split(",")]
         pgpaths = [
             x.strip() for x in pafOpts["genome"].split(",") if len(x.strip()) > 0
         ]
         passnames = [trackData["assemblyNames"]]  # always first
+        for i, gp in enumerate(pgpaths):
+            if len(pgnames[i].strip()) == 0:
+                # user may have left the label blank - it cannot be made mandatory if tracks are to stay optional.
+                gn = os.path.basename(gp)
+                pgnames[i] = os.path.splitext(gn)[0]
         logging.debug(
             "### add_paf got pafOpts=%s, pgnames=%s, pgpaths=%s for %s"
             % (pafOpts, pgnames, pgpaths, tId)
         )
-        for i, gname in enumerate(pgnames):
+        for i, gp in enumerate(pgpaths):
+            gname = pgnames[i]
             if len(gname.split()) > 1:
                 gname = gname.split()[0]
             passnames.append(gname)
             # trouble from spacey names in command lines avoidance
-            useuri = pgpaths[i].startswith("http://") or pgpaths[i].startswith(
-                "https://"
-            )
+            useuri = gp.startswith("http://") or gp.startswith("https://")
 
             if gname not in self.genome_names:
                 # ignore if already there - eg for duplicates among pafs.
-                asstrack, first_contig = self.make_assembly(pgpaths[i], gname, useuri)
+                asstrack, first_contig = self.make_assembly(gp, gname, useuri)
                 self.genome_names.append(gname)
                 self.tracksToAdd[gname] = []
                 self.assemblies.append(asstrack)
@@ -1229,26 +1198,37 @@
                     "displayId": "%s-LinearComparativeDisplay" % tId,
                 },
                 {
-                    "type": "LinearSyntenyDisplay",
+                    "type": "LinearBasicDisplay",
                     "displayId": "%s-LinearSyntenyDisplay" % tId,
                 },
             ],
         }
-        style_json = {
-            "displays": [
-                {
-                    "type": "LGVSyntenyDisplay",
-                    "displayId": "%s-LGVSyntenyDisplay" % tId,
-                }
-            ]
-        }
+        if nrow > 50000:
+            style_json = {
+                "displays": [
+                    {
+                        "type": "LGVSyntenyDisplay",
+                        "displayId": "%s-LGVSyntenyDisplay" % tId,
+                    }
+                ]
+            }
+        else:
+            style_json = {
+                "displays": [
+                    {
+                        "type": "LinearBasicDisplay",
+                        "displayId": "%s-LinearBasicDisplay" % tId,
+                    }
+                ]
+            }
+
         trackDict["style"] = style_json
         self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
         self.trackIdlist.append(tId)
 
     def process_annotations(self, track):
         category = track["category"].replace("__pd__date__pd__", TODAY)
-        for i, (
+        for trackIndex, (
             dataset_path,
             dataset_ext,
             useuri,
@@ -1264,7 +1244,6 @@
                 "category": category,
                 "style": {},
             }
-
             outputTrackConfig["assemblyNames"] = track["assemblyNames"]
             outputTrackConfig["key"] = track_human_label
             outputTrackConfig["useuri"] = useuri
@@ -1272,18 +1251,18 @@
             outputTrackConfig["ext"] = dataset_ext
 
             outputTrackConfig["trackset"] = track.get("trackset", {})
-            outputTrackConfig["label"] = "%s_%i_%s" % (
+            outputTrackConfig["label"] = "%s_%d.%s" % (
+                track_human_label,
+                self.trackCounter,
                 dataset_ext,
-                i,
-                track_human_label,
             )
+            self.trackCounter += 1
             outputTrackConfig["metadata"] = extra_metadata
             outputTrackConfig["name"] = track_human_label
 
             if dataset_ext in ("gff", "gff3"):
                 self.add_gff(
                     dataset_path,
-                    dataset_ext,
                     outputTrackConfig,
                 )
             elif dataset_ext in ("hic", "juicebox_hic"):
@@ -1292,7 +1271,7 @@
                     outputTrackConfig,
                 )
             elif dataset_ext in ("cool", "mcool", "scool"):
-                hic_url = "%s_%d.hic" % (track_human_label, i)
+                hic_url = outputTrackConfig["label"]
                 hic_path = os.path.join(self.outdir, hic_url)
                 self.subprocess_check_call(
                     [
@@ -1388,9 +1367,9 @@
                         % (default_data, tId)
                     )
                     style_data = {"type": "LinearBasicDisplay"}
-                if "displays" in track_conf:
-                    disp = track_conf["displays"][0]["type"]
-                    style_data["type"] = disp
+                    if "displays" in track_conf:
+                        disp = track_conf["displays"][0]["type"]
+                        style_data["type"] = disp
                 if track_conf.get("style_labels", None):
                     # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work
                     # TODO move this to per track displays?
@@ -1418,7 +1397,6 @@
                     [gnome, refName, end] = first[0]
                     start = 0
                     end = int(end)
-                    refName = self.assmeta[gnome][0].get("genome_firstcontig", None)
                     drdict = {
                         "refName": refName,
                         "start": start,
@@ -1474,13 +1452,20 @@
 
     def add_defsess_to_index(self, data):
         """
-        This was included on request of the new codeowner from Anthony's IUC PR.
-        Now fixed to deal with each assembly and tracks separately.
-        Originally used only the first assembly, putting all tracks there and
-        generally falling apart when tested with 2 or more. Seems ironic that
-        this vital feature was never tested given the rejection of my original IUC PR
-        because it was not there. And no, reviewer, I do not want this important piece of history
-        removed. I prefer that it remain here since it has caused me considerable discomfort.
+        Included on request of the new codeowner, from Anthony's IUC PR.
+        Had to be fixed to keep each assembly with the associated tracks for a default view.
+        Originally used only the first assembly, putting all tracks there and so breaking some
+        when tested with 2 or more. Seems ironic that this vital feature could not have ever been tested
+        given that my declining to add it was the basis for a reviewer's rejection of my original IUC PR.
+        A simple 2 line diff apparently.
+
+        The technical problem is that this index.html hack breaks the promise of all the form fields
+        for track controls, such as default visibility, that were mostly working. They need to be removed
+        from the form by whoever thought this method was a good solution to the JB2 bug that breaks
+        config.json style default view coordinates.
+
+        And no, dear reviewer of this code, please leave this piece of history.
+        It is true and I prefer that it remain here to document my considerable discomfort at this unfair treatment.
 
          ----------------------------------------------------------
         Add some default session settings: set some assemblies/tracks on/off
@@ -1626,7 +1611,6 @@
     jc = JbrowseConnector(outdir=args.outdir, jbrowse2path=args.jbrowse2path)
 
     default_session_data = {}
-
     for ass in root.findall("assembly"):
         genomes = [
             {
@@ -1666,7 +1650,6 @@
             if trackfiles:
                 for x in track.findall("files/trackFile"):
                     track_conf["label"] = x.attrib["label"]
-                    trackkey = track_conf["label"]
                     track_conf["useuri"] = x.attrib["useuri"]
                     if is_multi_bigwig:
                         multi_bigwig_paths.append(
@@ -1766,7 +1749,7 @@
     jc.config_json["assemblies"] = assconf
     logging.debug("assemblies=%s, gnames=%s" % (assconf, jc.genome_names))
     jc.write_config()
-    # jc.add_default_session(default_session_data)
+    jc.add_default_session(default_session_data)
     # note that this can be left in the config.json but has NO EFFECT if add_defsess_to_index is called.
-    jc.add_defsess_to_index(default_session_data)
+    # jc.add_defsess_to_index(default_session_data)
     # jc.text_index() not sure what broke here.