diff jbrowse2.py @ 89:408781c080fc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 03400b3f71140ce62dba0db266a1f7b94b6c1a11
author fubar
date Wed, 17 Apr 2024 22:18:47 +0000
parents 3b2ff9864995
children 3c4db8203fad
line wrap: on
line diff
--- a/jbrowse2.py	Sun Apr 14 03:49:10 2024 +0000
+++ b/jbrowse2.py	Wed Apr 17 22:18:47 2024 +0000
@@ -21,7 +21,8 @@
 
 JB2VER = "v2.10.3"
 # version pinned if cloning - but not cloning now
-
+logCommands = True
+# useful for seeing what's being written but not for production setups
 TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
 SELF_LOCATION = os.path.dirname(os.path.realpath(__file__))
 GALAXY_INFRASTRUCTURE_URL = None
@@ -427,14 +428,19 @@
 
     def subprocess_check_call(self, command, output=None, cwd=True):
         if output:
-            log.debug("cd %s && %s >  %s", self.get_cwd(cwd), " ".join(command), output)
+            if logCommands:
+                log.debug(
+                    "cd %s && %s >  %s", self.get_cwd(cwd), " ".join(command), output
+                )
             subprocess.check_call(command, cwd=self.get_cwd(cwd), stdout=output)
         else:
-            log.debug("cd %s && %s", self.get_cwd(cwd), " ".join(command))
+            if logCommands:
+                log.debug("cd %s && %s", self.get_cwd(cwd), " ".join(command))
             subprocess.check_call(command, cwd=self.get_cwd(cwd))
 
     def subprocess_popen(self, command, cwd=True):
-        log.debug(command)
+        if logCommands:
+            log.debug(command)
         p = subprocess.Popen(
             command,
             cwd=self.get_cwd(cwd),
@@ -452,7 +458,8 @@
             raise RuntimeError("Command failed with exit code %s" % (retcode))
 
     def subprocess_check_output(self, command):
-        log.debug(" ".join(command))
+        if logCommands:
+            log.debug(" ".join(command))
         return subprocess.check_output(command, cwd=self.outdir)
 
     def symlink_or_copy(self, src, dest):
@@ -491,14 +498,14 @@
                 nrow = len(fl)
             except Exception:
                 nrow = 0
-        logging.debug("### getNrow returning %d" % nrow)
+        logging.debug("### getNrow %s returning %d" % (url, nrow))
         return nrow
 
     def process_genomes(self, genomes):
         assembly = []
         assmeta = []
         useuri = False
-        genome_names = []
+        primaryGenome = None
         for i, genome_node in enumerate(genomes):
             this_genome = {}
             if genome_node["useuri"] == "yes":
@@ -509,17 +516,18 @@
             if len(genome_name.split()) > 1:
                 genome_name = genome_name.split()[0]
                 # spaces and cruft break scripts when substituted
-            if genome_name not in genome_names:
-                # pafs with shared references
+            if not primaryGenome:
+                primaryGenome = genome_name
+            if genome_name not in self.genome_names:
+                self.genome_names.append(genome_name)
                 fapath = genome_node["path"]
                 if not useuri:
                     fapath = os.path.realpath(fapath)
                 assem, first_contig = self.make_assembly(fapath, genome_name, useuri)
                 assembly.append(assem)
                 self.ass_first_contigs.append(first_contig)
-                if len(genome_names) == 0:
+                if genome_name == primaryGenome:  # first one
                     this_genome["genome_name"] = genome_name  # first one for all tracks
-                    genome_names.append(genome_name)
                     this_genome["genome_sequence_adapter"] = assem["sequence"][
                         "adapter"
                     ]
@@ -551,10 +559,9 @@
                             )
                 assmeta.append(this_genome)
         self.assemblies += assembly
-        self.assmeta[genome_names[0]] = assmeta
-        self.tracksToAdd[genome_names[0]] = []
-        self.genome_names += genome_names
-        return this_genome["genome_name"]
+        self.assmeta[primaryGenome] = assmeta
+        self.tracksToAdd[primaryGenome] = []
+        return primaryGenome
 
     def make_assembly(self, fapath, gname, useuri):
         """added code to grab the first contig name and length for broken default session from Anthony and Helena's code
@@ -743,10 +750,11 @@
         sampu = list(dict.fromkeys(samp))
         samples = [x.split(".")[0] for x in sampu]
         samples.sort()
-        logging.debug(
-            "$$$$ cmd=%s, mafss=%s samp=%s samples=%s"
-            % (" ".join(cmd), mafss, samp, samples)
-        )
+        if logCommands:
+            logging.debug(
+                "$$$$ cmd=%s, mafss=%s samp=%s samples=%s"
+                % (" ".join(cmd), mafss, samp, samples)
+            )
         trackDict = {
             "type": "MafTrack",
             "trackId": tId,
@@ -1172,34 +1180,28 @@
             url = data
             nrow = self.getNrow(url)
         categ = trackData["category"]
-        pgnames = [x.strip() for x in pafOpts["genome_label"].split(",")]
-        pgpaths = [
-            x.strip() for x in pafOpts["genome"].split(",") if len(x.strip()) > 0
-        ]
+        pg = pafOpts["genome"].split(",")
+        pgc = [x.strip() for x in pg if x.strip() > ""]
+        gnomes = [x.split(":") for x in pgc]
         passnames = [trackData["assemblyNames"]]  # always first
-        for i, gp in enumerate(pgpaths):
-            if len(pgnames[i].strip()) == 0:
-                # user may have left it blank - cannot make non-optional if want optional tracks.
-                gn = os.path.basename(gp)
-                pgnames[i] = os.path.splitext(gn)[0]
-        logging.debug(
-            "### add_paf got pafOpts=%s, pgnames=%s, pgpaths=%s for %s"
-            % (pafOpts, pgnames, pgpaths, tId)
-        )
-        for i, gp in enumerate(pgpaths):
-            gname = pgnames[i]
+        for i, (gpath, gname) in enumerate(gnomes):
+            # may have been forgotten by user for uri
+            if len(gname) == 0:
+                gn = os.path.basename(gpath)
+                gname = os.path.splitext(gn)[0]
+            # trouble from spacey names in command lines avoidance
             if len(gname.split()) > 1:
                 gname = gname.split()[0]
-            passnames.append(gname)
-            # trouble from spacey names in command lines avoidance
-            useuri = gp.startswith("http://") or gp.startswith("https://")
-
+            if gname not in passnames:
+                passnames.append(gname)
+            useuri = gpath.startswith("http://") or gpath.startswith("https://")
             if gname not in self.genome_names:
                 # ignore if already there - eg for duplicates among pafs.
-                asstrack, first_contig = self.make_assembly(gp, gname, useuri)
+                asstrack, first_contig = self.make_assembly(gpath, gname, useuri)
                 self.genome_names.append(gname)
                 self.tracksToAdd[gname] = []
                 self.assemblies.append(asstrack)
+                self.ass_first_contigs.append(first_contig)
         trackDict = {
             "type": "SyntenyTrack",
             "trackId": tId,
@@ -1271,15 +1273,15 @@
             outputTrackConfig["ext"] = dataset_ext
 
             outputTrackConfig["trackset"] = track.get("trackset", {})
-            outputTrackConfig["label"] = "%s_%d.%s" % (
-                track_human_label,
-                self.trackCounter,
-                dataset_ext,
-            )
-            self.trackCounter += 1
+            outputTrackConfig["label"] = track["label"]
             outputTrackConfig["metadata"] = extra_metadata
             outputTrackConfig["name"] = track_human_label
-
+            if track["label"] in self.trackIdlist:
+                logging.error(
+                    "### not adding %s already in %s"
+                    % (track["label"], self.trackIdlist)
+                )
+                yield None
             if dataset_ext in ("gff", "gff3"):
                 self.add_gff(
                     dataset_path,
@@ -1416,9 +1418,6 @@
                 "minimized": False,
                 "tracks": tracks_data,
             }
-            logging.debug(
-                "Looking for %s in self.ass_ %s" % (gnome, self.ass_first_contigs)
-            )
             first = [x for x in self.ass_first_contigs if x[0] == gnome]
             if len(first) > 0:
                 [gnome, refName, end] = first[0]
@@ -1477,6 +1476,8 @@
 
     def add_defsess_to_index(self, data):
         """
+        PROBABLY NOW BROKEN by changes since this was deprecated temporarily as at April 18
+
         Included on request of the new codeowner, from Anthony's IUC PR.
         Had to be fixed to keep each assembly with the associated tracks for a default view.
         Originally used only the first assembly, putting all tracks there and so breaking some
@@ -1626,6 +1627,7 @@
     jc = JbrowseConnector(outdir=args.outdir, jbrowse2path=args.jbrowse2path)
 
     default_session_data = {}
+    trackI = 0
     for ass in root.findall("assembly"):
         genomes = [
             {
@@ -1636,9 +1638,9 @@
             }
             for x in ass.findall("metadata/genomes/genome")
         ]
-        assref_name = jc.process_genomes(genomes)
-        if not default_session_data.get(assref_name, None):
-            default_session_data[assref_name] = {
+        primaryGenome = jc.process_genomes(genomes)
+        if not default_session_data.get(primaryGenome, None):
+            default_session_data[primaryGenome] = {
                 "tracks": [],
                 "style": {},
                 "style_labels": {},
@@ -1650,7 +1652,7 @@
         for track in ass.find("tracks"):
             track_conf = {}
             track_conf["trackfiles"] = []
-            track_conf["assemblyNames"] = assref_name
+            track_conf["assemblyNames"] = primaryGenome
             is_multi_bigwig = False
             try:
                 if track.find("options/wiggle/multibigwig") and (
@@ -1664,13 +1666,14 @@
             trackfiles = track.findall("files/trackFile")
             if trackfiles:
                 for x in trackfiles:
-                    track_conf["label"] = x.attrib["label"]
+                    track_conf["label"] = "%s_%d" % (x.attrib["label"], trackI)
+                    trackI += 1
                     track_conf["useuri"] = x.attrib["useuri"]
                     if is_multi_bigwig:
                         multi_bigwig_paths.append(
                             (
-                                x.attrib["label"],
-                                x.attrib["useuri"],
+                                track_conf["label"],
+                                track_conf["useuri"],
                                 os.path.realpath(x.attrib["path"]),
                             )
                         )
@@ -1685,7 +1688,7 @@
                                     x.attrib["path"],
                                     x.attrib["ext"],
                                     x.attrib["useuri"],
-                                    x.attrib["label"],
+                                    track_conf["label"],
                                     metadata,
                                 )
                             else:
@@ -1693,7 +1696,7 @@
                                     os.path.realpath(x.attrib["path"]),
                                     x.attrib["ext"],
                                     x.attrib["useuri"],
-                                    x.attrib["label"],
+                                    track_conf["label"],
                                     metadata,
                                 )
                             track_conf["trackfiles"].append(tfa)
@@ -1714,14 +1717,13 @@
             track_conf["format"] = track.attrib["format"]
             track_conf["conf"] = etree_to_dict(track.find("options"))
             keys = jc.process_annotations(track_conf)
-
             if keys:
                 for key in keys:
                     vis = track.attrib.get("visibility", "default_off")
                     if not vis:
                         vis = "default_off"
-                    default_session_data[assref_name]["visibility"][vis].append(key)
-                    trakdat = jc.tracksToAdd[assref_name]
+                    default_session_data[primaryGenome]["visibility"][vis].append(key)
+                    trakdat = jc.tracksToAdd[primaryGenome]
                     stile = {}
                     for trak in trakdat:
                         if trak["trackId"] == key:
@@ -1732,21 +1734,20 @@
                             for item in track.find("options/style")
                         }
                         stile.update(supdate)
-                    default_session_data[assref_name]["style"][key] = stile
-                    logging.debug("@@@ for %s got style=%s" % (key, stile))
+                    default_session_data[primaryGenome]["style"][key] = stile
                     if track.find("options/style_labels"):
-                        default_session_data[assref_name]["style_labels"][key] = {
+                        default_session_data[primaryGenome]["style_labels"][key] = {
                             item.tag: parse_style_conf(item)
                             for item in track.find("options/style_labels")
                         }
-                default_session_data[assref_name]["tracks"].append(key)
+                    default_session_data[primaryGenome]["tracks"].append(key)
     default_session_data["defaultLocation"] = root.find(
         "metadata/general/defaultLocation"
     ).text
     default_session_data["session_name"] = root.find(
         "metadata/general/session_name"
     ).text
-    logging.debug("default_session=%s" % (default_session_data))
+    logging.debug("default_session=%s" % (json.dumps(default_session_data, indent=2)))
     jc.zipOut = root.find("metadata/general/zipOut").text == "true"
     general_data = {
         "analytics": root.find("metadata/general/analytics").text,
@@ -1759,12 +1760,28 @@
     jc.add_general_configuration(general_data)
     trackconf = jc.config_json.get("tracks", [])
     for gnome in jc.genome_names:
-        trackconf += jc.tracksToAdd[gnome]
+        gtracks = jc.tracksToAdd[gnome]
+        if len(gtracks) > 0:
+            logging.debug(
+                "for genome %s adding gtracks %s"
+                % (gnome, json.dumps(gtracks, indent=2))
+            )
+            trackconf += gtracks
     jc.config_json["tracks"] = trackconf
     assconf = jc.config_json.get("assemblies", [])
     assconf += jc.assemblies
     jc.config_json["assemblies"] = assconf
-    logging.debug("assemblies=%s, gnames=%s" % (assconf, jc.genome_names))
+    logging.debug(
+        "assmeta=%s, first_contigs=%s, assemblies=%s, gnames=%s, trackidlist=%s, tracks=%s"
+        % (
+            jc.assmeta,
+            jc.ass_first_contigs,
+            json.dumps(assconf, indent=2),
+            jc.genome_names,
+            jc.trackIdlist,
+            json.dumps(trackconf, indent=2),
+        )
+    )
     jc.write_config()
     jc.add_default_session(default_session_data)
     # note that this can be left in the config.json but has NO EFFECT if add_defsess_to_index is called.