diff jbrowse2.py @ 23:39b717d934a8 draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit be2268f4c11d54bdd44789dd88dd9017cad27887-dirty
author fubar
date Sat, 03 Feb 2024 10:17:27 +0000
parents bde6b1d09f7d
children fb6cc7bc24df
line wrap: on
line diff
--- a/jbrowse2.py	Thu Feb 01 01:58:58 2024 +0000
+++ b/jbrowse2.py	Sat Feb 03 10:17:27 2024 +0000
@@ -231,9 +231,7 @@
         elif "scaling" in track:
             if track["scaling"]["method"] == "ignore":
                 if track["scaling"]["scheme"]["color"] != "__auto__":
-                    trackConfig["style"]["color"] = track["scaling"]["scheme"][
-                        "color"
-                    ]
+                    trackConfig["style"]["color"] = track["scaling"]["scheme"]["color"]
                 else:
                     trackConfig["style"]["color"] = self.hex_from_rgb(
                         *self._get_colours()
@@ -260,18 +258,13 @@
                             "blue": blue,
                         }
                     )
-                    trackConfig["style"]["color"] = color_function.replace(
-                        "\n", ""
-                    )
+                    trackConfig["style"]["color"] = color_function.replace("\n", "")
                 elif trackFormat == "gene_calls":
                     # Default values, based on GFF3 spec
                     min_val = 0
                     max_val = 1000
                     # Get min/max and build a scoring function since JBrowse doesn't
-                    if (
-                        scales["type"] == "automatic"
-                        or scales["type"] == "__auto__"
-                    ):
+                    if scales["type"] == "automatic" or scales["type"] == "__auto__":
                         min_val, max_val = self.min_max_gff(gff3)
                     else:
                         min_val = scales.get("min", 0)
@@ -279,9 +272,7 @@
 
                     if scheme["color"] == "__auto__":
                         user_color = "undefined"
-                        auto_color = "'%s'" % self.hex_from_rgb(
-                            *self._get_colours()
-                        )
+                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
                     elif scheme["color"].startswith("#"):
                         user_color = "'%s'" % self.hex_from_rgb(
                             *self.rgb_from_hex(scheme["color"][1:])
@@ -289,9 +280,7 @@
                         auto_color = "undefined"
                     else:
                         user_color = "undefined"
-                        auto_color = "'%s'" % self.hex_from_rgb(
-                            *self._get_colours()
-                        )
+                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
 
                     color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(
                         **{
@@ -303,9 +292,7 @@
                         }
                     )
 
-                    trackConfig["style"]["color"] = color_function.replace(
-                        "\n", ""
-                    )
+                    trackConfig["style"]["color"] = color_function.replace("\n", "")
         return trackConfig
 
 
@@ -381,7 +368,7 @@
             galaxy=GALAXY_INFRASTRUCTURE_URL,
             encoded_id=metadata.get("dataset_id", ""),
             tool_id=metadata.get("tool_tool_id", ""),
-            tool_version=metadata.get("tool_tool_version",""),
+            tool_version=metadata.get("tool_tool_version", ""),
         )
     return metadata
 
@@ -402,9 +389,7 @@
 
     def subprocess_check_call(self, command, output=None):
         if output:
-            log.debug(
-                "cd %s && %s >  %s", self.outdir, " ".join(command), output
-            )
+            log.debug("cd %s && %s >  %s", self.outdir, " ".join(command), output)
             subprocess.check_call(command, cwd=self.outdir, stdout=output)
         else:
             log.debug("cd %s && %s", self.outdir, " ".join(command))
@@ -615,7 +600,7 @@
             "plugins": [
                 {
                     "name": "MafViewer",
-                    "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js"
+                    "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js",
                 }
             ]
         }
@@ -634,9 +619,7 @@
         self.subprocess_check_call(cmd)
         # Construct samples list
         # We could get this from galaxy metadata, not sure how easily.
-        ps = subprocess.Popen(
-            ["grep", "^s [^ ]*", "-o", data], stdout=subprocess.PIPE
-        )
+        ps = subprocess.Popen(["grep", "^s [^ ]*", "-o", data], stdout=subprocess.PIPE)
         output = subprocess.check_output(("sort", "-u"), stdin=ps.stdout)
         ps.wait()
         outp = output.decode("ascii")
@@ -796,9 +779,7 @@
         url = fname
         self.subprocess_check_call(["cp", data, dest])
         bloc = {"uri": url}
-        if bam_index is not None and os.path.exists(
-            os.path.realpath(bam_index)
-        ):
+        if bam_index is not None and os.path.exists(os.path.realpath(bam_index)):
             # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
             self.subprocess_check_call(
                 ["cp", os.path.realpath(bam_index), dest + ".bai"]
@@ -809,9 +790,7 @@
             #      => no index generated by galaxy, but there might be one next to the symlink target
             #      this trick allows to skip the bam sorting made by galaxy if already done outside
             if os.path.exists(os.path.realpath(data) + ".bai"):
-                self.symlink_or_copy(
-                    os.path.realpath(data) + ".bai", dest + ".bai"
-                )
+                self.symlink_or_copy(os.path.realpath(data) + ".bai", dest + ".bai")
             else:
                 log.warn("Could not find a bam index (.bai file) for %s", data)
         trackDict = {
@@ -847,9 +826,7 @@
         url = fname
         self.subprocess_check_call(["cp", data, dest])
         bloc = {"uri": url}
-        if cram_index is not None and os.path.exists(
-            os.path.realpath(cram_index)
-        ):
+        if cram_index is not None and os.path.exists(os.path.realpath(cram_index)):
             # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
             self.subprocess_check_call(
                 ["cp", os.path.realpath(cram_index), dest + ".crai"]
@@ -860,13 +837,9 @@
             #      => no index generated by galaxy, but there might be one next to the symlink target
             #      this trick allows to skip the bam sorting made by galaxy if already done outside
             if os.path.exists(os.path.realpath(data) + ".crai"):
-                self.symlink_or_copy(
-                    os.path.realpath(data) + ".crai", dest + ".crai"
-                )
+                self.symlink_or_copy(os.path.realpath(data) + ".crai", dest + ".crai")
             else:
-                log.warn(
-                    "Could not find a cram index (.crai file) for %s", data
-                )
+                log.warn("Could not find a cram index (.crai file) for %s", data)
         trackDict = {
             "type": "AlignmentsTrack",
             "trackId": tId,
@@ -875,9 +848,11 @@
             "adapter": {
                 "type": "CramAdapter",
                 "cramLocation": bloc,
-                "craiLocation": {"uri": fname + ".crai",},
+                "craiLocation": {
+                    "uri": fname + ".crai",
+                },
                 "sequenceAdapter": self.genome_sequence_adapter,
-                },
+            },
             "displays": [
                 {
                     "type": "LinearAlignmentsDisplay",
@@ -941,19 +916,17 @@
     def _sort_gff(self, data, dest):
         # Only index if not already done
         if not os.path.exists(dest + ".gz"):
-            cmd = "jbrowse sort-gff %s | bgzip -c > %s.gz" % (
+            cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s.gz'" % (
                 data,
                 dest,
             )  # "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'"
             self.subprocess_popen(cmd)
-            self.subprocess_check_call(
-                ["tabix", "-f", "-p", "gff", dest + ".gz"]
-            )
+            self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"])
 
     def _sort_bed(self, data, dest):
         # Only index if not already done
         if not os.path.exists(dest):
-            cmd = "sort -k1,1 -k2,2n %s | bgzip -c > %s" % (data, dest)
+            cmd = "sort -k1,1 -k2,2n '%s' | bgzip -c > '%s'" % (data, dest)
             self.subprocess_popen(cmd)
             cmd = ["tabix", "-f", "-p", "bed", dest]
             self.subprocess_check_call(cmd)
@@ -1174,6 +1147,29 @@
                     dataset_path,
                     outputTrackConfig,
                 )
+            elif dataset_ext in ("cool", "mcool", "scool"):
+                hictempd = tempfile.mkdtemp()
+                hic_path = os.path.join(
+                    self.outdir, "%s_%d_%s.hic" % (track_human_label, i, dataset_ext)
+                )
+                self.subprocess_check_call(
+                    [
+                        "hictk",
+                        "convert",
+                        "-f",
+                        "--output-fmt",
+                        "hic",
+                        "--tmpdir",
+                        hictempd,
+                        dataset_path,
+                        hic_path,
+                    ]
+                )
+                self.add_hic(
+                    hic_path,
+                    outputTrackConfig,
+                )
+                shutil.rmtree(hictempd)
             elif dataset_ext in ("bed",):
                 self.add_bed(
                     dataset_path,
@@ -1191,9 +1187,9 @@
                     outputTrackConfig,
                 )
             elif dataset_ext == "bam":
-                real_indexes = track["conf"]["options"]["pileup"][
-                    "bam_indices"
-                ]["bam_index"]
+                real_indexes = track["conf"]["options"]["pileup"]["bam_indices"][
+                    "bam_index"
+                ]
                 if not isinstance(real_indexes, list):
                     real_indexes = [real_indexes]
 
@@ -1204,9 +1200,9 @@
                     bam_index=real_indexes[i],
                 )
             elif dataset_ext == "cram":
-                real_indexes = track["conf"]["options"]["cram"][
-                    "cram_indices"
-                ]["cram_index"]
+                real_indexes = track["conf"]["options"]["cram"]["cram_indices"][
+                    "cram_index"
+                ]
                 if not isinstance(real_indexes, list):
                     real_indexes = [real_indexes]
 
@@ -1282,7 +1278,7 @@
 
         if data.get("defaultLocation", ""):
             ddl = data["defaultLocation"]
-            loc_match = re.search(r"^([^:]+):(\d*)\.*(\d*)$", ddl)
+            loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl)
             if loc_match:
                 refName = loc_match.group(1)
                 drdict["refName"] = refName
@@ -1349,18 +1345,14 @@
             config_json.update(self.config_json)
         config_data = {}
 
-        config_data["disableAnalytics"] = (
-            data.get("analytics", "false") == "true"
-        )
+        config_data["disableAnalytics"] = data.get("analytics", "false") == "true"
 
         config_data["theme"] = {
             "palette": {
                 "primary": {"main": data.get("primary_color", "#0D233F")},
                 "secondary": {"main": data.get("secondary_color", "#721E63")},
                 "tertiary": {"main": data.get("tertiary_color", "#135560")},
-                "quaternary": {
-                    "main": data.get("quaternary_color", "#FFB11D")
-                },
+                "quaternary": {"main": data.get("quaternary_color", "#FFB11D")},
             },
             "typography": {"fontSize": int(data.get("font_size", 10))},
         }
@@ -1414,9 +1406,7 @@
     parser = argparse.ArgumentParser(description="", epilog="")
     parser.add_argument("--xml", help="Track Configuration")
     parser.add_argument("--outdir", help="Output directory", default="out")
-    parser.add_argument(
-        "--version", "-V", action="version", version="%(prog)s 2.0.1"
-    )
+    parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1")
     args = parser.parse_args()
     tree = ET.parse(args.xml)
     root = tree.getroot()
@@ -1512,8 +1502,7 @@
         track_conf["format"] = track.attrib["format"]
         if track.find("options/style"):
             track_conf["style"] = {
-                item.tag: parse_style_conf(item)
-                for item in track.find("options/style")
+                item.tag: parse_style_conf(item) for item in track.find("options/style")
             }
         if track.find("options/style_labels"):
             track_conf["style_labels"] = {
@@ -1526,9 +1515,7 @@
         track_conf["format"] = track.attrib["format"]
         try:
             # Only pertains to gff3 + blastxml. TODO?
-            track_conf["style"] = {
-                t.tag: t.text for t in track.find("options/style")
-            }
+            track_conf["style"] = {t.tag: t.text for t in track.find("options/style")}
         except TypeError:
             track_conf["style"] = {}
             pass
@@ -1559,9 +1546,7 @@
         "primary_color": root.find("metadata/general/primary_color").text,
         "secondary_color": root.find("metadata/general/secondary_color").text,
         "tertiary_color": root.find("metadata/general/tertiary_color").text,
-        "quaternary_color": root.find(
-            "metadata/general/quaternary_color"
-        ).text,
+        "quaternary_color": root.find("metadata/general/quaternary_color").text,
         "font_size": root.find("metadata/general/font_size").text,
     }
     jc.add_general_configuration(general_data)