diff jbrowse2/jbrowse2.py @ 6:88b9b105c09b draft

Uploaded
author fubar
date Fri, 05 Jan 2024 01:58:02 +0000
parents 42ca8804cd93
children 234cf4490901
line wrap: on
line diff
--- a/jbrowse2/jbrowse2.py	Thu Jan 04 02:18:18 2024 +0000
+++ b/jbrowse2/jbrowse2.py	Fri Jan 05 01:58:02 2024 +0000
@@ -110,6 +110,7 @@
 class JbrowseConnector(object):
     def __init__(self, jbrowse, outdir, genomes, standalone=None):
         self.debug = False
+        self.usejson = True
         self.giURL = GALAXY_INFRASTRUCTURE_URL
         self.jbrowse = jbrowse
         self.outdir = outdir
@@ -118,20 +119,9 @@
         self.standalone = standalone
         self.trackIdlist = []
         self.tracksToAdd = []
-        self.config_json = {
-            "configuration": {
-                "rpc": {
-                    "defaultDriver": "WebWorkerRpcDriver",
-                    "drivers": {"MainThreadRpcDriver": {}, "WebWorkerRpcDriver": {}},
-                },
-                "logoPath": {"locationType": "UriLocation", "uri": ""},
-            }
-        }
-        self.config_json_file = os.path.join(outdir, "config.json")
-        if standalone == "complete":
-            self.clone_jbrowse(self.jbrowse, self.outdir)
-        elif standalone == "minimal":
-            self.clone_jbrowse(self.jbrowse, self.outdir, minimal=True)
+        self.config_json = {}
+        self.config_json_file = os.path.realpath(os.path.join(outdir, "config.json"))
+        self.clone_jbrowse(self.jbrowse, self.outdir)
 
     def subprocess_check_call(self, command, output=None):
         if output:
@@ -181,44 +171,98 @@
     def process_genomes(self):
         assemblies = []
         for i, genome_node in enumerate(self.genome_paths):
-            log.info("genome_node=%s" % str(genome_node))
-            # We only expect one input genome per run. This for loop is just
-            # easier to write than the alternative / catches any possible
-            # issues.
+            if self.debug:
+                log.info("genome_node=%s" % str(genome_node))
             genome_name = genome_node["meta"]["dataset_dname"]
             dsId = genome_node["meta"]["dataset_id"]
             fapath = genome_node["path"]
-            faname = genome_name + ".fasta"
-            faind = os.path.realpath(os.path.join(self.outdir, faname + ".fai"))
             if self.standalone == "complete":
-                faurl = faname
+                faname = genome_name + ".fa.gz"
                 fadest = os.path.realpath(os.path.join(self.outdir, faname))
-                cmd = ["cp", fapath, fadest]
-                self.subprocess_check_call(cmd)
+                cmd = "bgzip -i -c %s > %s && samtools faidx %s" % (
+                    fapath,
+                    fadest,
+                    fadest,
+                )
+                self.subprocess_popen(cmd)
+                adapter = {
+                    "type": "BgzipFastaAdapter",
+                    "fastaLocation": {
+                        "uri": faname,
+                    },
+                    "faiLocation": {
+                        "uri": faname + ".fai",
+                    },
+                    "gziLocation": {
+                        "uri": faname + ".gzi",
+                    },
+                }
             else:
-                faurl = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId)
-            cmd = ["samtools", "faidx", fapath, "--fai-idx", faind]
-            self.subprocess_check_call(cmd)
+                faurl = "%s/api/datasets/%s/display" % (self.giURL, dsId)
+                faname = genome_name + ".fa.fai"
+                fastalocation = {
+                    "uri": faurl,
+                }
+                failocation = {
+                    "uri": faname,
+                }
+                adapter = {
+                    "type": "IndexedFastaAdapter",
+                    "fastaLocation": fastalocation,
+                    "faiLocation": failocation,
+                }
+
+                cmd = ["samtools", "faidx", fapath, "--fai-idx", faname]
+                self.subprocess_check_call(cmd)
             trackDict = {
                 "name": genome_name,
                 "sequence": {
                     "type": "ReferenceSequenceTrack",
                     "trackId": genome_name,
-                    "adapter": {
-                        "type": "IndexedFastaAdapter",
-                        "fastaLocation": {"uri": faurl, "locationType": "UriLocation"},
-                        "faiLocation": {
-                            "uri": faname + ".fai",
-                            "locationType": "UriLocation",
-                        },
-                    },
+                    "adapter": adapter,
                 },
+                "rendering": {"type": "DivSequenceRenderer"},
             }
             assemblies.append(trackDict)
-        self.config_json["assemblies"] = assemblies
         self.genome_name = genome_name
-        self.genome_path = faurl
-        self.genome_fai_path = faname + ".fai"
+        if self.usejson:
+            self.config_json["assemblies"] = assemblies
+        else:
+            if self.standalone == "complete":
+                cmd = [
+                    "jbrowse",
+                    "add-assembly",
+                    faname,
+                    "-t",
+                    "bgzipFasta",
+                    "-n",
+                    genome_name,
+                    "--load",
+                    "inPlace",
+                    "--faiLocation",
+                    faname + ".fai",
+                    "--gziLocation",
+                    faname + ".gzi",
+                    "--target",
+                    self.outdir,
+                ]
+            else:
+                cmd = [
+                    "jbrowse",
+                    "add-assembly",
+                    faname,
+                    "-t",
+                    "indexedFasta",
+                    "-n",
+                    genome_name,
+                    "--load",
+                    "inPlace",
+                    "--faiLocation",
+                    faname + ".fai",
+                    "--target",
+                    self.outdir,
+                ]
+            self.subprocess_check_call(cmd)
 
     def add_default_view(self):
         cmd = [
@@ -229,10 +273,14 @@
             "-t",
             ",".join(self.trackIdlist),
             "-n",
-            "Default",
+            "JBrowse2 in Galaxy",
             "--target",
-            self.outdir,
-        ]  #
+            self.config_json_file,
+            "-v",
+            "LinearGenomeView",
+        ]
+        if self.debug:
+            log.info("### calling set-default-session with cmd=%s" % "  ".join(cmd))
         self.subprocess_check_call(cmd)
 
     def write_config(self):
@@ -268,8 +316,14 @@
             url = hname
             cmd = ["cp", data, dest]
             self.subprocess_check_call(cmd)
+            floc = {
+                "uri": hname,
+            }
         else:
             url = "%s/api/datasets/%s/display?to_ext=hic" % (self.giURL, dsId)
+            floc = {
+                "uri": url,
+            }
         trackDict = {
             "type": "HicTrack",
             "trackId": tId,
@@ -277,11 +331,29 @@
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "HicAdapter",
-                "hicLocation": {"uri": url, "locationType": "UriLocation"},
+                "hicLocation": floc,
             },
         }
-        self.tracksToAdd.append(trackDict)
-        self.trackIdlist.append(tId)
+        if self.usejson:
+            self.tracksToAdd.append(trackDict)
+            self.trackIdlist.append(tId)
+        else:
+            cmd = [
+                "jbrowse",
+                "add-track",
+                url,
+                "-t",
+                "HicTrack",
+                "-a",
+                self.genome_name,
+                "-n",
+                hname,
+                "--load",
+                "inPlace",
+                "--target",
+                self.outdir,
+            ]
+            self.subprocess_check_call(cmd)
 
     def add_maf(self, data, trackData):
         """
@@ -333,9 +405,13 @@
             "adapter": {
                 "type": "MafTabixAdapter",
                 "samples": samples,
-                "bedGzLocation": {"uri": fname + ".sorted.bed.gz"},
+                "bedGzLocation": {
+                    "uri": fname + ".sorted.bed.gz",
+                },
                 "index": {
-                    "location": {"uri": fname + ".sorted.bed.gz.tbi"},
+                    "location": {
+                        "uri": fname + ".sorted.bed.gz.tbi",
+                    },
                 },
             },
             "assemblyNames": [self.genome_name],
@@ -390,9 +466,13 @@
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "Gff3TabixAdapter",
-                "gffGzLocation": {"locationType": "UriLocation", "uri": url},
+                "gffGzLocation": {
+                    "uri": url,
+                },
                 "index": {
-                    "location": {"locationType": "UriLocation", "uri": url + ".tbi"}
+                    "location": {
+                        "uri": url + ".tbi",
+                    }
                 },
             },
             "displays": [
@@ -403,31 +483,52 @@
                 {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
             ],
         }
-        self.tracksToAdd.append(trackDict)
-        self.trackIdlist.append(tId)
+        if self.usejson:
+            self.tracksToAdd.append(trackDict)
+            self.trackIdlist.append(tId)
+        else:
+            cmd = [
+                "jbrowse",
+                "add-track",
+                url,
+                "-t",
+                "FeatureTrack",
+                "-a",
+                self.genome_name,
+                "--indexFile",
+                url + ".tbi",
+                "-n",
+                trackData["name"],
+                "--load",
+                "inPlace",
+                "--target",
+                self.outdir,
+            ]
+            self.subprocess_check_call(cmd)
         os.unlink(gff3)
 
     def add_bigwig(self, data, trackData):
-        fname = trackData["name"]
+        url = "%s.bw" % trackData["name"]
         if self.standalone == "complete":
-            dest = os.path.realpath(os.path.join(self.outdir, fname))
-            url = fname
+            dest = os.path.realpath(os.path.join(self.outdir, url))
             cmd = ["cp", data, dest]
             self.subprocess_check_call(cmd)
+            bwloc = {"uri": url}
         else:
             dsId = trackData["metadata"]["dataset_id"]
             url = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId)
+            bwloc = {"uri": url}
         tId = trackData["label"]
         trackDict = {
             "type": "QuantitativeTrack",
             "trackId": tId,
-            "name": fname,
+            "name": trackData["name"],
             "assemblyNames": [
                 self.genome_name,
             ],
             "adapter": {
                 "type": "BigWigAdapter",
-                "bigWigLocation": {"locationType": "UriLocation", "uri": url},
+                "bigWigLocation": bwloc,
             },
             "displays": [
                 {
@@ -436,19 +537,40 @@
                 }
             ],
         }
-        self.tracksToAdd.append(trackDict)
-        self.trackIdlist.append(tId)
+        if self.usejson:
+            self.tracksToAdd.append(trackDict)
+            self.trackIdlist.append(tId)
+        else:
+            cmd = [
+                "jbrowse",
+                "add-track",
+                url,
+                "-t",
+                "QuantitativeTrack",
+                "-a",
+                self.genome_name,
+                "-n",
+                trackData["name"],
+                "--load",
+                "inPlace",
+                "--target",
+                self.outdir,
+            ]
+            self.subprocess_check_call(cmd)
 
     def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
         tId = trackData["label"]
         fname = "%s.bam" % trackData["label"]
         dest = os.path.realpath("%s/%s" % (self.outdir, fname))
-        if self.standalone == "minimal":
+        if self.standalone == "complete":
+            url = fname
+            self.subprocess_check_call(["cp", data, dest])
+            log.info("### copied %s to %s" % (data, dest))
+            bloc = {"uri": url}
+        else:
             dsId = trackData["metadata"]["dataset_id"]
             url = "%s/api/datasets/%s/display?to_ext=bam" % (self.giURL, dsId)
-        else:
-            url = fname
-            self.symlink_or_copy(data, dest)
+            bloc = {"uri": url}
         if bam_index is not None and os.path.exists(os.path.realpath(bam_index)):
             # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
             self.subprocess_check_call(
@@ -470,29 +592,36 @@
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "BamAdapter",
-                "bamLocation": {"locationType": "UriLocation", "uri": url},
+                "bamLocation": bloc,
                 "index": {
-                    "location": {"locationType": "UriLocation", "uri": fname + ".bai"}
-                },
-                "sequenceAdapter": {
-                    "type": "IndexedFastaAdapter",
-                    "fastaLocation": {
-                        "locationType": "UriLocation",
-                        "uri": self.genome_path,
-                    },
-                    "faiLocation": {
-                        "locationType": "UriLocation",
-                        "uri": self.genome_fai_path,
-                    },
-                    "metadataLocation": {
-                        "locationType": "UriLocation",
-                        "uri": "/path/to/fa.metadata.yaml",
-                    },
+                    "location": {
+                        "uri": fname + ".bai",
+                    }
                 },
             },
         }
-        self.tracksToAdd.append(trackDict)
-        self.trackIdlist.append(tId)
+        if self.usejson:
+            self.tracksToAdd.append(trackDict)
+            self.trackIdlist.append(tId)
+        else:
+            cmd = [
+                "jbrowse",
+                "add-track",
+                url,
+                "-t",
+                "AlignmentsTrack",
+                "-l",
+                "inPlace",
+                "-a",
+                self.genome_name,
+                "--indexFile",
+                fname + ".bai",
+                "-n",
+                trackData["name"],
+                "--target",
+                self.outdir,
+            ]
+            self.subprocess_check_call(cmd)
 
     def add_vcf(self, data, trackData):
         tId = trackData["label"]
@@ -513,9 +642,13 @@
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "VcfTabixAdapter",
-                "vcfGzLocation": {"uri": url, "locationType": "UriLocation"},
+                "vcfGzLocation": {
+                    "uri": url,
+                },
                 "index": {
-                    "location": {"uri": url + ".tbi", "locationType": "UriLocation"}
+                    "location": {
+                        "uri": url + ".tbi",
+                    }
                 },
             },
             "displays": [
@@ -533,8 +666,28 @@
                 },
             ],
         }
-        self.tracksToAdd.append(trackDict)
-        self.trackIdlist.append(tId)
+        if self.usejson:
+            self.tracksToAdd.append(trackDict)
+            self.trackIdlist.append(tId)
+        else:
+            cmd = [
+                "jbrowse",
+                "add-track",
+                url,
+                "-t",
+                "VariantTrack",
+                "-a",
+                self.genome_name,
+                "--indexFile",
+                url + ".tbi",
+                "-n",
+                trackData["name"],
+                "--load",
+                "inPlace",
+                "--target",
+                self.outdir,
+            ]
+            self.subprocess_check_call(cmd)
 
     def _sort_gff(self, data, dest):
         # Only index if not already done
@@ -567,9 +720,13 @@
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "Gff3TabixAdapter",
-                "gffGzLocation": {"locationType": "UriLocation", "uri": url},
+                "gffGzLocation": {
+                    "uri": url,
+                },
                 "index": {
-                    "location": {"uri": url + ".tbi", "locationType": "UriLocation"}
+                    "location": {
+                        "uri": url + ".tbi",
+                    }
                 },
             },
             "displays": [
@@ -580,8 +737,26 @@
                 {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
             ],
         }
-        self.tracksToAdd.append(trackDict)
-        self.trackIdlist.append(tId)
+        if self.usejson:
+            self.tracksToAdd.append(trackDict)
+            self.trackIdlist.append(tId)
+        else:
+            cmd = [
+                "jbrowse",
+                "add-track",
+                url,
+                "-t",
+                "FeatureTrack",
+                "-a",
+                self.genome_name,
+                "-n",
+                trackData["name"],
+                "--load",
+                "inPlace",
+                "--target",
+                self.outdir,
+            ]
+            self.subprocess_check_call(cmd)
 
     def add_bed(self, data, ext, trackData):
         url = "%s.%s" % (trackData["label"], ext)
@@ -596,9 +771,13 @@
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "BedTabixAdapter",
-                "bedGzLocation": {"locationType": "UriLocation", "uri": url},
+                "bedGzLocation": {
+                    "uri": url,
+                },
                 "index": {
-                    "location": {"uri": url + ".tbi", "locationType": "UriLocation"}
+                    "location": {
+                        "uri": url + ".tbi",
+                    }
                 },
             },
             "displays": [
@@ -609,8 +788,28 @@
                 {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
             ],
         }
-        self.tracksToAdd.append(trackDict)
-        self.trackIdlist.append(tId)
+        if self.usejson:
+            self.tracksToAdd.append(trackDict)
+            self.trackIdlist.append(tId)
+        else:
+            cmd = [
+                "jbrowse",
+                "add-track",
+                url,
+                "-t",
+                "FeatureTrack",
+                "-a",
+                self.genome_name,
+                "--indexFile",
+                url + ".tbi",
+                "-n",
+                trackData["name"],
+                "--load",
+                "inPlace",
+                "--target",
+                self.outdir,
+            ]
+            self.subprocess_check_call(cmd)
 
     def process_annotations(self, track):
         category = track["category"].replace("__pd__date__pd__", TODAY)
@@ -713,7 +912,7 @@
             else:
                 log.warn("Do not know how to handle %s", dataset_ext)
 
-    def clone_jbrowse(self, jbrowse_dir, destination, minimal=False):
+    def clone_jbrowse(self, jbrowse_dir, destination):
         """Clone a JBrowse directory into a destination directory."""
         cmd = ["jbrowse", "create", "-f", self.outdir]
         self.subprocess_check_call(cmd)
@@ -842,5 +1041,6 @@
         str(jc.config_json),
     )
     jc.config_json["tracks"] = jc.tracksToAdd
-    jc.write_config()
+    if jc.usejson:
+        jc.write_config()
     jc.add_default_view()