changeset 5:42ca8804cd93 draft

urls in minimal, bloat in complete
author fubar
date Thu, 04 Jan 2024 02:18:18 +0000
parents 52842c3f2dda
children 88b9b105c09b
files jbrowse2/datatypes_conf.xml jbrowse2/jbrowse2.py jbrowse2/jbrowse2.xml
diffstat 3 files changed, 48 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/jbrowse2/datatypes_conf.xml	Wed Jan 03 23:27:23 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-<?xml version="1.0"?>
-<datatypes>
-  <registration >
-    <datatype extension="hic" type="galaxy.datatypes.binary:Binary" subclass="true" display_in_upload="true"/>
-    <datatype extension="tbi" type="galaxy.datatypes.binary:Binary" subclass="true" display_in_upload="true"/>
-  </registration>
-</datatypes>
--- a/jbrowse2/jbrowse2.py	Wed Jan 03 23:27:23 2024 +0000
+++ b/jbrowse2/jbrowse2.py	Thu Jan 04 02:18:18 2024 +0000
@@ -187,10 +187,16 @@
             # issues.
             genome_name = genome_node["meta"]["dataset_dname"]
             dsId = genome_node["meta"]["dataset_id"]
+            fapath = genome_node["path"]
             faname = genome_name + ".fasta"
-            faurl = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId)
-            fapath = genome_node["path"]
             faind = os.path.realpath(os.path.join(self.outdir, faname + ".fai"))
+            if self.standalone == "complete":
+                faurl = faname
+                fadest = os.path.realpath(os.path.join(self.outdir, faname))
+                cmd = ["cp", fapath, fadest]
+                self.subprocess_check_call(cmd)
+            else:
+                faurl = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId)
             cmd = ["samtools", "faidx", fapath, "--fai-idx", faind]
             self.subprocess_check_call(cmd)
             trackDict = {
@@ -251,14 +257,23 @@
         """
         log.info("#### trackData=%s" % trackData)
         tId = trackData["label"]
+        dsId = trackData["metadata"]["dataset_id"]
         url = "%s/api/datasets/%s/display?to_ext=hic " % (
             self.giURL,
-            trackData["metadata"]["dataset_id"],
+            dsId,
         )
+        hname = trackData["name"]
+        if self.standalone == "complete":
+            dest = os.path.realpath(os.path.join(self.outdir, hname))
+            url = hname
+            cmd = ["cp", data, dest]
+            self.subprocess_check_call(cmd)
+        else:
+            url = "%s/api/datasets/%s/display?to_ext=hic" % (self.giURL, dsId)
         trackDict = {
             "type": "HicTrack",
             "trackId": tId,
-            "name": trackData["name"],
+            "name": hname,
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "HicAdapter",
@@ -393,15 +408,20 @@
         os.unlink(gff3)
 
     def add_bigwig(self, data, trackData):
-        url = "%s/api/datasets/%s/display" % (
-            self.giURL,
-            trackData["metadata"]["dataset_id"],
-        )
+        fname = trackData["name"]
+        if self.standalone == "complete":
+            dest = os.path.realpath(os.path.join(self.outdir, fname))
+            url = fname
+            cmd = ["cp", data, dest]
+            self.subprocess_check_call(cmd)
+        else:
+            dsId = trackData["metadata"]["dataset_id"]
+            url = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId)
         tId = trackData["label"]
         trackDict = {
             "type": "QuantitativeTrack",
             "trackId": tId,
-            "name": trackData["name"],
+            "name": fname,
             "assemblyNames": [
                 self.genome_name,
             ],
@@ -421,9 +441,14 @@
 
     def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
         tId = trackData["label"]
-        url = "%s.bam" % trackData["label"]
-        dest = os.path.realpath("%s/%s" % (self.outdir, url))
-        self.symlink_or_copy(os.path.realpath(data), dest)
+        fname = "%s.bam" % trackData["label"]
+        dest = os.path.realpath("%s/%s" % (self.outdir, fname))
+        if self.standalone == "minimal":
+            dsId = trackData["metadata"]["dataset_id"]
+            url = "%s/api/datasets/%s/display?to_ext=bam" % (self.giURL, dsId)
+        else:
+            url = fname
+            self.symlink_or_copy(data, dest)
         if bam_index is not None and os.path.exists(os.path.realpath(bam_index)):
             # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
             self.subprocess_check_call(
@@ -447,7 +472,7 @@
                 "type": "BamAdapter",
                 "bamLocation": {"locationType": "UriLocation", "uri": url},
                 "index": {
-                    "location": {"locationType": "UriLocation", "uri": url + ".bai"}
+                    "location": {"locationType": "UriLocation", "uri": fname + ".bai"}
                 },
                 "sequenceAdapter": {
                     "type": "IndexedFastaAdapter",
@@ -475,7 +500,6 @@
             self.giURL,
             trackData["metadata"]["dataset_id"],
         )
-
         url = "%s.vcf.gz" % tId
         dest = os.path.realpath("%s/%s" % (self.outdir, url))
         cmd = "bgzip -c %s  > %s" % (data, dest)
@@ -525,12 +549,10 @@
     def _sort_bed(self, data, dest):
         # Only index if not already done
         if not os.path.exists(dest):
-            cmd = ["sort", "-k1,1", "-k2,2n", data]
-            with open(dest, "w") as handle:
-                self.subprocess_check_call(cmd, output=handle)
-
-            self.subprocess_check_call(["bgzip", "-f", dest])
-            self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest + ".gz"])
+            cmd = "sort -k1,1 -k2,2n %s | bgzip -c > %s" % (data, dest)
+            self.subprocess_popen(cmd)
+            cmd = ["tabix", "-f", "-p", "bed", dest]
+            self.subprocess_check_call(cmd)
 
     def add_gff(self, data, ext, trackData):
         url = "%s.%s" % (trackData["label"], ext)
@@ -563,7 +585,7 @@
 
     def add_bed(self, data, ext, trackData):
         url = "%s.%s" % (trackData["label"], ext)
-        dest = os.path.realpath("%s/%s" % (self.outdir, url))
+        dest = os.path.realpath("%s/%s.gz" % (self.outdir, url))
         self._sort_bed(data, dest)
         tId = trackData["label"]
         url = url + ".gz"
--- a/jbrowse2/jbrowse2.xml	Wed Jan 03 23:27:23 2024 +0000
+++ b/jbrowse2/jbrowse2.xml	Thu Jan 04 02:18:18 2024 +0000
@@ -212,10 +212,11 @@
             </when>
         </conditional>
 
-        <param name="standalone" label="Output JBrowse" type="select">
-            <option value="complete">Complete, for viewing and further local development (JBrowse, tools, documentation, etc.)
+        <param name="standalone" label="Include all reference and track data in the JBrowse2 object" type="select"
+              help="Default is efficient but will not work offline. Including reference sequences, tracks and indexes will allow standalone viewing, at the cost of copying and moving all data" >
+            <option value="complete">Complete: Choose ONLY if need to view offline, or if history cannot be published. WARNING: produces bloated downloads storing redundant copies of all data!
             </option>
-            <option value="minimal" selected="true">Minimal for viewing (Documentation removed)
+            <option value="minimal" selected="true">Sufficient: Uses URLs for Galaxy data. Requires internet access and a published history to download, share and view remotely.
             </option>
         </param>
 
@@ -315,7 +316,7 @@
         <param type="hidden" name="uglyTestingHack" value="" />
     </inputs>
     <outputs>
-        <data format="html" name="output" label="JBrowse on $on_string - $standalone"/>
+        <data format="html" name="output" label="JBrowse2 on $reference_genome.genome.element_identifier - $standalone"/>
     </outputs>
     <tests>
         <test>