diff jbrowse2.py @ 57:94264fe60478 draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 4b5df41484f6bdf316edaf95b53c92d328ec1674-dirty
author fubar
date Thu, 21 Mar 2024 08:01:42 +0000
parents c0097a584a8a
children f807e219cec3
line wrap: on
line diff
--- a/jbrowse2.py	Tue Mar 19 02:33:40 2024 +0000
+++ b/jbrowse2.py	Thu Mar 21 08:01:42 2024 +0000
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python
 # change to accumulating all configuration for config.json based on the default from the clone
 import argparse
 import binascii
@@ -10,7 +10,6 @@
 import shutil
 import struct
 import subprocess
-import sys
 import tempfile
 import urllib.request
 import xml.etree.ElementTree as ET
@@ -480,9 +479,11 @@
                             else:
                                 self.genome_firstcontig = fl
                     else:
-                        fl = urllib.request.urlopen(fapath+".fai").readline()
-                        if fl: # is first row of the text fai so the first contig name
-                            self.genome_firstcontig = fl.decode('utf8').strip().split()[0]
+                        fl = urllib.request.urlopen(fapath + ".fai").readline()
+                        if fl:  # is first row of the text fai so the first contig name
+                            self.genome_firstcontig = (
+                                fl.decode("utf8").strip().split()[0]
+                            )
         if self.config_json.get("assemblies", None):
             self.config_json["assemblies"] += assemblies
         else:
@@ -538,6 +539,16 @@
                 "adapter": adapter,
             },
             "rendering": {"type": "DivSequenceRenderer"},
+            "displays": [
+                {
+                    "type": "LinearReferenceSequenceDisplay",
+                    "displayId": "%s-LinearReferenceSequenceDisplay" % gname,
+                },
+                {
+                    "type": "LinearGCContentDisplay",
+                    "displayId": "%s-LinearGCContentDisplay" % gname,
+                },
+            ],
         }
         return trackDict
 
@@ -604,13 +615,15 @@
             uri = data
         else:
             uri = trackData["hic_url"]
-        categ = trackData['category']
+        categ = trackData["category"]
         trackDict = {
             "type": "HicTrack",
             "trackId": tId,
             "name": uri,
             "assemblyNames": [self.genome_name],
-            "category": [categ,],
+            "category": [
+                categ,
+            ],
             "adapter": {
                 "type": "HicAdapter",
                 "hicLocation": uri,
@@ -622,8 +635,6 @@
                 },
             ],
         }
-        style_json = self._prepare_track_style(trackDict)
-        trackDict["style"] = style_json
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
 
@@ -643,7 +654,7 @@
                 }
             ]
         }
-        categ = trackData['category']
+        categ = trackData["category"]
         fname = "%s.bed" % tId
         dest = "%s/%s" % (self.outdir, fname)
         gname = self.genome_name
@@ -665,11 +676,14 @@
         soutp = outp.split("\n")
         samp = [x.split("s ")[1] for x in soutp if x.startswith("s ")]
         samples = [x.split(".")[0] for x in samp]
+        logging.warning("### maf convert cmd = %s,\nsamples=%s", " ".join(cmd), samples)
         trackDict = {
             "type": "MafTrack",
             "trackId": tId,
             "name": trackData["name"],
-            "category": [categ,],
+            "category": [
+                categ,
+            ],
             "adapter": {
                 "type": "MafTabixAdapter",
                 "samples": samples,
@@ -694,8 +708,6 @@
                 },
             ],
         }
-        style_json = self._prepare_track_style(trackDict)
-        trackDict["style"] = style_json
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
         if self.config_json.get("plugins", None):
@@ -717,11 +729,11 @@
         ]
         subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
         gff3_unrebased.close()
+        logging.warning("### blastxml to gff3 cmd = %s", " ".join(cmd))
         return gff3_unrebased.name
 
     def add_blastxml(self, data, trackData, blastOpts, **kwargs):
         gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"])
-
         if "parent" in blastOpts and blastOpts["parent"] != "None":
             gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
             cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")]
@@ -729,23 +741,24 @@
                 cmd.append("--protein2dna")
             cmd.extend([os.path.realpath(blastOpts["parent"]), gff3])
             subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
+            logging.warning("### gff3rebase cmd = %s", " ".join(cmd))
             gff3_rebased.close()
-
             # Replace original gff3 file
             shutil.copy(gff3_rebased.name, gff3)
             os.unlink(gff3_rebased.name)
-        url = "%s.gff3" % trackData["label"]
+        url = "%s.gff3.gz" % trackData["label"]
         dest = "%s/%s" % (self.outdir, url)
         self._sort_gff(gff3, dest)
-        url = url + ".gz"
         tId = trackData["label"]
-        categ = trackData['category']
+        categ = trackData["category"]
         trackDict = {
             "type": "FeatureTrack",
             "trackId": tId,
             "name": trackData["name"],
             "assemblyNames": [self.genome_name],
-            "category": [categ,],
+            "category": [
+                categ,
+            ],
             "adapter": {
                 "type": "Gff3TabixAdapter",
                 "gffGzLocation": {
@@ -768,8 +781,6 @@
                 },
             ],
         }
-        style_json = self._prepare_track_style(trackDict)
-        trackDict["style"] = style_json
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
         os.unlink(gff3)
@@ -794,12 +805,14 @@
             self.subprocess_check_call(cmd)
         bwloc = {"uri": url}
         tId = trackData["label"]
-        categ = trackData['category']
+        categ = trackData["category"]
         trackDict = {
             "type": "QuantitativeTrack",
             "trackId": tId,
             "name": trackData["name"],
-            "category": [categ,],
+            "category": [
+                categ,
+            ],
             "assemblyNames": [
                 self.genome_name,
             ],
@@ -814,8 +827,6 @@
                 }
             ],
         }
-        style_json = self._prepare_track_style(trackDict)
-        trackDict["style"] = style_json
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
 
@@ -823,21 +834,19 @@
         tId = trackData["label"]
         useuri = trackData["useuri"].lower() == "yes"
         bindex = bam_index
-        categ = trackData['category']
+        categ = trackData["category"]
         if useuri:
             url = data
         else:
             fname = "%s.bam" % trackData["label"]
             dest = "%s/%s" % (self.outdir, fname)
             url = fname
-            bindex = fname + '.bai'
+            bindex = fname + ".bai"
             self.subprocess_check_call(["cp", data, dest])
             if bam_index is not None and os.path.exists(bam_index):
                 if not os.path.exists(bindex):
                     # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
-                    self.subprocess_check_call(
-                        ["cp", bam_index, bindex]
-                    )
+                    self.subprocess_check_call(["cp", bam_index, bindex])
                 else:
                     # Can happen in exotic condition
                     # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam
@@ -851,7 +860,9 @@
             "type": "AlignmentsTrack",
             "trackId": tId,
             "name": trackData["name"],
-            "category": [categ,],
+            "category": [
+                categ,
+            ],
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "BamAdapter",
@@ -869,14 +880,12 @@
                 },
             ],
         }
-        style_json = self._prepare_track_style(trackDict)
-        trackDict["style"] = style_json
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
 
     def add_cram(self, data, trackData, cram_index=None, **kwargs):
         tId = trackData["label"]
-        categ = trackData['category']
+        categ = trackData["category"]
         useuri = trackData["useuri"].lower() == "yes"
         if useuri:
             url = data
@@ -886,27 +895,29 @@
             url = fname
             self.subprocess_check_call(["cp", data, dest])
             if cram_index is not None and os.path.exists(cram_index):
-                if not os.path.exists(dest+'.crai'):
+                if not os.path.exists(dest + ".crai"):
                     # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
                     self.subprocess_check_call(
                         ["cp", os.path.realpath(cram_index), dest + ".crai"]
                     )
             else:
-                cpath = os.path.realpath(dest) + '.crai'
+                cpath = os.path.realpath(dest) + ".crai"
                 cmd = ["samtools", "index", "-c", "-o", cpath, os.path.realpath(dest)]
-                logging.debug('executing cmd %s' % ' '.join(cmd))
+                logging.debug("executing cmd %s" % " ".join(cmd))
                 self.subprocess_check_call(cmd)
         trackDict = {
             "type": "AlignmentsTrack",
             "trackId": tId,
             "name": trackData["name"],
-            "category": [categ,],
+            "category": [
+                categ,
+            ],
             "assemblyNames": [self.genome_name],
             "adapter": {
                 "type": "CramAdapter",
                 "cramLocation": {"uri": url},
                 "craiLocation": {
-                    "uri": url + '.crai',
+                    "uri": url + ".crai",
                 },
                 "sequenceAdapter": self.genome_sequence_adapter,
             },
@@ -917,8 +928,6 @@
                 },
             ],
         }
-        style_json = self._prepare_track_style(trackDict)
-        trackDict["style"] = style_json
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
 
@@ -928,7 +937,7 @@
         # self.giURL,
         # trackData["metadata"]["dataset_id"],
         # )
-        categ = trackData['category']
+        categ = trackData["category"]
         useuri = trackData["useuri"].lower() == "yes"
         if useuri:
             url = data
@@ -944,12 +953,12 @@
             "trackId": tId,
             "name": trackData["name"],
             "assemblyNames": [self.genome_name],
-            "category": [categ,],
+            "category": [
+                categ,
+            ],
             "adapter": {
                 "type": "VcfTabixAdapter",
-                "vcfGzLocation": {
-                    "uri": url
-                },
+                "vcfGzLocation": {"uri": url},
                 "index": {
                     "location": {
                         "uri": url + ".tbi",
@@ -971,8 +980,6 @@
                 },
             ],
         }
-        style_json = self._prepare_track_style(trackDict)
-        trackDict["style"] = style_json
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
 
@@ -1003,13 +1010,15 @@
             dest = "%s/%s" % (self.outdir, url)
             self._sort_gff(data, dest)
         tId = trackData["label"]
-        categ = trackData['category']
+        categ = trackData["category"]
         trackDict = {
             "type": "FeatureTrack",
             "trackId": tId,
             "name": trackData["name"],
             "assemblyNames": [self.genome_name],
-            "category": [categ,],
+            "category": [
+                categ,
+            ],
             "adapter": {
                 "type": "Gff3TabixAdapter",
                 "gffGzLocation": {
@@ -1032,14 +1041,12 @@
                 },
             ],
         }
-        style_json = self._prepare_track_style(trackDict)
-        trackDict["style"] = style_json
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
 
     def add_bed(self, data, ext, trackData):
         tId = trackData["label"]
-        categ = trackData['category']
+        categ = trackData["category"]
         useuri = trackData["useuri"].lower() == "yes"
         if useuri:
             url = data
@@ -1053,7 +1060,9 @@
             "name": trackData["name"],
             "assemblyNames": [self.genome_name],
             "adapter": {
-            "category": [categ,],
+                "category": [
+                    categ,
+                ],
                 "type": "BedTabixAdapter",
                 "bedGzLocation": {
                     "uri": url,
@@ -1079,15 +1088,13 @@
                 },
             ],
         }
-        style_json = self._prepare_track_style(trackDict)
-        trackDict["style"] = style_json
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
 
     def add_paf(self, data, trackData, pafOpts, **kwargs):
         tname = trackData["name"]
         tId = trackData["label"]
-        categ = trackData['category']
+        categ = trackData["category"]
         pgnames = [x.strip() for x in pafOpts["genome_label"].split(",")]
         pgpaths = [x.strip() for x in pafOpts["genome"].split(",")]
         passnames = [self.genome_name]  # always first
@@ -1098,7 +1105,9 @@
                 # trouble from spacey names in command lines avoidance
                 if gname not in self.genome_names:
                     # ignore if already there - eg for duplicates among pafs.
-                    useuri = pgpaths[i].startswith('http://') or pgpaths[i].startswith('https://')
+                    useuri = pgpaths[i].startswith("http://") or pgpaths[i].startswith(
+                        "https://"
+                    )
                     asstrack = self.make_assembly(pgpaths[i], gname, useuri)
                     self.genome_names.append(gname)
                     if self.config_json.get("assemblies", None):
@@ -1114,7 +1123,9 @@
             "type": "SyntenyTrack",
             "trackId": tId,
             "assemblyNames": passnames,
-            "category": [categ,],
+            "category": [
+                categ,
+            ],
             "name": tname,
             "adapter": {
                 "type": "PAFAdapter",
@@ -1122,18 +1133,16 @@
                 "assemblyNames": passnames,
             },
             "displays": [
-            {
-            "type": "LinearSyntenyDisplay",
-            "displayId": "%s-LinearSyntenyDisplay" % tId,
-            },
-            {
-            "type": "DotPlotDisplay",
-            "displayId": "%s-DotPlotDisplay" % tId,
-            },
+                {
+                    "type": "LinearSyntenyDisplay",
+                    "displayId": "%s-LinearSyntenyDisplay" % tId,
+                },
+                {
+                    "type": "DotPlotDisplay",
+                    "displayId": "%s-DotPlotDisplay" % tId,
+                },
             ],
         }
-        style_json = self._prepare_track_style(trackDict)
-        trackDict["style"] = style_json
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
 
@@ -1249,7 +1258,7 @@
             # Return non-human label for use in other fields
             yield outputTrackConfig["label"]
 
-    def add_default_session(self, data):
+    def add_default_session(self, default_data):
         """
         Add some default session settings: set some assemblies/tracks on/off
         """
@@ -1267,21 +1276,26 @@
         for track_conf in self.tracksToAdd:
             track_types[track_conf["trackId"]] = track_conf["type"]
             tId = track_conf["trackId"]
-            if tId in data["visibility"]["default_on"]:
+            # default_on visibility is no longer consulted here: every track is added to the session
+            style_data = dict(default_data.get("style", {}).get(tId) or {})
+            if not style_data:
+                logging.warning("### No style data in default data for %s", tId)
                 style_data = {"type": "LinearBasicDisplay"}
-                if "displays" in track_conf:
-                    style_data["type"] = track_conf["displays"][0]["type"]
-                if track_conf.get("style_labels", None):
-                    # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work
-                    # TODO move this to per track displays?
-                    style_data["labels"] = track_conf["style_labels"]
-                tracks_data.append(
-                    {
-                        "type": track_types[tId],
-                        "configuration": tId,
-                        "displays": [style_data],
-                    }
-                )
+            if "displays" in track_conf:
+                disp = track_conf["displays"][0]["type"]
+                style_data["type"] = disp
+                style_data["configuration"] = "%s-%s" % (tId, disp)
+            if track_conf.get("style_labels", None):
+                # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work
+                # TODO move this to per track displays?
+                style_data["labels"] = track_conf["style_labels"]
+            tracks_data.append(
+                {
+                    "type": track_types[tId],
+                    "configuration": tId,
+                    "displays": [style_data],
+                }
+            )
 
         # The view for the assembly we're adding
         view_json = {"type": "LinearGenomeView", "tracks": tracks_data}
@@ -1290,13 +1304,13 @@
         drdict = {
             "reversed": False,
             "assemblyName": self.genome_name,
-            "start": 2000,
-            "end": 200000,
+            "start": 1,
+            "end": 100000,
             "refName": "x",
         }
 
-        if data.get("defaultLocation", ""):
-            ddl = data["defaultLocation"]
+        if default_data.get("defaultLocation", ""):
+            ddl = default_data["defaultLocation"]
             loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl)
             # allow commas like 100,000 but ignore as integer
             if loc_match:
@@ -1324,7 +1338,7 @@
             logging.info(
                 "@@@ no contig name found for default session - please add one!"
             )
-        session_name = data.get("session_name", "New session")
+        session_name = default_data.get("session_name", "New session")
         for key, value in mapped_chars.items():
             session_name = session_name.replace(value, key)
         # Merge with possibly existing defaultSession (if upgrading a jbrowse instance)
@@ -1382,7 +1396,9 @@
         """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now"""
         dest = self.outdir
         if realclone:
-            self.subprocess_check_call(['jbrowse', 'create', dest,"-f", '--tag', f"{JB2VER}"])
+            self.subprocess_check_call(
+                ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"]
+            )
         else:
             shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True)
         for fn in [
@@ -1400,14 +1416,8 @@
 
 
 def parse_style_conf(item):
-    if "type" in item.attrib and item.attrib["type"] in [
-        "boolean",
-        "integer",
-    ]:
-        if item.attrib["type"] == "boolean":
-            return item.text in ("yes", "true", "True")
-        elif item.attrib["type"] == "integer":
-            return int(item.text)
+    if item.text and item.text.lower() in ("false", "true", "yes", "no"):
+        return item.text.lower() in ("yes", "true")
     else:
         return item.text
 
@@ -1473,6 +1483,7 @@
         trackfiles = track.findall("files/trackFile")
         if trackfiles:
             for x in track.findall("files/trackFile"):
+                track_conf["label"] = x.attrib["label"]
                 track_conf["useuri"] = x.attrib["useuri"]
                 if is_multi_bigwig:
                     multi_bigwig_paths.append(
@@ -1521,6 +1532,11 @@
             track_conf["style"] = {
                 item.tag: parse_style_conf(item) for item in track.find("options/style")
             }
+        else:
+            track_conf["style"] = {}
+        tst = track_conf["style"].get("type", None)
+        if tst:
+            track_conf["style"]["configuration"] = "%s-%s" % (track_conf["label"], tst)
         if track.find("options/style_labels"):
             track_conf["style_labels"] = {
                 item.tag: parse_style_conf(item)
@@ -1530,12 +1546,6 @@
         track_conf["conf"] = etree_to_dict(track.find("options"))
         track_conf["category"] = track.attrib["cat"]
         track_conf["format"] = track.attrib["format"]
-        try:
-            # Only pertains to gff3 + blastxml. TODO?
-            track_conf["style"] = {t.tag: t.text for t in track.find("options/style")}
-        except TypeError:
-            track_conf["style"] = {}
-            pass
         keys = jc.process_annotations(track_conf)
 
         if keys:
@@ -1544,9 +1554,7 @@
                     track.attrib.get("visibility", "default_off")
                 ].append(key)
                 if track_conf.get("style", None):
-                    default_session_data["style"][key] = track_conf[
-                        "style"
-                    ]  # TODO do we need this anymore?
+                    default_session_data["style"][key] = track_conf["style"]
                 if track_conf.get("style_lables", None):
                     default_session_data["style_labels"][key] = track_conf.get(
                         "style_labels", None