changeset 19:bde6b1d09f7d draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 1290bf486bc55c02fecd0327de10a28655a18e81-dirty
author fubar
date Tue, 30 Jan 2024 06:05:03 +0000
parents 2e6c48910819
children 9c7aa5885721
files GFFOutput.py autogenJB2.py autogenJB2.xml gff3_rebase.py jb2_GFF/GFFOutput.py jb2_GFF/GFFParser.py jb2_GFF/__init__.py jb2_GFF/__pycache__/GFFOutput.cpython-310.pyc jb2_GFF/__pycache__/GFFParser.cpython-310.pyc jb2_GFF/__pycache__/__init__.cpython-310.pyc jb2_GFF/_utils.py jbrowse2.py jbrowse2.xml macros.xml
diffstat 14 files changed, 423 insertions(+), 1714 deletions(-)
--- a/GFFOutput.py	Mon Jan 29 02:34:43 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,213 +0,0 @@
-"""Output Biopython SeqRecords and SeqFeatures to GFF3 format.
-
-The target format is GFF3, the current GFF standard:
-    http://www.sequenceontology.org/gff3.shtml
-"""
-from six.moves import urllib
-
-from Bio import SeqIO
-
-
-class _IdHandler:
-    """Generate IDs for GFF3 Parent/Child
-    relationships where they don't exist."""
-
-    def __init__(self):
-        self._prefix = "biopygen"
-        self._counter = 1
-        self._seen_ids = []
-
-    def _generate_id(self, quals):
-        """Generate a unique ID not present in our existing IDs."""
-        gen_id = self._get_standard_id(quals)
-        if gen_id is None:
-            while 1:
-                gen_id = "%s%s" % (self._prefix, self._counter)
-                if gen_id not in self._seen_ids:
-                    break
-                self._counter += 1
-        return gen_id
-
-    def _get_standard_id(self, quals):
-        """Retrieve standardized IDs from other sources like NCBI GenBank.
-
-        This tries to find IDs from known key/values when stored differently
-        than GFF3 specifications.
-        """
-        possible_keys = ["transcript_id", "protein_id"]
-        for test_key in possible_keys:
-            if test_key in quals:
-                cur_id = quals[test_key]
-                if isinstance(cur_id, tuple) or isinstance(cur_id, list):
-                    return cur_id[0]
-                else:
-                    return cur_id
-        return None
-
-    def update_quals(self, quals, has_children):
-        """Update a set of qualifiers, adding an ID if necessary."""
-        cur_id = quals.get("ID", None)
-        # if we have an ID, record it
-        if cur_id:
-            if not isinstance(cur_id, list) and not isinstance(cur_id, tuple):
-                cur_id = [cur_id]
-            for add_id in cur_id:
-                self._seen_ids.append(add_id)
-        # if we need one and don't have it, create a new one
-        elif has_children:
-            new_id = self._generate_id(quals)
-            self._seen_ids.append(new_id)
-            quals["ID"] = [new_id]
-        return quals
-
-
-class GFF3Writer:
-    """Write GFF3 files starting with standard Biopython objects."""
-
-    def __init__(self):
-        pass
-
-    def write(self, recs, out_handle, include_fasta=False):
-        """Write the provided records to the given handle in GFF3 format."""
-        id_handler = _IdHandler()
-        self._write_header(out_handle)
-        fasta_recs = []
-        try:
-            recs = iter(recs)
-        except TypeError:
-            recs = [recs]
-        for rec in recs:
-            self._write_rec(rec, out_handle)
-            self._write_annotations(rec.annotations, rec.id, len(rec.seq), out_handle)
-            for sf in rec.features:
-                sf = self._clean_feature(sf)
-                id_handler = self._write_feature(sf, rec.id, out_handle, id_handler)
-            if include_fasta and len(rec.seq) > 0:
-                fasta_recs.append(rec)
-        if len(fasta_recs) > 0:
-            self._write_fasta(fasta_recs, out_handle)
-
-    def _clean_feature(self, feature):
-        quals = {}
-        for key, val in feature.qualifiers.items():
-            if not isinstance(val, (list, tuple)):
-                val = [val]
-            val = [str(x) for x in val]
-            quals[key] = val
-        feature.qualifiers = quals
-        # Support for Biopython 1.68 and above, which removed sub_features
-        if not hasattr(feature, "sub_features"):
-            feature.sub_features = []
-        clean_sub = [self._clean_feature(f) for f in feature.sub_features]
-        feature.sub_features = clean_sub
-        return feature
-
-    def _write_rec(self, rec, out_handle):
-        # if we have a SeqRecord, write out optional directive
-        if len(rec.seq) > 0:
-            out_handle.write("##sequence-region %s 1 %s\n" % (rec.id, len(rec.seq)))
-
-    def _get_phase(self, feature):
-        if "phase" in feature.qualifiers:
-            phase = feature.qualifiers["phase"][0]
-        elif feature.type == "CDS":
-            phase = int(feature.qualifiers.get("codon_start", [1])[0]) - 1
-        else:
-            phase = "."
-        return str(phase)
-
-    def _write_feature(self, feature, rec_id, out_handle, id_handler, parent_id=None):
-        """Write a feature with location information."""
-        if feature.location.strand == 1:
-            strand = "+"
-        elif feature.location.strand == -1:
-            strand = "-"
-        else:
-            strand = "."
-        # remove any standard features from the qualifiers
-        quals = feature.qualifiers.copy()
-        for std_qual in ["source", "score", "phase"]:
-            if std_qual in quals and len(quals[std_qual]) == 1:
-                del quals[std_qual]
-        # add a link to a parent identifier if it exists
-        if parent_id:
-            if "Parent" not in quals:
-                quals["Parent"] = []
-            quals["Parent"].append(parent_id)
-        quals = id_handler.update_quals(quals, len(feature.sub_features) > 0)
-        if feature.type:
-            ftype = feature.type
-        else:
-            ftype = "sequence_feature"
-        parts = [
-            str(rec_id),
-            feature.qualifiers.get("source", ["feature"])[0],
-            ftype,
-            str(feature.location.start + 1),  # 1-based indexing
-            str(feature.location.end),
-            feature.qualifiers.get("score", ["."])[0],
-            strand,
-            self._get_phase(feature),
-            self._format_keyvals(quals),
-        ]
-        out_handle.write("\t".join(parts) + "\n")
-        for sub_feature in feature.sub_features:
-            id_handler = self._write_feature(
-                sub_feature,
-                rec_id,
-                out_handle,
-                id_handler,
-                quals["ID"][0],
-            )
-        return id_handler
-
-    def _format_keyvals(self, keyvals):
-        format_kvs = []
-        for key in sorted(keyvals.keys()):
-            values = keyvals[key]
-            key = key.strip()
-            format_vals = []
-            if not isinstance(values, list) or isinstance(values, tuple):
-                values = [values]
-            for val in values:
-                val = urllib.parse.quote(str(val).strip(), safe=":/ ")
-                if (key and val) and val not in format_vals:
-                    format_vals.append(val)
-            format_kvs.append("%s=%s" % (key, ",".join(format_vals)))
-        return ";".join(format_kvs)
-
-    def _write_annotations(self, anns, rec_id, size, out_handle):
-        """Add annotations which refer to an entire sequence."""
-        format_anns = self._format_keyvals(anns)
-        if format_anns:
-            parts = [
-                rec_id,
-                "annotation",
-                "remark",
-                "1",
-                str(size if size > 1 else 1),
-                ".",
-                ".",
-                ".",
-                format_anns,
-            ]
-            out_handle.write("\t".join(parts) + "\n")
-
-    def _write_header(self, out_handle):
-        """Write out standard header directives."""
-        out_handle.write("##gff-version 3\n")
-
-    def _write_fasta(self, recs, out_handle):
-        """Write sequence records using the ##FASTA directive."""
-        out_handle.write("##FASTA\n")
-        SeqIO.write(recs, out_handle, "fasta")
-
-
-def write(recs, out_handle, include_fasta=False):
-    """High level interface to write GFF3 files from SeqRecords and SeqFeatures.
-
-    If include_fasta is True, the GFF3 file will include sequence information
-    using the ##FASTA directive.
-    """
-    writer = GFF3Writer()
-    return writer.write(recs, out_handle, include_fasta)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/autogenJB2.py	Tue Jan 30 06:05:03 2024 +0000
@@ -0,0 +1,88 @@
+import argparse
+import os
+
+from jbrowse2 import jbrowseConnector as jbC
+
+
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="", epilog="")
+    parser.add_argument("--yaml", help="Track Configuration")
+    parser.add_argument("--outdir", help="Output directory", default="out")
+    parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.0.1")
+    args = parser.parse_args()
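+    # The output directory is expected to already hold the data files, named so that
+    # this script can infer their role: genome_<n>_<name>.fasta for reference
+    # sequences, and <format>_<n>_<trackname>.<ext> for the tracks derived from them.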
+    if os.path.exists(args.outdir):
+        root = args.outdir
+        dirList = list(os.scandir(root))  # materialise so the listing can be traversed more than once
+        subdirs = [f.path for f in dirList if f.is_dir()]
+        genome_paths = [f.path for f in dirList if f.is_file() and f.name.startswith('genome')]
+        if len(genome_paths) > 0:
+            genome_fnames = ["_".join(os.path.basename(x).split('_')[2:]) for x in genome_paths]  # expect genome_1_genomename.fasta etc
+            jc = jbC(
+                outdir=args.outdir,
+                genomes=[
+                    {
+                        "path":  x,
+                        "meta": {"name" : genome_fnames[i], },
+                    }
+                    for i,x in enumerate(genome_paths)
+                ],
+            )
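+            # register each genome found above as an assembly in the JBrowse2 config
+            # (process_genomes is provided by the jbrowseConnector imported above)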
+            jc.process_genomes()
+            # .add_default_view() replace from https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py
+            default_session_data = {
+                "visibility": {
+                    "default_on": [],
+                    "default_off": [],
+                },
+                "style": {},
+                "style_labels": {},
+            }
+
+            track_paths = [f.path for f in dirList if f.is_file() and not f.name.startswith('genome')]
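+            # Assumed naming convention for track files (mirroring the genome files above):
+            # <format>_<n>_<trackname>.<ext>, e.g. bed_1_mytrack.bed, so the format and a
+            # display name can be recovered from the file name in the loop below.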
+            for i, track in enumerate(track_paths):
+                track_conf = {}
+                track_conf['format'] = os.path.basename(track).split('_')[0]
+                track_conf["name"] = "_".join(os.path.basename(track).split('_')[2:])  # expect bed_1_trackname.bed etc
+                fext = os.path.splitext(os.path.basename(track))[1].replace('.', '')
+                track_conf["label"] = "%s_%i" % (os.path.basename(track), i)
+                track_conf["trackfiles"] = []
+                keys = jc.process_annotations(track_conf)
+
+                if keys:
+                    for key in keys:
+                        default_session_data["visibility"][
+                            track_conf.get("visibility", "default_off")
+                        ].append(key)
+                        if track_conf.get("style", None):
+                            default_session_data["style"][key] = track_conf[
+                                "style"
+                            ]  # TODO do we need this anymore?
+                        if track_conf.get("style_lables", None):
+                            default_session_data["style_labels"][key] = track_conf.get(
+                                "style_labels", None
+                            )
+            # default_session_data["defaultLocation"] = root.find(
+                # "metadata/general/defaultLocation"
+            # ).text
+            # default_session_data["session_name"] = root.find(
+                # "metadata/general/session_name"
+            # ).text
+            # general_data = {
+                # "analytics": root.find("metadata/general/analytics").text,
+                # "primary_color": root.find("metadata/general/primary_color").text,
+                # "secondary_color": root.find("metadata/general/secondary_color").text,
+                # "tertiary_color": root.find("metadata/general/tertiary_color").text,
+                # "quaternary_color": root.find("metadata/general/quaternary_color").text,
+                # "font_size": root.find("metadata/general/font_size").text,
+            # }
+            # jc.add_general_configuration(general_data)
+            trackconf = jc.config_json.get("tracks", None)
+            if trackconf:
+                jc.config_json["tracks"].update(jc.tracksToAdd)
+            else:
+                jc.config_json["tracks"] = jc.tracksToAdd
+            jc.write_config()
+            jc.add_default_session(default_session_data)
+            # jc.text_index() not sure what broke here.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/autogenJB2.xml	Tue Jan 30 06:05:03 2024 +0000
@@ -0,0 +1,148 @@
+<tool id="autogenjb2" name="autogenjb2" version="2.10.0_0" profile="22.05">
+    <description>Files to JBrowse2</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edamInc"/>
+    <xrefs>
+        <xref type="bio.tools">jbrowse2</xref>
+    </xrefs>
+    <expand macro="requirements"/>
+    <version_command>python '${__tool_directory__}/autogenJB2.py' --version</version_command>
+    <command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/autogenJB2.py'
+--outdir '$jbrowseme'
+
+&&
+
+cp '$output.files_path/index.html' '$output'
+
+## Ugly testing hack since I cannot get <extra_files> to test the files I want to test. Hmph.
+#if str($uglyTestingHack) == "enabled":
+ &&   cp '$trackxml' '$output'
+#end if
+  ]]></command>
+    <inputs>
+        <param
+                    label="Collection of files specially named to become tracks"
+                    name="jbrowseme"
+                    type="data_collection">
+        </param>
+        <param type="hidden" name="uglyTestingHack" value="" />
+    </inputs>
+    <outputs>
+        <data format="html" name="output" label="JBrowse2 on ${on_string}"/>
+    </outputs>
+
+    <help><![CDATA[
+
+JBrowse2-in-Galaxy
+==================
+
+JBrowse2-in-Galaxy offers a highly configurable, workflow-compatible
+alternative to JBrowse1-in-Galaxy and Trackster.
+
+Compared to JBrowse1-in-Galaxy, there is no support for alternative codons for unusual genomes,
+and detailed track styling is not yet implemented. Send code.
+JBrowse1 development has now ceased in favour of JBrowse2.
+
+Use and local viewing
+=====================
+
+
+A JBrowse2 history item can be opened by viewing it (the "eye" icon).
+
+The same browser data and setup can also be downloaded as a compressed zip archive by clicking the download ("floppy disk") icon in the history.
+This can be shared and viewed without Galaxy.
+
+Outside Galaxy, a web server is needed to serve the browser. A local Python web server can be started using a script included in each archive,
+assuming that Python3 is already working on your desktop - if not, you will have to install it first. Unzip the archive (*unzip [filename].zip*) and change
+directory to the first level of the unzipped archive. It contains a file named *jb2_webserver.py*.
+
+With python3 installed,
+
+*python3 jb2_webserver.py*
+
+will serve the unarchived JBrowse2 configuration from the same directory as the python script automatically. If a new browser window does not open
+but the script appears to be running, try pointing your web browser at the default address, *localhost:8080*.
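+
+For example, assuming the archive was saved as *jbrowse2.zip* (the real file name will differ), the whole procedure looks something like::
+
+    unzip jbrowse2.zip
+    cd <unzipped directory>
+    python3 jb2_webserver.py
+    # then browse to http://localhost:8080 if a window does not open automatically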
+
+Overview
+--------
+
+JBrowse is a fast, embeddable genome browser built completely with
+JavaScript and HTML5.
+
+The JBrowse-in-Galaxy (JiG) tool was written to help build complex
+JBrowse installations straight from Galaxy. It allows you to build up a JBrowse instance without worrying
+about how to run the command line tools to format your data, and which
+options need to be supplied and where.
+
+The JBrowse-in-Galaxy tool has been rejected by a `Galaxy IUC
+<https://github.com/galaxyproject/tools-iuc/issues>`__ reviewer.
+It is maintained by https://github.com/fubar2, who can help you
+with missing features or bugs in the tool. For the record, he remains unconvinced by the reviewer's logic,
+and disturbed by the distinctly coercive approach to introducing new code,
+compared to the more usual method of providing a working PR.
+
+Options
+-------
+
+**Reference or Assembly**
+
+Choose either a built-in genome or select one from your history.
+
+Track coordinates and contig names *must* match this reference precisely
+or they will not display.
+
+**Track Groups** each represent a set of tracks in a single category.
+
+Annotation Tracks
+-----------------
+
+GFF3/BED
+~~~~~~~~
+
+Standard feature tracks. They usually highlight genes, mRNAs and other features of interest along a genomic region.
+
+When these contain tens of millions of features, such as repeat regions from a VGP assembly, displaying them individually leads
+to extremely slow loading when a large region is in view, unless the "LinearPileupDisplay" display option is
+selected for that track in the styling options section. The default is LinearBasicDisplay, which shows full feature detail and works
+well for relatively sparse BED files. An alternative for very dense tracks is to summarise them as a bigwig over a set of windows
+spanning each assembly or reference contig.
+
+BAM Pileups
+~~~~~~~~~~~
+
+We support BAM files and can automatically generate SNP tracks from
+the BAM data.
+
+
+BlastXML
+~~~~~~~~
+
+JiG now supports both blastn and blastp datasets. JiG internally uses a
+BlastXML to gapped GFF3 tool to convert your BlastXML datasets into a
+format amenable to visualization in JBrowse. This tool is also
+available separately from the IUC on the ToolShed.
+
+**Minimum Gap Size** reflects how long a gap must be before it becomes a
+real gap in the processed gff3 file. If the minimum gap size is set high,
+say 100nt, many of the smaller gaps disappear, and the features on
+both sides are merged into one longer feature. This setting is
+inversely proportional to runtime and output file size. *Do not set this
+to a low value for large datasets*: a low value produces
+extremely large outputs and extremely long runtimes. The
+default was configured based on the author's experience, but the
+author only works on small viruses. It is *strongly* recommended that
+you filter your blast results before display, e.g. picking out the top
+10 hits or so.
+
+The **Protein blast search** option merely tells the underlying tools to
+scale feature locations by a factor of three (protein to nucleotide coordinates).
+
+
+@ATTRIBUTION@
+]]></help>
+    <expand macro="citations"/>
+</tool>
--- a/gff3_rebase.py	Mon Jan 29 02:34:43 2024 +0000
+++ b/gff3_rebase.py	Tue Jan 30 06:05:03 2024 +0000
@@ -117,7 +117,7 @@
         start *= 3
         end *= 3
 
-    if parent.location.strand >= 0:
+    if parent.location.strand is not None and parent.location.strand >= 0:
         ns = parent.location.start + start
         ne = parent.location.start + end
         st = +1
@@ -136,7 +136,8 @@
         ns %= 3
     if ne < 0:
         ne %= 3
-
+    if ns > ne:
+        ne, ns = ns, ne  # guard against reversed coordinates, which occasionally turn up
     feature.location = FeatureLocation(ns, ne, strand=st)
 
     if hasattr(feature, "sub_features"):
--- a/jb2_GFF/GFFOutput.py	Mon Jan 29 02:34:43 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,233 +0,0 @@
-"""Output Biopython SeqRecords and SeqFeatures to GFF3 format.
-
-The target format is GFF3, the current GFF standard:
-    http://www.sequenceontology.org/gff3.shtml
-"""
-from six.moves import urllib
-
-from Bio import SeqIO
-
-
-class _IdHandler:
-    """Generate IDs for GFF3 Parent/Child
-    relationships where they don't exist."""
-
-    def __init__(self):
-        self._prefix = "biopygen"
-        self._counter = 1
-        self._seen_ids = []
-
-    def _generate_id(self, quals):
-        """Generate a unique ID not present in our existing IDs."""
-        gen_id = self._get_standard_id(quals)
-        if gen_id is None:
-            while 1:
-                gen_id = "%s%s" % (self._prefix, self._counter)
-                if gen_id not in self._seen_ids:
-                    break
-                self._counter += 1
-        return gen_id
-
-    def _get_standard_id(self, quals):
-        """Retrieve standardized IDs from other sources like NCBI GenBank.
-
-        This tries to find IDs from known key/values when stored differently
-        than GFF3 specifications.
-        """
-        possible_keys = ["transcript_id", "protein_id"]
-        for test_key in possible_keys:
-            if test_key in quals:
-                cur_id = quals[test_key]
-                if isinstance(cur_id, tuple) or isinstance(
-                    cur_id, list
-                ):
-                    return cur_id[0]
-                else:
-                    return cur_id
-        return None
-
-    def update_quals(self, quals, has_children):
-        """Update a set of qualifiers, adding an ID if necessary."""
-        cur_id = quals.get("ID", None)
-        # if we have an ID, record it
-        if cur_id:
-            if not isinstance(cur_id, list) and not isinstance(
-                cur_id, tuple
-            ):
-                cur_id = [cur_id]
-            for add_id in cur_id:
-                self._seen_ids.append(add_id)
-        # if we need one and don't have it, create a new one
-        elif has_children:
-            new_id = self._generate_id(quals)
-            self._seen_ids.append(new_id)
-            quals["ID"] = [new_id]
-        return quals
-
-
-class GFF3Writer:
-    """Write GFF3 files starting with standard Biopython objects."""
-
-    def __init__(self):
-        pass
-
-    def write(self, recs, out_handle, include_fasta=False):
-        """Write the provided records to the given handle in GFF3 format."""
-        id_handler = _IdHandler()
-        self._write_header(out_handle)
-        fasta_recs = []
-        try:
-            recs = iter(recs)
-        except TypeError:
-            recs = [recs]
-        for rec in recs:
-            self._write_rec(rec, out_handle)
-            self._write_annotations(
-                rec.annotations, rec.id, len(rec.seq), out_handle
-            )
-            for sf in rec.features:
-                sf = self._clean_feature(sf)
-                id_handler = self._write_feature(
-                    sf, rec.id, out_handle, id_handler
-                )
-            if include_fasta and len(rec.seq) > 0:
-                fasta_recs.append(rec)
-        if len(fasta_recs) > 0:
-            self._write_fasta(fasta_recs, out_handle)
-
-    def _clean_feature(self, feature):
-        quals = {}
-        for key, val in feature.qualifiers.items():
-            if not isinstance(val, (list, tuple)):
-                val = [val]
-            val = [str(x) for x in val]
-            quals[key] = val
-        feature.qualifiers = quals
-        # Support for Biopython 1.68 and above, which removed sub_features
-        if not hasattr(feature, "sub_features"):
-            feature.sub_features = []
-        clean_sub = [
-            self._clean_feature(f) for f in feature.sub_features
-        ]
-        feature.sub_features = clean_sub
-        return feature
-
-    def _write_rec(self, rec, out_handle):
-        # if we have a SeqRecord, write out optional directive
-        if len(rec.seq) > 0:
-            out_handle.write(
-                "##sequence-region %s 1 %s\n" % (rec.id, len(rec.seq))
-            )
-
-    def _get_phase(self, feature):
-        if "phase" in feature.qualifiers:
-            phase = feature.qualifiers["phase"][0]
-        elif feature.type == "CDS":
-            phase = (
-                int(feature.qualifiers.get("codon_start", [1])[0]) - 1
-            )
-        else:
-            phase = "."
-        return str(phase)
-
-    def _write_feature(
-        self, feature, rec_id, out_handle, id_handler, parent_id=None
-    ):
-        """Write a feature with location information."""
-        if feature.location.strand == 1:
-            strand = "+"
-        elif feature.location.strand == -1:
-            strand = "-"
-        else:
-            strand = "."
-        # remove any standard features from the qualifiers
-        quals = feature.qualifiers.copy()
-        for std_qual in ["source", "score", "phase"]:
-            if std_qual in quals and len(quals[std_qual]) == 1:
-                del quals[std_qual]
-        # add a link to a parent identifier if it exists
-        if parent_id:
-            if "Parent" not in quals:
-                quals["Parent"] = []
-            quals["Parent"].append(parent_id)
-        quals = id_handler.update_quals(
-            quals, len(feature.sub_features) > 0
-        )
-        if feature.type:
-            ftype = feature.type
-        else:
-            ftype = "sequence_feature"
-        parts = [
-            str(rec_id),
-            feature.qualifiers.get("source", ["feature"])[0],
-            ftype,
-            str(feature.location.start + 1),  # 1-based indexing
-            str(feature.location.end),
-            feature.qualifiers.get("score", ["."])[0],
-            strand,
-            self._get_phase(feature),
-            self._format_keyvals(quals),
-        ]
-        out_handle.write("\t".join(parts) + "\n")
-        for sub_feature in feature.sub_features:
-            id_handler = self._write_feature(
-                sub_feature,
-                rec_id,
-                out_handle,
-                id_handler,
-                quals["ID"][0],
-            )
-        return id_handler
-
-    def _format_keyvals(self, keyvals):
-        format_kvs = []
-        for key in sorted(keyvals.keys()):
-            values = keyvals[key]
-            key = key.strip()
-            format_vals = []
-            if not isinstance(values, list) or isinstance(
-                values, tuple
-            ):
-                values = [values]
-            for val in values:
-                val = urllib.parse.quote(str(val).strip(), safe=":/ ")
-                if (key and val) and val not in format_vals:
-                    format_vals.append(val)
-            format_kvs.append("%s=%s" % (key, ",".join(format_vals)))
-        return ";".join(format_kvs)
-
-    def _write_annotations(self, anns, rec_id, size, out_handle):
-        """Add annotations which refer to an entire sequence."""
-        format_anns = self._format_keyvals(anns)
-        if format_anns:
-            parts = [
-                rec_id,
-                "annotation",
-                "remark",
-                "1",
-                str(size if size > 1 else 1),
-                ".",
-                ".",
-                ".",
-                format_anns,
-            ]
-            out_handle.write("\t".join(parts) + "\n")
-
-    def _write_header(self, out_handle):
-        """Write out standard header directives."""
-        out_handle.write("##gff-version 3\n")
-
-    def _write_fasta(self, recs, out_handle):
-        """Write sequence records using the ##FASTA directive."""
-        out_handle.write("##FASTA\n")
-        SeqIO.write(recs, out_handle, "fasta")
-
-
-def write(recs, out_handle, include_fasta=False):
-    """High level interface to write GFF3 files from SeqRecords and SeqFeatures.
-
-    If include_fasta is True, the GFF3 file will include sequence information
-    using the ##FASTA directive.
-    """
-    writer = GFF3Writer()
-    return writer.write(recs, out_handle, include_fasta)
--- a/jb2_GFF/GFFParser.py	Mon Jan 29 02:34:43 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1099 +0,0 @@
-"""Parse GFF files into features attached to Biopython SeqRecord objects.
-
-This deals with GFF3 formatted files, a tab delimited format for storing
-sequence features and annotations:
-
-http://www.sequenceontology.org/gff3.shtml
-
-It will also deal with older GFF versions (GTF/GFF2):
-
-http://www.sanger.ac.uk/Software/formats/GFF/GFF_Spec.shtml
-http://mblab.wustl.edu/GTF22.html
-
-The implementation utilizes map/reduce parsing of GFF using Disco. Disco
-(http://discoproject.org) is a Map-Reduce framework for Python utilizing
-Erlang for parallelization. The code works on a single processor without
-Disco using the same architecture.
-"""
-import os
-import copy
-import json
-import re
-import collections
-import io
-import itertools
-import warnings
-import six
-from six.moves import urllib
-
-from Bio.SeqRecord import SeqRecord
-from Bio import SeqFeature
-from Bio import SeqIO
-from Bio import BiopythonDeprecationWarning
-
-import disco
-
-# Make defaultdict compatible with versions of python older than 2.4
-try:
-    collections.defaultdict
-except AttributeError:
-    import _utils
-
-    collections.defaultdict = _utils.defaultdict
-
-unknown_seq_avail = False
-try:
-    from Bio.Seq import UnknownSeq
-
-    unknown_seq_avail = True
-except ImportError:
-    # Starting with biopython 1.81, has been removed
-    from Bio.Seq import _UndefinedSequenceData
-    from Bio.Seq import Seq
-
-
-warnings.simplefilter("ignore", BiopythonDeprecationWarning)
-
-
-def _gff_line_map(line, params):
-    """Map part of Map-Reduce; parses a line of GFF into a dictionary.
-
-    Given an input line from a GFF file, this:
-    - decides if the file passes our filtering limits
-    - if so:
-        - breaks it into component elements
-        - determines the type of attribute (flat, parent, child or annotation)
-        - generates a dictionary of GFF info which can be serialized as JSON
-    """
-
-    def _merge_keyvals(parts):
-        """Merge key-values escaped by quotes
-        that are improperly split at semicolons."""
-        out = []
-        for i, p in enumerate(parts):
-            if (
-                i > 0
-                and len(p) == 1
-                and p[0].endswith('"')
-                and not p[0].startswith('"')
-            ):
-                if out[-1][-1].startswith('"'):
-                    prev_p = out.pop(-1)
-                    to_merge = prev_p[-1]
-                    prev_p[-1] = "%s; %s" % (to_merge, p[0])
-                    out.append(prev_p)
-            else:
-                out.append(p)
-        return out
-
-    gff3_kw_pat = re.compile(r"\w+=")
-
-    def _split_keyvals(keyval_str):
-        """Split key-value pairs in a GFF2, GTF and GFF3 compatible way.
-
-        GFF3 has key value pairs like:
-          count=9;gene=amx-2;sequence=SAGE:aacggagccg
-        GFF2 and GTF have:
-          Sequence "Y74C9A" ; Note "Clone Y74C9A; Genbank AC024206"
-          name "fgenesh1_pg.C_chr_1000003"; transcriptId 869
-        """
-        quals = collections.defaultdict(list)
-        if keyval_str is None:
-            return quals
-        # ensembl GTF has a stray semi-colon at the end
-        if keyval_str[-1] == ";":
-            keyval_str = keyval_str[:-1]
-        # GFF2/GTF has a semi-colon with at least one space after it.
-        # It can have spaces on both sides; wormbase does this.
-        # GFF3 works with no spaces.
-        # Split at the first one we can recognize as working
-        parts = keyval_str.split(" ; ")
-        if len(parts) == 1:
-            parts = [x.strip() for x in keyval_str.split(";")]
-        # check if we have GFF3 style key-vals (with =)
-        is_gff2 = True
-        if gff3_kw_pat.match(parts[0]):
-            is_gff2 = False
-            key_vals = _merge_keyvals([p.split("=") for p in parts])
-        # otherwise, we are separated by a space with a key as the first item
-        else:
-            pieces = []
-            for p in parts:
-                # fix misplaced semi-colons in keys in some GFF2 files
-                if p and p[0] == ";":
-                    p = p[1:]
-                pieces.append(p.strip().split(" "))
-            key_vals = [(p[0], " ".join(p[1:])) for p in pieces]
-        for item in key_vals:
-            # standard in-spec items are key=value
-            if len(item) == 2:
-                key, val = item
-            # out-of-spec files can have just key values. We set an empty value
-            # which will be changed to true later to standardize.
-            else:
-                assert len(item) == 1, item
-                key = item[0]
-                val = ""
-            # remove quotes in GFF2 files
-            quoted = False
-            if len(val) > 0 and val[0] == '"' and val[-1] == '"':
-                quoted = True
-                val = val[1:-1]
-            if val:
-                if quoted:
-                    quals[key].append(val)
-                else:
-                    quals[key].extend(
-                        [v for v in val.split(",") if v]
-                    )
-            # if we don't have a value, make this a key=True/False style
-            # attribute
-            else:
-                quals[key].append("true")
-        for key, vals in quals.items():
-            quals[key] = [urllib.parse.unquote(v) for v in vals]
-        return quals, is_gff2
-
-    def _nest_gff2_features(gff_parts):
-        """Provide nesting of GFF2 transcript parts with transcript IDs.
-
-        exons and coding sequences are mapped to a parent with a transcript_id
-        in GFF2. This is implemented differently at different genome centers
-        and this function attempts to resolve that and map things to the GFF3
-        way of doing them.
-        """
-        # map protein or transcript ids to a parent
-        for transcript_id in [
-            "transcript_id",
-            "transcriptId",
-            "proteinId",
-        ]:
-            try:
-                gff_parts["quals"]["Parent"] = gff_parts["quals"][
-                    transcript_id
-                ]
-                break
-            except KeyError:
-                pass
-        # case for WormBase GFF -- everything labelled as Transcript or CDS
-        for flat_name in ["Transcript", "CDS"]:
-            if flat_name in gff_parts["quals"]:
-                # parent types
-                if gff_parts["type"] in [flat_name]:
-                    if not gff_parts["id"]:
-                        gff_parts["id"] = gff_parts["quals"][
-                            flat_name
-                        ][0]
-                        gff_parts["quals"]["ID"] = [gff_parts["id"]]
-                # children types
-                elif gff_parts["type"] in [
-                    "intron",
-                    "exon",
-                    "three_prime_UTR",
-                    "coding_exon",
-                    "five_prime_UTR",
-                    "CDS",
-                    "stop_codon",
-                    "start_codon",
-                ]:
-                    gff_parts["quals"]["Parent"] = gff_parts["quals"][
-                        flat_name
-                    ]
-                break
-
-        return gff_parts
-
-    strand_map = {"+": 1, "-": -1, "?": None, None: None}
-    line = line.strip()
-    if line[:2] == "##":
-        return [("directive", line[2:])]
-    elif line and line[0] != "#":
-        parts = line.split("\t")
-        should_do = True
-        if params.limit_info:
-            for limit_name, limit_values in params.limit_info.items():
-                cur_id = tuple(
-                    [parts[i] for i in params.filter_info[limit_name]]
-                )
-                if cur_id not in limit_values:
-                    should_do = False
-                    break
-        if should_do:
-            assert len(parts) >= 8, line
-            # not python2.4 compatible but easier to understand
-            # gff_parts = [(None if p == '.' else p) for p in parts]
-            gff_parts = []
-            for p in parts:
-                if p == ".":
-                    gff_parts.append(None)
-                else:
-                    gff_parts.append(p)
-            gff_info = dict()
-            # collect all of the base qualifiers for this item
-            if len(parts) > 8:
-                quals, is_gff2 = _split_keyvals(gff_parts[8])
-            else:
-                quals, is_gff2 = collections.defaultdict(list), False
-            gff_info["is_gff2"] = is_gff2
-            if gff_parts[1]:
-                quals["source"].append(gff_parts[1])
-            if gff_parts[5]:
-                quals["score"].append(gff_parts[5])
-            if gff_parts[7]:
-                quals["phase"].append(gff_parts[7])
-            gff_info["quals"] = dict(quals)
-            gff_info["rec_id"] = gff_parts[0]
-            # if we are describing a location, then we are a feature
-            if gff_parts[3] and gff_parts[4]:
-                gff_info["location"] = [
-                    int(gff_parts[3]) - 1,
-                    int(gff_parts[4]),
-                ]
-                gff_info["type"] = gff_parts[2]
-                gff_info["id"] = quals.get("ID", [""])[0]
-                gff_info["strand"] = strand_map.get(
-                    gff_parts[6], None
-                )
-                if is_gff2:
-                    gff_info = _nest_gff2_features(gff_info)
-                # features that have parents need to link so we can pick up
-                # the relationship
-                if "Parent" in gff_info["quals"]:
-                    # check for self referential parent/child relationships
-                    # remove the ID, which is not useful
-                    for p in gff_info["quals"]["Parent"]:
-                        if p == gff_info["id"]:
-                            gff_info["id"] = ""
-                            del gff_info["quals"]["ID"]
-                            break
-                    final_key = "child"
-                elif gff_info["id"]:
-                    final_key = "parent"
-                # Handle flat features
-                else:
-                    final_key = "feature"
-            # otherwise, associate these annotations with the full record
-            else:
-                final_key = "annotation"
-            if params.jsonify:
-                return [(final_key, json.dumps(gff_info))]
-            else:
-                return [(final_key, gff_info)]
-    return []
-
-
-def _gff_line_reduce(map_results, out, params):
-    """Reduce part of Map-Reduce; combines results of parsed features."""
-    final_items = dict()
-    for gff_type, final_val in map_results:
-        if params.jsonify and gff_type not in ["directive"]:
-            final_val = json.loads(final_val)
-        try:
-            final_items[gff_type].append(final_val)
-        except KeyError:
-            final_items[gff_type] = [final_val]
-    for key, vals in final_items.items():
-        if params.jsonify:
-            vals = json.dumps(vals)
-        out.add(key, vals)
-
-
-class _MultiIDRemapper:
-    """Provide an ID remapping for cases where a parent has a non-unique ID.
-
-    Real life GFF3 cases have non-unique ID attributes, which we fix here
-    by using the unique sequence region to assign children to the right
-    parent.
-    """
-
-    def __init__(self, base_id, all_parents):
-        self._base_id = base_id
-        self._parents = all_parents
-
-    def remap_id(self, feature_dict):
-        rstart, rend = feature_dict["location"]
-        for index, parent in enumerate(self._parents):
-            pstart, pend = parent["location"]
-            if rstart >= pstart and rend <= pend:
-                if index > 0:
-                    return "%s_%s" % (self._base_id, index + 1)
-                else:
-                    return self._base_id
-        # if we haven't found a location match but parents are umabiguous,
-        # return that
-        if len(self._parents) == 1:
-            return self._base_id
-        raise ValueError(
-            "Did not find remapped ID location: %s, %s, %s"
-            % (
-                self._base_id,
-                [p["location"] for p in self._parents],
-                feature_dict["location"],
-            )
-        )
-
-
-class _AbstractMapReduceGFF:
-    """Base class providing general GFF parsing for local and remote classes.
-
-    This class should be subclassed to provide a concrete class to parse
-    GFF under specific conditions. These classes need to implement
-    the _gff_process function, which returns a dictionary of SeqRecord
-    information.
-    """
-
-    def __init__(self, create_missing=True):
-        """Initialize GFF parser
-
-        create_missing - If True, create blank records for GFF ids not in
-        the base_dict. If False, an error will be raised.
-        """
-        self._create_missing = create_missing
-        self._map_fn = _gff_line_map
-        self._reduce_fn = _gff_line_reduce
-        self._examiner = GFFExaminer()
-
-    def _gff_process(self, gff_files, limit_info, target_lines=None):
-        raise NotImplementedError("Derived class must define")
-
-    def parse(self, gff_files, base_dict=None, limit_info=None):
-        """Parse a GFF file, returning an iterator of SeqRecords.
-
-        limit_info - A dictionary specifying the regions of the GFF file
-        which should be extracted. This allows only relevant portions of a file
-        to be parsed.
-
-        base_dict - A base dictionary of SeqRecord objects which may be
-        pre-populated with sequences and other features. The new features from
-        the GFF file will be added to this dictionary.
-        """
-        for rec in self.parse_in_parts(
-            gff_files, base_dict, limit_info
-        ):
-            yield rec
-
-    def parse_in_parts(
-        self,
-        gff_files,
-        base_dict=None,
-        limit_info=None,
-        target_lines=None,
-    ):
-        """Parse a region of a GFF file specified, returning info as generated.
-
-        target_lines -- The number of lines in the file which should be used
-        for each partial parse. This should be determined based on available
-        memory.
-        """
-        for results in self.parse_simple(
-            gff_files, limit_info, target_lines
-        ):
-            if base_dict is None:
-                cur_dict = dict()
-            else:
-                cur_dict = copy.deepcopy(base_dict)
-            cur_dict = self._results_to_features(cur_dict, results)
-            all_ids = list(cur_dict.keys())
-            all_ids.sort()
-            for cur_id in all_ids:
-                yield cur_dict[cur_id]
-
-    def parse_simple(
-        self, gff_files, limit_info=None, target_lines=1
-    ):
-        """Simple parse which does not build or nest features.
-
-        This returns a simple dictionary representation of each line in the
-        GFF file.
-        """
-        # gracefully handle a single file passed
-        if not isinstance(gff_files, (list, tuple)):
-            gff_files = [gff_files]
-        limit_info = self._normalize_limit_info(limit_info)
-        for results in self._gff_process(
-            gff_files, limit_info, target_lines
-        ):
-            yield results
-
-    def _normalize_limit_info(self, limit_info):
-        """Turn all limit information into tuples for identical comparisons."""
-        final_limit_info = {}
-        if limit_info:
-            for key, values in limit_info.items():
-                final_limit_info[key] = []
-                for v in values:
-                    if isinstance(v, str):
-                        final_limit_info[key].append((v,))
-                    else:
-                        final_limit_info[key].append(tuple(v))
-        return final_limit_info
-
-    def _results_to_features(self, base, results):
-        """Add parsed dictionaries of results to Biopython SeqFeatures."""
-        base = self._add_annotations(
-            base, results.get("annotation", [])
-        )
-        for feature in results.get("feature", []):
-            (_, base) = self._add_toplevel_feature(base, feature)
-        base = self._add_parent_child_features(
-            base, results.get("parent", []), results.get("child", [])
-        )
-        base = self._add_seqs(base, results.get("fasta", []))
-        base = self._add_directives(
-            base, results.get("directive", [])
-        )
-        return base
-
-    def _add_directives(self, base, directives):
-        """Handle any directives or meta-data in the GFF file.
-
-        Relevant items are added as annotation meta-data to each record.
-        """
-        dir_keyvals = collections.defaultdict(list)
-        for directive in directives:
-            parts = directive.split()
-            if len(parts) > 1:
-                key = parts[0]
-                if len(parts) == 2:
-                    val = parts[1]
-                else:
-                    val = tuple(parts[1:])
-                # specific directives that need special handling
-                if (
-                    key == "sequence-region"
-                ):  # convert to Python 0-based coordinates
-                    if len(val) == 2:  # handle regions missing contig
-                        val = (int(val[0]) - 1, int(val[1]))
-                    elif len(val) == 3:
-                        val = (val[0], int(val[1]) - 1, int(val[2]))
-                dir_keyvals[key].append(val)
-        for key, vals in dir_keyvals.items():
-            for rec in base.values():
-                self._add_ann_to_rec(rec, key, vals)
-        return base
-
-    def _get_matching_record_id(self, base, find_id):
-        """Find a matching base record with the test identifier, handling tricky cases.
-
-        NCBI IDs https://en.wikipedia.org/wiki/FASTA_format#NCBI_identifiers
-        """
-        # Straight matches for identifiers
-        if find_id in base:
-            return find_id
-        # NCBI style IDs in find_id
-        elif find_id and find_id.find("|") > 0:
-            for test_id in [
-                x.strip() for x in find_id.split("|")[1:]
-            ]:
-                if test_id and test_id in base:
-                    return test_id
-        # NCBI style IDs in base IDs
-        else:
-            for base_id in base.keys():
-                if base_id.find("|") > 0:
-                    for test_id in [
-                        x.strip() for x in base_id.split("|")[1:]
-                    ]:
-                        if test_id and test_id == find_id:
-                            return base_id
-        return None
-
-    def _add_seqs(self, base, recs):
-        """Add sequence information contained in the GFF3 to records."""
-        for rec in recs:
-            match_id = self._get_matching_record_id(base, rec.id)
-            if match_id:
-                base[match_id].seq = rec.seq
-            else:
-                base[rec.id] = rec
-        return base
-
-    def _add_parent_child_features(self, base, parents, children):
-        """Add nested features with parent child relationships."""
-        multi_remap = self._identify_dup_ids(parents)
-        # add children features
-        children_prep = collections.defaultdict(list)
-        for child_dict in children:
-            child_feature = self._get_feature(child_dict)
-            for pindex, pid in enumerate(
-                child_feature.qualifiers["Parent"]
-            ):
-                if pid in multi_remap:
-                    pid = multi_remap[pid].remap_id(child_dict)
-                    child_feature.qualifiers["Parent"][pindex] = pid
-                children_prep[pid].append(
-                    (child_dict["rec_id"], child_feature)
-                )
-        children = dict(children_prep)
-        # add children to parents that exist
-        for cur_parent_dict in parents:
-            cur_id = cur_parent_dict["id"]
-            if cur_id in multi_remap:
-                cur_parent_dict["id"] = multi_remap[cur_id].remap_id(
-                    cur_parent_dict
-                )
-            cur_parent, base = self._add_toplevel_feature(
-                base, cur_parent_dict
-            )
-            cur_parent, children = self._add_children_to_parent(
-                cur_parent, children
-            )
-        # create parents for children without them (GFF2 or split/bad files)
-        while len(children) > 0:
-            parent_id, cur_children = next(
-                itertools.islice(children.items(), 1)
-            )
-            # one child, do not nest it
-            if len(cur_children) == 1:
-                rec_id, child = cur_children[0]
-                loc = (child.location.start, child.location.end)
-                rec, base = self._get_rec(
-                    base, dict(rec_id=rec_id, location=loc)
-                )
-                rec.features.append(child)
-                del children[parent_id]
-            else:
-                cur_parent, base = self._add_missing_parent(
-                    base, parent_id, cur_children
-                )
-                cur_parent, children = self._add_children_to_parent(
-                    cur_parent, children
-                )
-        return base
-
-    def _identify_dup_ids(self, parents):
-        """Identify duplicated ID attributes in potential nested parents.
-
-        According to the GFF3 spec ID attributes are supposed to be unique
-        for a file, but this is not always true in practice. This looks
-        for duplicates, and provides unique IDs sorted by locations.
-        """
-        multi_ids = collections.defaultdict(list)
-        for parent in parents:
-            multi_ids[parent["id"]].append(parent)
-        multi_ids = [
-            (mid, ps)
-            for (mid, ps) in multi_ids.items()
-            if len(parents) > 1
-        ]
-        multi_remap = dict()
-        for mid, parents in multi_ids:
-            multi_remap[mid] = _MultiIDRemapper(mid, parents)
-        return multi_remap
-
-    def _add_children_to_parent(self, cur_parent, children):
-        """Recursively add children to parent features."""
-        if cur_parent.id in children:
-            cur_children = children[cur_parent.id]
-            ready_children = []
-            for _, cur_child in cur_children:
-                cur_child, _ = self._add_children_to_parent(
-                    cur_child, children
-                )
-                ready_children.append(cur_child)
-            # Support Biopython features for 1.62+
-            # CompoundLocations and pre-1.62
-            if not hasattr(SeqFeature, "CompoundLocation"):
-                cur_parent.location_operator = "join"
-            for cur_child in ready_children:
-                cur_parent.sub_features.append(cur_child)
-            del children[cur_parent.id]
-        return cur_parent, children
-
-    def _add_annotations(self, base, anns):
-        """Add annotation data from the GFF file to records."""
-        # add these as a list of annotations, checking not to overwrite
-        # current values
-        for ann in anns:
-            rec, base = self._get_rec(base, ann)
-            for key, vals in ann["quals"].items():
-                self._add_ann_to_rec(rec, key, vals)
-        return base
-
-    def _add_ann_to_rec(self, rec, key, vals):
-        """Add a key/value annotation to the given SeqRecord."""
-        if key in rec.annotations:
-            try:
-                rec.annotations[key].extend(vals)
-            except AttributeError:
-                rec.annotations[key] = [rec.annotations[key]] + vals
-        else:
-            rec.annotations[key] = vals
-
-    def _get_rec(self, base, info_dict):
-        """Retrieve a record to add features to."""
-        max_loc = info_dict.get("location", (0, 1))[1]
-        match_id = self._get_matching_record_id(
-            base, info_dict["rec_id"]
-        )
-        if match_id:
-            cur_rec = base[match_id]
-            # update generated unknown sequences
-            if unknown_seq_avail and isinstance(
-                cur_rec.seq, UnknownSeq
-            ):
-                cur_rec.seq._length = max(
-                    [max_loc, cur_rec.seq._length]
-                )
-            elif not unknown_seq_avail and isinstance(
-                cur_rec.seq._data, _UndefinedSequenceData
-            ):
-                cur_rec.seq._data._length = max(
-                    [max_loc, cur_rec.seq._data._length]
-                )
-            return cur_rec, base
-        elif self._create_missing:
-            if unknown_seq_avail:
-                new_rec = SeqRecord(
-                    UnknownSeq(max_loc), info_dict["rec_id"]
-                )
-            else:
-                new_rec = SeqRecord(
-                    Seq(None, length=max_loc), info_dict["rec_id"]
-                )
-            base[info_dict["rec_id"]] = new_rec
-            return new_rec, base
-        else:
-            raise KeyError(
-                "Did not find matching record in %s for %s"
-                % (base.keys(), info_dict)
-            )
-
-    def _add_missing_parent(self, base, parent_id, cur_children):
-        """Add a new feature that is missing from the GFF file."""
-        base_rec_id = list(set(c[0] for c in cur_children))
-        child_strands = list(
-            set(c[1].location.strand for c in cur_children)
-        )
-        inferred_strand = (
-            child_strands[0] if len(child_strands) == 1 else None
-        )
-        assert len(base_rec_id) > 0
-        feature_dict = dict(
-            id=parent_id,
-            strand=inferred_strand,
-            type="inferred_parent",
-            quals=dict(ID=[parent_id]),
-            rec_id=base_rec_id[0],
-        )
-        coords = [
-            (c.location.start, c.location.end)
-            for r, c in cur_children
-        ]
-        feature_dict["location"] = (
-            min([c[0] for c in coords]),
-            max([c[1] for c in coords]),
-        )
-        return self._add_toplevel_feature(base, feature_dict)
-
-    def _add_toplevel_feature(self, base, feature_dict):
-        """Add a toplevel non-nested feature to the appropriate record."""
-        new_feature = self._get_feature(feature_dict)
-        rec, base = self._get_rec(base, feature_dict)
-        rec.features.append(new_feature)
-        return new_feature, base
-
-    def _get_feature(self, feature_dict):
-        """Retrieve a Biopython feature from our dictionary representation."""
-        # location = SeqFeature.FeatureLocation(*feature_dict['location'])
-        rstart, rend = feature_dict["location"]
-        new_feature = SeqFeature.SeqFeature(
-            SeqFeature.SimpleLocation(
-                start=rstart, end=rend, strand=feature_dict["strand"]
-            ),
-            feature_dict["type"],
-            id=feature_dict["id"],
-        )
-        # Support for Biopython 1.68 and above, which removed sub_features
-        if not hasattr(new_feature, "sub_features"):
-            new_feature.sub_features = []
-        new_feature.qualifiers = feature_dict["quals"]
-        return new_feature
-
-    def _parse_fasta(self, in_handle):
-        """Parse FASTA sequence information contained in the GFF3 file."""
-        return list(SeqIO.parse(in_handle, "fasta"))
-
-
-class _GFFParserLocalOut:
-    """Provide a collector for local GFF MapReduce file parsing."""
-
-    def __init__(self, smart_breaks=False):
-        self._items = dict()
-        self._smart_breaks = smart_breaks
-        self._missing_keys = collections.defaultdict(int)
-        self._last_parent = None
-        self.can_break = True
-        self.num_lines = 0
-
-    def add(self, key, vals):
-        if self._smart_breaks:
-            # if we are not GFF2 we expect parents and break
-            # based on not having missing ones
-            if key == "directive":
-                if vals[0] == "#":
-                    self.can_break = True
-                self._last_parent = None
-            elif not vals[0].get("is_gff2", False):
-                self._update_missing_parents(key, vals)
-                self.can_break = len(self._missing_keys) == 0
-            # break when we are done with stretches of child features
-            elif key != "child":
-                self.can_break = True
-                self._last_parent = None
-            # break when we have lots of child features in a row
-            # and change between parents
-            else:
-                cur_parent = vals[0]["quals"]["Parent"][0]
-                if self._last_parent:
-                    self.can_break = cur_parent != self._last_parent
-                self._last_parent = cur_parent
-        self.num_lines += 1
-        try:
-            self._items[key].extend(vals)
-        except KeyError:
-            self._items[key] = vals
-
-    def _update_missing_parents(self, key, vals):
-        # smart way of deciding if we can break this.
-        # if this is too much, can go back to not breaking in the
-        # middle of children
-        if key in ["child"]:
-            for val in vals:
-                for p_id in val["quals"]["Parent"]:
-                    self._missing_keys[p_id] += 1
-        for val in vals:
-            try:
-                del self._missing_keys[val["quals"]["ID"][0]]
-            except KeyError:
-                pass
-
-    def has_items(self):
-        return len(self._items) > 0
-
-    def get_results(self):
-        self._last_parent = None
-        return self._items
-
-
-class GFFParser(_AbstractMapReduceGFF):
-    """Local GFF parser providing standardized parsing of GFF files."""
-
-    def __init__(self, line_adjust_fn=None, create_missing=True):
-        _AbstractMapReduceGFF.__init__(
-            self, create_missing=create_missing
-        )
-        self._line_adjust_fn = line_adjust_fn
-
-    def _gff_process(self, gff_files, limit_info, target_lines):
-        """Process GFF addition without any parallelization.
-
-        In addition to limit filtering, this accepts a target_lines attribute
-        which provides a number of lines to parse before returning results.
-        This allows partial parsing of a file to prevent memory issues.
-        """
-        line_gen = self._file_line_generator(gff_files)
-        for out in self._lines_to_out_info(
-            line_gen, limit_info, target_lines
-        ):
-            yield out
-
-    def _file_line_generator(self, gff_files):
-        """Generate single lines from a set of GFF files."""
-        for gff_file in gff_files:
-            if hasattr(gff_file, "read"):
-                need_close = False
-                in_handle = gff_file
-            else:
-                need_close = True
-                in_handle = open(gff_file)
-            while 1:
-                line = in_handle.readline()
-                if not line:
-                    break
-                yield line
-            if need_close:
-                in_handle.close()
-
-    def _lines_to_out_info(self, line_iter, limit_info=None, target_lines=None):
-        """Generate SeqRecord and SeqFeatures from GFF file lines."""
-        params = self._examiner._get_local_params(limit_info)
-        out_info = _GFFParserLocalOut(
-            (target_lines is not None and target_lines > 1)
-        )
-        found_seqs = False
-        for line in line_iter:
-            results = self._map_fn(line, params)
-            if self._line_adjust_fn and results:
-                if results[0][0] not in ["directive"]:
-                    results = [
-                        (
-                            results[0][0],
-                            self._line_adjust_fn(results[0][1]),
-                        )
-                    ]
-            self._reduce_fn(results, out_info, params)
-            if (
-                target_lines
-                and out_info.num_lines >= target_lines
-                and out_info.can_break
-            ):
-                yield out_info.get_results()
-                out_info = _GFFParserLocalOut(
-                    (target_lines is not None and target_lines > 1)
-                )
-            if (
-                results
-                and results[0][0] == "directive"
-                and results[0][1] == "FASTA"
-            ):
-                found_seqs = True
-                break
-
-        class FakeHandle:
-            def __init__(self, line_iter):
-                self._iter = line_iter
-
-            def __iter__(self):
-                return self
-
-            def __next__(self):
-                return next(self._iter)
-
-            next = __next__
-
-            def read(self, size=-1):
-                if size < 0:
-                    return "".join(x for x in self._iter)
-                elif size == 0:
-                    return ""  # Used by Biopython to sniff unicode vs bytes
-                else:
-                    raise NotImplementedError
-
-            def readline(self):
-                try:
-                    return next(self._iter)
-                except StopIteration:
-                    return ""
-
-        if found_seqs:
-            fasta_recs = self._parse_fasta(FakeHandle(line_iter))
-            out_info.add("fasta", fasta_recs)
-        if out_info.has_items():
-            yield out_info.get_results()
-
-
-class DiscoGFFParser(_AbstractMapReduceGFF):
-    """GFF Parser with parallelization  (http://discoproject.org."""
-
-    def __init__(self, disco_host, create_missing=True):
-        """Initialize parser.
-
-        disco_host - Web reference to a Disco host which will be used for
-        parallelizing the GFF reading job.
-        """
-        _AbstractMapReduceGFF.__init__(
-            self, create_missing=create_missing
-        )
-        self._disco_host = disco_host
-
-    def _gff_process(self, gff_files, limit_info, target_lines=None):
-        """Process GFF addition, using Disco to parallelize the process."""
-        assert target_lines is None, "Cannot split parallelized jobs"
-        # make these imports local; only need them when using disco
-        # absolute path names unless they are special disco files
-        full_files = []
-        for f in gff_files:
-            if f.split(":")[0] != "disco":
-                full_files.append(os.path.abspath(f))
-            else:
-                full_files.append(f)
-        results = disco.job(
-            self._disco_host,
-            name="gff_reader",
-            input=full_files,
-            params=disco.Params(
-                limit_info=limit_info,
-                jsonify=True,
-                filter_info=self._examiner._filter_info,
-            ),
-            required_modules=["json", "collections", "re"],
-            map=self._map_fn,
-            reduce=self._reduce_fn,
-        )
-        processed = dict()
-        for out_key, out_val in disco.result_iterator(results):
-            processed[out_key] = json.loads(out_val)
-        yield processed
-
-
-def parse(
-    gff_files, base_dict=None, limit_info=None, target_lines=None
-):
-    """parse GFF files into SeqRecords and SeqFeatures."""
-    parser = GFFParser()
-    for rec in parser.parse_in_parts(
-        gff_files, base_dict, limit_info, target_lines
-    ):
-        yield rec
-
-
-def parse_simple(gff_files, limit_info=None):
-    """Parse GFF files as line by line dictionary of parts."""
-    parser = GFFParser()
-    for rec in parser.parse_simple(gff_files, limit_info=limit_info):
-        if "child" in rec:
-            assert "parent" not in rec
-            yield rec["child"][0]
-        elif "parent" in rec:
-            yield rec["parent"][0]
-        elif "feature" in rec:
-            yield rec["feature"][0]
-        # ignore directive lines
-        else:
-            assert "directive" in rec
-
-
-def _file_or_handle(fn):
-    """Decorator to handle either an input handle or a file."""
-
-    def _file_or_handle_inside(*args, **kwargs):
-        in_file = args[1]
-        if hasattr(in_file, "read"):
-            need_close = False
-            in_handle = in_file
-            if six.PY3 and not isinstance(in_handle, io.TextIOBase):
-                raise TypeError(
-                    "input handle must be opened in text mode"
-                )
-        else:
-            need_close = True
-            in_handle = open(in_file)
-        args = (args[0], in_handle) + args[2:]
-        out = fn(*args, **kwargs)
-        if need_close:
-            in_handle.close()
-        return out
-
-    return _file_or_handle_inside
-
-
-class GFFExaminer:
-    """Provide high level details about a GFF file to refine parsing.
-
-    GFF is a spec and is provided by many different centers. Real life files
-    will present the same information in slightly different ways. Becoming
-    familiar with the file you are dealing with is the best way to extract the
-    information you need. This class provides high level summary details to
-    help in learning.
-    """
-
-    def __init__(self):
-        self._filter_info = dict(
-            gff_id=[0],
-            gff_source_type=[1, 2],
-            gff_source=[1],
-            gff_type=[2],
-        )
-
-    def _get_local_params(self, limit_info=None):
-        class _LocalParams:
-            def __init__(self):
-                self.jsonify = False
-
-        params = _LocalParams()
-        params.limit_info = limit_info
-        params.filter_info = self._filter_info
-        return params
-
-    @_file_or_handle
-    def available_limits(self, gff_handle):
-        """Return dictionary information on possible limits for this file.
-
-        This returns a nested dictionary with the following structure:
-
-        keys -- names of items to filter by
-        values -- dictionary with:
-            keys -- filter choice
-            value -- counts of that filter in this file
-
-        Not a parallelized map-reduce implementation.
-        """
-        cur_limits = dict()
-        for filter_key in self._filter_info.keys():
-            cur_limits[filter_key] = collections.defaultdict(int)
-        for line in gff_handle:
-            # when we hit FASTA sequences, we are done with annotations
-            if line.startswith("##FASTA"):
-                break
-            # ignore empty and comment lines
-            if line.strip() and line.strip()[0] != "#":
-                parts = [p.strip() for p in line.split("\t")]
-                assert len(parts) >= 8, line
-                parts = parts[:9]
-                for (
-                    filter_key,
-                    cur_indexes,
-                ) in self._filter_info.items():
-                    cur_id = tuple([parts[i] for i in cur_indexes])
-                    cur_limits[filter_key][cur_id] += 1
-        # get rid of the default dicts
-        final_dict = dict()
-        for key, value_dict in cur_limits.items():
-            if len(key) == 1:
-                key = key[0]
-            final_dict[key] = dict(value_dict)
-        gff_handle.close()
-        return final_dict
-
-    @_file_or_handle
-    def parent_child_map(self, gff_handle):
-        """Provide a mapping of parent to child relationships in the file.
-
-        Returns a dictionary of parent child relationships:
-
-        keys -- tuple of (source, type) for each parent
-        values -- tuple of (source, type) as children of that parent
-
-        Not a parallelized map-reduce implementation.
-        """
-        # collect all of the parent and child types mapped to IDs
-        parent_sts = dict()
-        child_sts = collections.defaultdict(list)
-        for line in gff_handle:
-            # when we hit FASTA sequences, we are done with annotations
-            if line.startswith("##FASTA"):
-                break
-            if line.strip() and not line.startswith("#"):
-                line_type, line_info = _gff_line_map(
-                    line, self._get_local_params()
-                )[0]
-                if line_type == "parent" or (
-                    line_type == "child" and line_info["id"]
-                ):
-                    parent_sts[line_info["id"]] = (
-                        line_info["quals"].get("source", [""])[0],
-                        line_info["type"],
-                    )
-                if line_type == "child":
-                    for parent_id in line_info["quals"]["Parent"]:
-                        child_sts[parent_id].append(
-                            (
-                                line_info["quals"].get(
-                                    "source", [""]
-                                )[0],
-                                line_info["type"],
-                            )
-                        )
-        # print parent_sts, child_sts
-        # generate a dictionary of the unique final type relationships
-        pc_map = collections.defaultdict(list)
-        for parent_id, parent_type in parent_sts.items():
-            for child_type in child_sts[parent_id]:
-                pc_map[parent_type].append(child_type)
-        pc_final_map = dict()
-        for ptype, ctypes in pc_map.items():
-            unique_ctypes = list(set(ctypes))
-            unique_ctypes.sort()
-            pc_final_map[ptype] = unique_ctypes
-        return pc_final_map
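The jb2_GFF/GFFParser.py module removed above exposed parse, parse_simple and GFFExaminer as its public entry points; the same names are now expected to come from the bcbio-gff package pinned in macros.xml (importable as BCBio.GFF). A minimal usage sketch, assuming a hypothetical example.gff3 input and the pre-removal import path:

    from jb2_GFF import GFFExaminer, parse  # with bcbio-gff: from BCBio.GFF import GFFExaminer, parse

    gff_file = "example.gff3"  # hypothetical input file

    # Survey the file first to choose sensible filters
    examiner = GFFExaminer()
    with open(gff_file) as handle:
        print(examiner.available_limits(handle))

    # Parse only gene features, yielding records in memory-bounded chunks
    limits = dict(gff_type=["gene"])
    with open(gff_file) as handle:
        for rec in parse([handle], limit_info=limits, target_lines=1000):
            print(rec.id, len(rec.features))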
--- a/jb2_GFF/__init__.py	Mon Jan 29 02:34:43 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-"""Top level of GFF parsing providing shortcuts for useful classes.
-"""
-from jb2_GFF.GFFParser import GFFParser, DiscoGFFParser, GFFExaminer, parse, parse_simple
-from jb2_GFF.GFFOutput import GFF3Writer, write
-
-__version__ = "0.6.9"
Binary file jb2_GFF/__pycache__/GFFOutput.cpython-310.pyc has changed
Binary file jb2_GFF/__pycache__/GFFParser.cpython-310.pyc has changed
Binary file jb2_GFF/__pycache__/__init__.cpython-310.pyc has changed
--- a/jb2_GFF/_utils.py	Mon Jan 29 02:34:43 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-class defaultdict(dict):
-    """Back compatible defaultdict:
-    http://code.activestate.com/recipes/523034/"""
-
-    def __init__(self, default_factory=None, *a, **kw):
-        if default_factory is not None and not hasattr(
-            default_factory, "__call__"
-        ):
-            raise TypeError("first argument must be callable")
-        dict.__init__(self, *a, **kw)
-        self.default_factory = default_factory
-
-    def __getitem__(self, key):
-        try:
-            return dict.__getitem__(self, key)
-        except KeyError:
-            return self.__missing__(key)
-
-    def __missing__(self, key):
-        if self.default_factory is None:
-            raise KeyError(key)
-        self[key] = value = self.default_factory()
-        return value
-
-    def __reduce__(self):
-        if self.default_factory is None:
-            args = tuple()
-        else:
-            args = (self.default_factory,)
-        return type(self), args, None, None, self.items()
-
-    def copy(self):
-        return self.__copy__()
-
-    def __copy__(self):
-        return type(self)(self.default_factory, self)
-
-    def __deepcopy__(self, memo):
-        import copy
-
-        return type(self)(
-            self.default_factory, copy.deepcopy(self.items())
-        )
-
-    def __repr__(self):
-        return "defaultdict(%s, %s)" % (
-            self.default_factory,
-            dict.__repr__(self),
-        )
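The _utils.defaultdict backport deleted above mirrors the standard collections.defaultdict, so code written against it behaves the same way; a tiny illustrative check, assuming the module were still importable:

    from jb2_GFF._utils import defaultdict

    counts = defaultdict(int)
    for feature_type in ["gene", "mRNA", "gene"]:
        counts[feature_type] += 1
    print(dict(counts))  # {'gene': 2, 'mRNA': 1}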
--- a/jbrowse2.py	Mon Jan 29 02:34:43 2024 +0000
+++ b/jbrowse2.py	Tue Jan 30 06:05:03 2024 +0000
@@ -3,7 +3,6 @@
 import argparse
 import binascii
 import datetime
-import hashlib
 import json
 import logging
 import os
@@ -23,7 +22,7 @@
 
 TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
 GALAXY_INFRASTRUCTURE_URL = None
-JB2REL = "v2.10.0"
+JB2REL = "v2.10.1"
 # version pinned for cloning
 
 mapped_chars = {
@@ -232,7 +231,9 @@
         elif "scaling" in track:
             if track["scaling"]["method"] == "ignore":
                 if track["scaling"]["scheme"]["color"] != "__auto__":
-                    trackConfig["style"]["color"] = track["scaling"]["scheme"]["color"]
+                    trackConfig["style"]["color"] = track["scaling"]["scheme"][
+                        "color"
+                    ]
                 else:
                     trackConfig["style"]["color"] = self.hex_from_rgb(
                         *self._get_colours()
@@ -259,13 +260,18 @@
                             "blue": blue,
                         }
                     )
-                    trackConfig["style"]["color"] = color_function.replace("\n", "")
+                    trackConfig["style"]["color"] = color_function.replace(
+                        "\n", ""
+                    )
                 elif trackFormat == "gene_calls":
                     # Default values, based on GFF3 spec
                     min_val = 0
                     max_val = 1000
                     # Get min/max and build a scoring function since JBrowse doesn't
-                    if scales["type"] == "automatic" or scales["type"] == "__auto__":
+                    if (
+                        scales["type"] == "automatic"
+                        or scales["type"] == "__auto__"
+                    ):
                         min_val, max_val = self.min_max_gff(gff3)
                     else:
                         min_val = scales.get("min", 0)
@@ -273,7 +279,9 @@
 
                     if scheme["color"] == "__auto__":
                         user_color = "undefined"
-                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
+                        auto_color = "'%s'" % self.hex_from_rgb(
+                            *self._get_colours()
+                        )
                     elif scheme["color"].startswith("#"):
                         user_color = "'%s'" % self.hex_from_rgb(
                             *self.rgb_from_hex(scheme["color"][1:])
@@ -281,7 +289,9 @@
                         auto_color = "undefined"
                     else:
                         user_color = "undefined"
-                        auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
+                        auto_color = "'%s'" % self.hex_from_rgb(
+                            *self._get_colours()
+                        )
 
                     color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(
                         **{
@@ -293,7 +303,9 @@
                         }
                     )
 
-                    trackConfig["style"]["color"] = color_function.replace("\n", "")
+                    trackConfig["style"]["color"] = color_function.replace(
+                        "\n", ""
+                    )
         return trackConfig
 
 
@@ -336,40 +348,41 @@
     for (key, value) in node.findall("dataset")[0].attrib.items():
         metadata["dataset_%s" % key] = value
 
-    for (key, value) in node.findall("history")[0].attrib.items():
-        metadata["history_%s" % key] = value
-
-    for (key, value) in node.findall("metadata")[0].attrib.items():
-        metadata["metadata_%s" % key] = value
-
-    for (key, value) in node.findall("tool")[0].attrib.items():
-        metadata["tool_%s" % key] = value
+    if node.findall("history"):
+        for (key, value) in node.findall("history")[0].attrib.items():
+            metadata["history_%s" % key] = value
 
-    # Additional Mappings applied:
-    metadata[
-        "dataset_edam_format"
-    ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
-        metadata["dataset_edam_format"], metadata["dataset_file_ext"]
-    )
-    metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
-        metadata["history_user_email"]
-    )
-    metadata["hist_name"] = metadata["history_display_name"]
-    metadata[
-        "history_display_name"
-    ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
-        galaxy=GALAXY_INFRASTRUCTURE_URL,
-        encoded_hist_id=metadata["history_id"],
-        hist_name=metadata["history_display_name"],
-    )
-    metadata[
-        "tool_tool"
-    ] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}</a>'.format(
-        galaxy=GALAXY_INFRASTRUCTURE_URL,
-        encoded_id=metadata["dataset_id"],
-        tool_id=metadata["tool_tool_id"],
-        # tool_version=metadata['tool_tool_version'],
-    )
+    if node.findall("metadata"):
+        for (key, value) in node.findall("metadata")[0].attrib.items():
+            metadata["metadata_%s" % key] = value
+        # Additional Mappings applied (outside the attribute loop to avoid re-wrapping):
+        metadata[
+            "dataset_edam_format"
+        ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
+            metadata["dataset_edam_format"], metadata["dataset_file_ext"]
+        )
+        metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
+            metadata["history_user_email"]
+        )
+        metadata["hist_name"] = metadata["history_display_name"]
+        metadata[
+            "history_display_name"
+        ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
+            galaxy=GALAXY_INFRASTRUCTURE_URL,
+            encoded_hist_id=metadata["history_id"],
+            hist_name=metadata["history_display_name"],
+        )
+    if node.findall("tool"):
+        for (key, value) in node.findall("tool")[0].attrib.items():
+            metadata["tool_%s" % key] = value
+        metadata[
+            "tool_tool"
+        ] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}{tool_version}</a>'.format(
+            galaxy=GALAXY_INFRASTRUCTURE_URL,
+            encoded_id=metadata.get("dataset_id", ""),
+            tool_id=metadata.get("tool_tool_id", ""),
+            tool_version=metadata.get("tool_tool_version",""),
+        )
     return metadata
 
 
@@ -389,7 +402,9 @@
 
     def subprocess_check_call(self, command, output=None):
         if output:
-            log.debug("cd %s && %s >  %s", self.outdir, " ".join(command), output)
+            log.debug(
+                "cd %s && %s >  %s", self.outdir, " ".join(command), output
+            )
             subprocess.check_call(command, cwd=self.outdir, stdout=output)
         else:
             log.debug("cd %s && %s", self.outdir, " ".join(command))
@@ -468,13 +483,8 @@
             self.config_json["assemblies"] = assemblies
 
     def make_assembly(self, fapath, gname):
-        hashData = [
-            fapath,
-            gname,
-        ]
-        hashData = "|".join(hashData).encode("utf-8")
-        ghash = hashlib.md5(hashData).hexdigest()
-        faname = ghash + ".fa.gz"
+
+        faname = gname + ".fa.gz"
         fadest = os.path.join(self.outdir, faname)
         cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % (
             fapath,
@@ -495,6 +505,7 @@
                 "uri": faname + ".gzi",
             },
         }
+        self.genome_sequence_adapter = adapter
         trackDict = {
             "name": gname,
             "sequence": {
@@ -604,7 +615,7 @@
             "plugins": [
                 {
                     "name": "MafViewer",
-                    "url": "https://unpkg.com/browse/jbrowse-plugin-mafviewer@1.0.6/dist/jbrowse-plugin-mafviewer.umd.production.min.js",
+                    "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js"
                 }
             ]
         }
@@ -623,7 +634,9 @@
         self.subprocess_check_call(cmd)
         # Construct samples list
         # We could get this from galaxy metadata, not sure how easily.
-        ps = subprocess.Popen(["grep", "^s [^ ]*", "-o", data], stdout=subprocess.PIPE)
+        ps = subprocess.Popen(
+            ["grep", "^s [^ ]*", "-o", data], stdout=subprocess.PIPE
+        )
         output = subprocess.check_output(("sort", "-u"), stdin=ps.stdout)
         ps.wait()
         outp = output.decode("ascii")
@@ -783,7 +796,9 @@
         url = fname
         self.subprocess_check_call(["cp", data, dest])
         bloc = {"uri": url}
-        if bam_index is not None and os.path.exists(os.path.realpath(bam_index)):
+        if bam_index is not None and os.path.exists(
+            os.path.realpath(bam_index)
+        ):
             # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
             self.subprocess_check_call(
                 ["cp", os.path.realpath(bam_index), dest + ".bai"]
@@ -794,7 +809,9 @@
             #      => no index generated by galaxy, but there might be one next to the symlink target
             #      this trick allows to skip the bam sorting made by galaxy if already done outside
             if os.path.exists(os.path.realpath(data) + ".bai"):
-                self.symlink_or_copy(os.path.realpath(data) + ".bai", dest + ".bai")
+                self.symlink_or_copy(
+                    os.path.realpath(data) + ".bai", dest + ".bai"
+                )
             else:
                 log.warn("Could not find a bam index (.bai file) for %s", data)
         trackDict = {
@@ -823,12 +840,62 @@
         self.tracksToAdd.append(trackDict)
         self.trackIdlist.append(tId)
 
+    def add_cram(self, data, trackData, cramOpts, cram_index=None, **kwargs):
+        tId = trackData["label"]
+        fname = "%s.cram" % trackData["label"]
+        dest = "%s/%s" % (self.outdir, fname)
+        url = fname
+        self.subprocess_check_call(["cp", data, dest])
+        bloc = {"uri": url}
+        if cram_index is not None and os.path.exists(
+            os.path.realpath(cram_index)
+        ):
+            # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
+            self.subprocess_check_call(
+                ["cp", os.path.realpath(cram_index), dest + ".crai"]
+            )
+        else:
+            # Can happen in exotic conditions
+            # e.g. if the cram was imported as a symlink and indexed outside of galaxy
+            #      => no index generated by galaxy, but there might be one next to the symlink target
+            #      this trick allows skipping re-indexing by galaxy if it was already done outside
+            if os.path.exists(os.path.realpath(data) + ".crai"):
+                self.symlink_or_copy(
+                    os.path.realpath(data) + ".crai", dest + ".crai"
+                )
+            else:
+                log.warn(
+                    "Could not find a cram index (.crai file) for %s", data
+                )
+        trackDict = {
+            "type": "AlignmentsTrack",
+            "trackId": tId,
+            "name": trackData["name"],
+            "assemblyNames": [self.genome_name],
+            "adapter": {
+                "type": "CramAdapter",
+                "cramLocation": bloc,
+                "craiLocation": {"uri": fname + ".crai",},
+                "sequenceAdapter": self.genome_sequence_adapter,
+                },
+            "displays": [
+                {
+                    "type": "LinearAlignmentsDisplay",
+                    "displayId": "%s-LinearAlignmentsDisplay" % tId,
+                },
+            ],
+        }
+        style_json = self._prepare_track_style(trackDict)
+        trackDict["style"] = style_json
+        self.tracksToAdd.append(trackDict)
+        self.trackIdlist.append(tId)
+
     def add_vcf(self, data, trackData):
         tId = trackData["label"]
-        url = "%s/api/datasets/%s/display" % (
-            self.giURL,
-            trackData["metadata"]["dataset_id"],
-        )
+        # url = "%s/api/datasets/%s/display" % (
+        # self.giURL,
+        # trackData["metadata"]["dataset_id"],
+        # )
         url = "%s.vcf.gz" % tId
         dest = "%s/%s" % (self.outdir, url)
         cmd = "bgzip -c %s  > %s" % (data, dest)
@@ -879,7 +946,9 @@
                 dest,
             )  # "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'"
             self.subprocess_popen(cmd)
-            self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"])
+            self.subprocess_check_call(
+                ["tabix", "-f", "-p", "gff", dest + ".gz"]
+            )
 
     def _sort_bed(self, data, dest):
         # Only index if not already done
@@ -1085,28 +1154,12 @@
 
             outputTrackConfig["key"] = track_human_label
 
-            # We add extra data to hash for the case of REST + SPARQL.
-            if (
-                "conf" in track
-                and "options" in track["conf"]
-                and "url" in track["conf"]["options"]
-            ):
-                rest_url = track["conf"]["options"]["url"]
-            else:
-                rest_url = ""
             outputTrackConfig["trackset"] = track.get("trackset", {})
-            # I chose to use track['category'] instead of 'category' here. This
-            # is intentional. This way re-running the tool on a different date
-            # will not generate different hashes and make comparison of outputs
-            # much simpler.
-            hashData = [
-                str(dataset_path),
+            outputTrackConfig["label"] = "%s_%i_%s" % (
+                dataset_ext,
+                i,
                 track_human_label,
-                track["category"],
-                rest_url,
-            ]
-            hashData = "|".join(hashData).encode("utf-8")
-            outputTrackConfig["label"] = hashlib.md5(hashData).hexdigest() + "_%s" % i
+            )
             outputTrackConfig["metadata"] = extra_metadata
             outputTrackConfig["name"] = track_human_label
 
@@ -1138,17 +1191,10 @@
                     outputTrackConfig,
                 )
             elif dataset_ext == "bam":
-                real_indexes = track["conf"]["options"]["pileup"]["bam_indices"][
-                    "bam_index"
-                ]
+                real_indexes = track["conf"]["options"]["pileup"][
+                    "bam_indices"
+                ]["bam_index"]
                 if not isinstance(real_indexes, list):
-                    # <bam_indices>
-                    #  <bam_index>/path/to/a.bam.bai</bam_index>
-                    # </bam_indices>
-                    #
-                    # The above will result in the 'bam_index' key containing a
-                    # string. If there are two or more indices, the container
-                    # becomes a list. Fun!
                     real_indexes = [real_indexes]
 
                 self.add_bam(
@@ -1157,6 +1203,19 @@
                     track["conf"]["options"]["pileup"],
                     bam_index=real_indexes[i],
                 )
+            elif dataset_ext == "cram":
+                real_indexes = track["conf"]["options"]["cram"][
+                    "cram_indices"
+                ]["cram_index"]
+                if not isinstance(real_indexes, list):
+                    real_indexes = [real_indexes]
+
+                self.add_cram(
+                    dataset_path,
+                    outputTrackConfig,
+                    track["conf"]["options"]["cram"],
+                    cram_index=real_indexes[i],
+                )
             elif dataset_ext == "blastxml":
                 self.add_blastxml(
                     dataset_path,
@@ -1290,14 +1349,18 @@
             config_json.update(self.config_json)
         config_data = {}
 
-        config_data["disableAnalytics"] = data.get("analytics", "false") == "true"
+        config_data["disableAnalytics"] = (
+            data.get("analytics", "false") == "true"
+        )
 
         config_data["theme"] = {
             "palette": {
                 "primary": {"main": data.get("primary_color", "#0D233F")},
                 "secondary": {"main": data.get("secondary_color", "#721E63")},
                 "tertiary": {"main": data.get("tertiary_color", "#135560")},
-                "quaternary": {"main": data.get("quaternary_color", "#FFB11D")},
+                "quaternary": {
+                    "main": data.get("quaternary_color", "#FFB11D")
+                },
             },
             "typography": {"fontSize": int(data.get("font_size", 10))},
         }
@@ -1351,9 +1414,10 @@
     parser = argparse.ArgumentParser(description="", epilog="")
     parser.add_argument("--xml", help="Track Configuration")
     parser.add_argument("--outdir", help="Output directory", default="out")
-    parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1")
+    parser.add_argument(
+        "--version", "-V", action="version", version="%(prog)s 2.0.1"
+    )
     args = parser.parse_args()
-
     tree = ET.parse(args.xml)
     root = tree.getroot()
 
@@ -1448,7 +1512,8 @@
         track_conf["format"] = track.attrib["format"]
         if track.find("options/style"):
             track_conf["style"] = {
-                item.tag: parse_style_conf(item) for item in track.find("options/style")
+                item.tag: parse_style_conf(item)
+                for item in track.find("options/style")
             }
         if track.find("options/style_labels"):
             track_conf["style_labels"] = {
@@ -1461,7 +1526,9 @@
         track_conf["format"] = track.attrib["format"]
         try:
             # Only pertains to gff3 + blastxml. TODO?
-            track_conf["style"] = {t.tag: t.text for t in track.find("options/style")}
+            track_conf["style"] = {
+                t.tag: t.text for t in track.find("options/style")
+            }
         except TypeError:
             track_conf["style"] = {}
             pass
@@ -1492,7 +1559,9 @@
         "primary_color": root.find("metadata/general/primary_color").text,
         "secondary_color": root.find("metadata/general/secondary_color").text,
         "tertiary_color": root.find("metadata/general/tertiary_color").text,
-        "quaternary_color": root.find("metadata/general/quaternary_color").text,
+        "quaternary_color": root.find(
+            "metadata/general/quaternary_color"
+        ).text,
         "font_size": root.find("metadata/general/font_size").text,
     }
     jc.add_general_configuration(general_data)
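For reviewers, the new add_cram() method above appends a track entry of roughly the following shape to the JBrowse2 config; the identifiers and genome name below are hypothetical, and the sequenceAdapter is assumed to be the bgzipped-fasta adapter that make_assembly() stores in self.genome_sequence_adapter:

    import json

    track = {
        "type": "AlignmentsTrack",
        "trackId": "cram_0_sample",
        "name": "sample",
        "assemblyNames": ["myGenome"],
        "adapter": {
            "type": "CramAdapter",
            "cramLocation": {"uri": "cram_0_sample.cram"},
            "craiLocation": {"uri": "cram_0_sample.cram.crai"},
            # assumed layout of the adapter saved by make_assembly() for a bgzipped fasta
            "sequenceAdapter": {
                "type": "BgzipFastaAdapter",
                "fastaLocation": {"uri": "myGenome.fa.gz"},
                "faiLocation": {"uri": "myGenome.fa.gz.fai"},
                "gziLocation": {"uri": "myGenome.fa.gz.gzi"},
            },
        },
        "displays": [
            {
                "type": "LinearAlignmentsDisplay",
                "displayId": "cram_0_sample-LinearAlignmentsDisplay",
            }
        ],
    }
    print(json.dumps(track, indent=2))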
--- a/jbrowse2.xml	Mon Jan 29 02:34:43 2024 +0000
+++ b/jbrowse2.xml	Tue Jan 30 06:05:03 2024 +0000
@@ -1,4 +1,4 @@
- <tool id="jbrowse2" name="jbrowse2" version="@TOOL_VERSION@+@WRAPPER_VERSION@_2" profile="22.05">
+ <tool id="jbrowse2" name="jbrowse2" version="@TOOL_VERSION@+@WRAPPER_VERSION@_3" profile="22.05">
     <description>genome browser</description>
     <macros>
         <import>macros.xml</import>
@@ -31,17 +31,20 @@
     <metadata>
         <genomes>
             #if str($reference_genome.genome_type_select) == "indexed":
-              <genome path="${reference_genome.genomes.fields.path}" label="${reference_genome.genomes.fields.name}">
-                  <metadata />
+              <genome path="${reference_genome.genome.fields.path}" label="${reference_genome.genome.fields.name}">
+                  <metadata>
+                     <dataset
+                      dname = "${reference_genome.genome.name}" />
+                  </metadata>
               </genome>
             #else
-              <genome path="$reference_genome.genome" label="${reference_genome.genome.element_identifier}">
+              <genome path="$reference_genome.genome" label="${reference_genome.genome.name}">
                 <metadata>
                   <dataset id="${__app__.security.encode_id($reference_genome.genome.id)}" hid="${reference_genome.genome.hid}"
                       size="${reference_genome.genome.get_size(nice_size=True)}"
                       edam_format="${reference_genome.genome.datatype.edam_format}"
                       file_ext="${reference_genome.genome.ext}"
-                      dname = "${reference_genome.genome.element_identifier}" />
+                      dname = "${reference_genome.genome.name}" />
                   <history id="${__app__.security.encode_id($reference_genome.genome.history_id)}"
                       #if $reference_genome.genome.history.user:
                       user_email="${reference_genome.genome.history.user.email}"
@@ -165,7 +168,7 @@
             #else if str($track.data_format.data_format_select) == "synteny":
                 <synteny>
                     <genome>${track.data_format.synteny_genome}</genome>
-                    <genome_label>${track.data_format.synteny_genome.element_identifier}</genome_label>
+                    <genome_label>${track.data_format.synteny_genome.name}</genome_label>
                 </synteny>
             #else if str($track.data_format.data_format_select) == "hic":
                 <hic>
@@ -227,15 +230,15 @@
                     <param type="select" label="Track Type" name="data_format_select">
                         <option value="pileup">BAM Pileup track</option>
                         <option value="wiggle">BigWig track</option>
-                        <!-- <option value="blast">Blast XML track - converted to GFF</option> -->
+                        <option value="blast">Blast XML track - converted to GFF</option>
                         <option value="cram">CRAM</option>
                         <option value="gene_calls" selected="true">GFF/GFF3/BED feature track</option>
                         <option value="hic">HiC (compressed binary) track. Existing cool format must be converted to binary hic - hic_matrix will NOT work.</option>
+                        <option value="maf">Multiple alignment format track. Reference name must match the MAF name exactly to work correctly</option>
                         <option value="sparql">SPARQL</option>
                         <option value="synteny">Synteny track with PAF data</option>
                        <option value="vcf">VCF SNP annotation</option>
                     </param>
-                    <!--
                     <when value="blast">
                         <expand macro="input_conditional" label="BlastXML Track Data" format="blastxml" />
 
@@ -258,7 +261,7 @@
                             truevalue="true"
                             falsevalue="false" />
                         <expand macro="track_visibility" />
-                    </when> -->
+                    </when>
                     <when value="vcf">
                         <expand macro="input_conditional" label="SNP Track Data" format="vcf,vcf_bgzip" />
                         <expand macro="track_styling_vcf"/>
@@ -291,24 +294,24 @@
                         <expand macro="input_conditional" label="CRAM Track Data" format="cram" />
                         <expand macro="track_visibility" />
                     </when>
+                    <when value="maf">
+                        <expand macro="input_conditional" label="MAF Track Data" format="maf" />
+                        <expand macro="track_visibility" />
+                    </when>
                     <when value="wiggle">
                         <expand macro="input_conditional" label="BigWig Track Data" format="bigwig" />
                         <expand macro="track_visibility" />
-
                     </when>
-
                     <when value="synteny">
                         <param label="Comparison genome sequence" help="Paf must use this as the reference to map the real reference sequence"
                             format="fasta"
                             name="synteny_genome"
                             type="data" />
-
                         <expand macro="input_conditional" label="Synteny data" format="paf" help="Make paf with minimap2 mapping real reference onto desired syntenic reference"/>
                         <expand macro="track_visibility" />
                     </when>
 
                     <when value="hic">
-                        <!-- TODO no hic datatype by default, but input for hicConvertFormat? hic_matrix datatype on .eu -->
                         <expand macro="input_conditional" label="HiC data" format="hic" />
                         <expand macro="track_visibility" />
                     </when>
@@ -371,7 +374,7 @@
         <param type="hidden" name="uglyTestingHack" value="" />
     </inputs>
     <outputs>
-        <data format="html" name="output" label="JBrowse2 on $reference_genome.genome.element_identifier"/>
+        <data format="html" name="output" label="JBrowse2 on $reference_genome.genome.name"/>
     </outputs>
     <tests>
          <test>
--- a/macros.xml	Mon Jan 29 02:34:43 2024 +0000
+++ b/macros.xml	Tue Jan 30 06:05:03 2024 +0000
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">2.10.0</token>
+    <token name="@TOOL_VERSION@">2.10.1</token>
     <xml name = "edamInc">
         <edam_topics>
             <edam_topic>topic_3307</edam_topic>
@@ -14,8 +14,8 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">jbrowse2</requirement>
-            <requirement type="package" version="1.82">biopython</requirement>
-            <requirement type="package" version="0.7.0">bcbio-gff</requirement>
+            <requirement type="package" version="1.81">biopython</requirement>
+            <requirement type="package" version="0.7.1">bcbio-gff</requirement>
             <requirement type="package" version="1.19">samtools</requirement>
             <requirement type="package" version="6.0.1">pyyaml</requirement>
             <requirement type="package" version="1.11">tabix</requirement>