Repository 'jbrowse2'
hg clone https://toolshed.g2.bx.psu.edu/repos/fubar/jbrowse2

Changeset 17:4c201a3d4755 (2024-01-28)
Previous changeset 16:1fe91657bfd6 (2024-01-25) Next changeset 18:2e6c48910819 (2024-01-29)
Commit message:
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit a37bfdfc108501b11c7b2aa15efb1bd16f0c4b66
modified:
blastxml_to_gapped_gff3.py
gff3_rebase.py
jb2_webserver.py
jbrowse2.py
jbrowse2.xml
macros.xml
readme.rst
test-data/merlin.fa
added:
Dockerfile
GFFOutput.py
jb2_GFF/GFFOutput.py
jb2_GFF/GFFParser.py
jb2_GFF/__init__.py
jb2_GFF/__pycache__/GFFOutput.cpython-310.pyc
jb2_GFF/__pycache__/GFFParser.cpython-310.pyc
jb2_GFF/__pycache__/__init__.cpython-310.pyc
jb2_GFF/_utils.py
removed:
plants.sh
static/images/bam.png
static/images/bigwig.png
static/images/blast.png
static/images/opacity.png
static/images/sections.png
static/images/styling.png
diff -r 1fe91657bfd6 -r 4c201a3d4755 Dockerfile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Dockerfile Sun Jan 28 06:48:52 2024 +0000
@@ -0,0 +1,3 @@
+FROM quay.io/bioconda/base-glibc-busybox-bash:3.0
+
+RUN adduser -u 1000 user1000 -D && adduser -u 1001 user1001 -D
diff -r 1fe91657bfd6 -r 4c201a3d4755 GFFOutput.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFOutput.py Sun Jan 28 06:48:52 2024 +0000
@@ -0,0 +1,213 @@
+"""Output Biopython SeqRecords and SeqFeatures to GFF3 format.
+
+The target format is GFF3, the current GFF standard:
+    http://www.sequenceontology.org/gff3.shtml
+"""
+from six.moves import urllib
+
+from Bio import SeqIO
+
+
+class _IdHandler:
+    """Generate IDs for GFF3 Parent/Child
+    relationships where they don't exist."""
+
+    def __init__(self):
+        self._prefix = "biopygen"
+        self._counter = 1
+        self._seen_ids = []
+
+    def _generate_id(self, quals):
+        """Generate a unique ID not present in our existing IDs."""
+        gen_id = self._get_standard_id(quals)
+        if gen_id is None:
+            while 1:
+                gen_id = "%s%s" % (self._prefix, self._counter)
+                if gen_id not in self._seen_ids:
+                    break
+                self._counter += 1
+        return gen_id
+
+    def _get_standard_id(self, quals):
+        """Retrieve standardized IDs from other sources like NCBI GenBank.
+
+        This tries to find IDs from known key/values when stored differently
+        than GFF3 specifications.
+        """
+        possible_keys = ["transcript_id", "protein_id"]
+        for test_key in possible_keys:
+            if test_key in quals:
+                cur_id = quals[test_key]
+                if isinstance(cur_id, tuple) or isinstance(cur_id, list):
+                    return cur_id[0]
+                else:
+                    return cur_id
+        return None
+
+    def update_quals(self, quals, has_children):
+        """Update a set of qualifiers, adding an ID if necessary."""
+        cur_id = quals.get("ID", None)
+        # if we have an ID, record it
+        if cur_id:
+            if not isinstance(cur_id, list) and not isinstance(cur_id, tuple):
+                cur_id = [cur_id]
+            for add_id in cur_id:
+                self._seen_ids.append(add_id)
+        # if we need one and don't have it, create a new one
+        elif has_children:
+            new_id = self._generate_id(quals)
+            self._seen_ids.append(new_id)
+            quals["ID"] = [new_id]
+        return quals
+
+
+class GFF3Writer:
+    """Write GFF3 files starting with standard Biopython objects."""
+
+    def __init__(self):
+        pass
+
+    def write(self, recs, out_handle, include_fasta=False):
+        """Write the provided records to the given handle in GFF3 format."""
+        id_handler = _IdHandler()
+        self._write_header(out_handle)
+        fasta_recs = []
+        try:
+            recs = iter(recs)
+        except TypeError:
+            recs = [recs]
+        for rec in recs:
+            self._write_rec(rec, out_handle)
+            self._write_annotations(rec.annotations, rec.id, len(rec.seq), out_handle)
+            for sf in rec.features:
+                sf = self._clean_feature(sf)
+                id_handler = self._write_feature(sf, rec.id, out_handle, id_handler)
+            if include_fasta and len(rec.seq) > 0:
+                fasta_recs.append(rec)
+        if len(fasta_recs) > 0:
+            self._write_fasta(fasta_recs, out_handle)
+
+    def _clean_feature(self, feature):
+        quals = {}
+        for key, val in feature.qualifiers.items():
+            if not isinstance(val, (list, tuple)):
+                val = [val]
+            val = [str(x) for x in val]
+            quals[key] = val
+        feature.qualifiers = quals
+        # Support for Biopython 1.68 and above, which removed sub_features
+        if not hasattr(feature, "sub_features"):
+            feature.sub_features = []
+        clean_sub = [self._clean_feature(f) for f in feature.sub_features]
+        feature.sub_features = clean_sub
+        return feature
+
+    def _write_rec(self, rec, out_handle):
+        # if we have a SeqRecord, write out optional directive
+        if len(rec.seq) > 0:
+            out_handle.write("##sequence-region %s 1 %s\n" % (rec.id, len(rec.seq)))
+
+    def _get_phase(self, feature):
+        if "phase" in feature.qualifiers:
+            phase = feature.qualifiers["phase"][0]
+        elif feature.type == "CDS":
+            phase = int(feature.qualifiers.get("codon_start", [1])[0]) - 1
+        else:
+            phase = "."
+        return str(phase)
+
+    def _write_feature(self, feature, rec_id, out_handle, id_handler, parent_id=None):
+        """Write a feature with location information."""
+        if feature.location.strand == 1:
+            strand = "+"
+        elif feature.location.strand == -1:
+            strand = "-"
+        else:
+            strand = "."
+        # remove any standard features from the qualifiers
+        quals = feature.qualifiers.copy()
+        for std_qual in ["source", "score", "phase"]:
+            if std_qual in quals and len(quals[std_qual]) == 1:
+                del quals[std_qual]
+        # add a link to a parent identifier if it exists
+        if parent_id:
+            if "Parent" not in quals:
+                quals["Parent"] = []
+            quals["Parent"].append(parent_id)
+        quals = id_handler.update_quals(quals, len(feature.sub_features) > 0)
+        if feature.type:
+            ftype = feature.type
+        else:
+            ftype = "sequence_feature"
+        parts = [
+            str(rec_id),
+            feature.qualifiers.get("source", ["feature"])[0],
+            ftype,
+            str(feature.location.start + 1),  # 1-based indexing
+            str(feature.location.end),
+            feature.qualifiers.get("score", ["."])[0],
+            strand,
+            self._get_phase(feature),
+            self._format_keyvals(quals),
+        ]
+        out_handle.write("\t".join(parts) + "\n")
+        for sub_feature in feature.sub_features:
+            id_handler = self._write_feature(
+                sub_feature,
+                rec_id,
+                out_handle,
+                id_handler,
+                quals["ID"][0],
+            )
+        return id_handler
+
+    def _format_keyvals(self, keyvals):
+        format_kvs = []
+        for key in sorted(keyvals.keys()):
+            values = keyvals[key]
+            key = key.strip()
+            format_vals = []
+            if not isinstance(values, (list, tuple)):
+                values = [values]
+            for val in values:
+                val = urllib.parse.quote(str(val).strip(), safe=":/ ")
+                if (key and val) and val not in format_vals:
+                    format_vals.append(val)
+            format_kvs.append("%s=%s" % (key, ",".join(format_vals)))
+        return ";".join(format_kvs)
+
+    def _write_annotations(self, anns, rec_id, size, out_handle):
+        """Add annotations which refer to an entire sequence."""
+        format_anns = self._format_keyvals(anns)
+        if format_anns:
+            parts = [
+                rec_id,
+                "annotation",
+                "remark",
+                "1",
+                str(size if size > 1 else 1),
+                ".",
+                ".",
+                ".",
+                format_anns,
+            ]
+            out_handle.write("\t".join(parts) + "\n")
+
+    def _write_header(self, out_handle):
+        """Write out standard header directives."""
+        out_handle.write("##gff-version 3\n")
+
+    def _write_fasta(self, recs, out_handle):
+        """Write sequence records using the ##FASTA directive."""
+        out_handle.write("##FASTA\n")
+        SeqIO.write(recs, out_handle, "fasta")
+
+
+def write(recs, out_handle, include_fasta=False):
+    """High level interface to write GFF3 files from SeqRecords and SeqFeatures.
+
+    If include_fasta is True, the GFF3 file will include sequence information
+    using the ##FASTA directive.
+    """
+    writer = GFF3Writer()
+    return writer.write(recs, out_handle, include_fasta)
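
As a quick orientation to the module above, here is a minimal, hedged usage sketch. It is not part of the changeset: it assumes GFFOutput.py is importable from the working directory, and the record, feature and qualifier values are invented for illustration.

    from io import StringIO

    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord
    from Bio.SeqFeature import SeqFeature, FeatureLocation

    import GFFOutput  # the module added above

    # one illustrative record carrying a single gene feature
    rec = SeqRecord(Seq("ACGT" * 25), id="chr1")
    rec.features = [
        SeqFeature(
            FeatureLocation(0, 100, strand=1),
            type="gene",
            qualifiers={"source": "demo", "ID": "gene1"},
        )
    ]

    out = StringIO()
    GFFOutput.write([rec], out)
    print(out.getvalue())
    # ##gff-version 3
    # ##sequence-region chr1 1 100
    # chr1  demo  gene  1  100  .  +  .  ID=gene1   (columns tab-separated)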
diff -r 1fe91657bfd6 -r 4c201a3d4755 blastxml_to_gapped_gff3.py
--- a/blastxml_to_gapped_gff3.py Thu Jan 25 07:58:28 2024 +0000
+++ b/blastxml_to_gapped_gff3.py Sun Jan 28 06:48:52 2024 +0000
@@ -32,7 +32,7 @@
 
         recid = record.query
         if " " in recid:
-            recid = recid[0: recid.index(" ")]
+            recid = recid[0 : recid.index(" ")]
 
         rec = SeqRecord(Seq("ACTG"), id=recid)
         for idx_hit, hit in enumerate(record.alignments):
@@ -72,7 +72,7 @@
                     qualifiers["blast_" + prop] = getattr(hsp, prop, None)
 
                 desc = hit.title.split(" >")[0]
-                qualifiers["description"] = desc[desc.index(" "):]
+                qualifiers["description"] = desc[desc.index(" ") :]
 
                 # This required a fair bit of sketching out/match to figure out
                 # the first time.
@@ -161,9 +161,9 @@
     fm = ""
     fs = ""
     for position in re.finditer("-", query):
-        fq += query[prev: position.start()]
-        fm += match[prev: position.start()]
-        fs += subject[prev: position.start()]
+        fq += query[prev : position.start()]
+        fm += match[prev : position.start()]
+        fs += subject[prev : position.start()]
         prev = position.start() + 1
     fq += query[prev:]
     fm += match[prev:]
@@ -290,7 +290,9 @@
         help="Trim blast hits to be only as long as the parent feature",
     )
     parser.add_argument(
-        "--trim_end", action="store_true", help="Cut blast results off at end of gene"
+        "--trim_end",
+        action="store_true",
+        help="Cut blast results off at end of gene",
     )
     parser.add_argument("--include_seq", action="store_true", help="Include sequence")
     args = parser.parse_args()
diff -r 1fe91657bfd6 -r 4c201a3d4755 gff3_rebase.py
--- a/gff3_rebase.py Thu Jan 25 07:58:28 2024 +0000
+++ b/gff3_rebase.py Sun Jan 28 06:48:52 2024 +0000
@@ -65,7 +65,10 @@
 
         if hasattr(feature, "sub_features"):
             for x in feature_lambda(
-                feature.sub_features, test, test_kwargs, subfeatures=subfeatures
+                feature.sub_features,
+                test,
+                test_kwargs,
+                subfeatures=subfeatures,
             ):
                 yield x
 
@@ -197,7 +200,9 @@
         help="Child GFF3 annotations to rebase against parent",
     )
     parser.add_argument(
-        "--interpro", action="store_true", help="Interpro specific modifications"
+        "--interpro",
+        action="store_true",
+        help="Interpro specific modifications",
     )
     parser.add_argument(
         "--protein2dna",
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/GFFOutput.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jb2_GFF/GFFOutput.py Sun Jan 28 06:48:52 2024 +0000
@@ -0,0 +1,233 @@
[new file, 233 lines: the same GFF3 writer module as the top-level GFFOutput.py shown in full above (module docstring, _IdHandler, GFF3Writer and the module-level write() helper), re-wrapped by the formatter to a narrower line width. The source dump is truncated here; see that diff for the content.]
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/GFFParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jb2_GFF/GFFParser.py Sun Jan 28 06:48:52 2024 +0000
@@ -0,0 +1,1099 @@
+"""Parse GFF files into features attached to Biopython SeqRecord objects.
+
+This deals with GFF3 formatted files, a tab delimited format for storing
+sequence features and annotations:
+
+http://www.sequenceontology.org/gff3.shtml
+
+It will also deal with older GFF versions (GTF/GFF2):
+
+http://www.sanger.ac.uk/Software/formats/GFF/GFF_Spec.shtml
+http://mblab.wustl.edu/GTF22.html
+
+The implementation utilizes map/reduce parsing of GFF using Disco. Disco
+(http://discoproject.org) is a Map-Reduce framework for Python utilizing
+Erlang for parallelization. The code works on a single processor without
+Disco using the same architecture.
+"""
+import os
+import copy
+import json
+import re
+import collections
+import io
+import itertools
+import warnings
+import six
+from six.moves import urllib
+
+from Bio.SeqRecord import SeqRecord
+from Bio import SeqFeature
+from Bio import SeqIO
+from Bio import BiopythonDeprecationWarning
+
+import disco
+
+# Make defaultdict compatible with versions of python older than 2.4
+try:
+    collections.defaultdict
+except AttributeError:
+    import _utils
+
+    collections.defaultdict = _utils.defaultdict
+
+unknown_seq_avail = False
+try:
+    from Bio.Seq import UnknownSeq
+
+    unknown_seq_avail = True
+except ImportError:
+    # Starting with biopython 1.81, has been removed
+    from Bio.Seq import _UndefinedSequenceData
+    from Bio.Seq import Seq
+
+
+warnings.simplefilter("ignore", BiopythonDeprecationWarning)
[... most of this 1099-line file is truncated in the source dump. The visible head continues with _gff_line_map(), whose nested _merge_keyvals() and _split_keyvals() helpers turn GFF3 key=value and GFF2/GTF key "value" attribute strings into qualifier dictionaries; the visible tail covers the GFFExaminer methods available_limits(), which counts the values available for each filterable field, and parent_child_map(), which maps each parent (source, type) to the sorted unique (source, type) tuples of its children.]
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/__init__.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jb2_GFF/__init__.py Sun Jan 28 06:48:52 2024 +0000
@@ -0,0 +1,6 @@
+"""Top level of GFF parsing providing shortcuts for useful classes.
+"""
+from jb2_GFF.GFFParser import GFFParser, DiscoGFFParser, GFFExaminer, parse, parse_simple
+from jb2_GFF.GFFOutput import GFF3Writer, write
+
+__version__ = "0.6.9"
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/__pycache__/GFFOutput.cpython-310.pyc
Binary file jb2_GFF/__pycache__/GFFOutput.cpython-310.pyc has changed

diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/__pycache__/GFFParser.cpython-310.pyc
Binary file jb2_GFF/__pycache__/GFFParser.cpython-310.pyc has changed

diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/__pycache__/__init__.cpython-310.pyc
Binary file jb2_GFF/__pycache__/__init__.cpython-310.pyc has changed
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/_utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jb2_GFF/_utils.py Sun Jan 28 06:48:52 2024 +0000
@@ -0,0 +1,49 @@
+class defaultdict(dict):
+    """Back compatible defaultdict:
+    http://code.activestate.com/recipes/523034/"""
+
+    def __init__(self, default_factory=None, *a, **kw):
+        if default_factory is not None and not hasattr(
+            default_factory, "__call__"
+        ):
+            raise TypeError("first argument must be callable")
+        dict.__init__(self, *a, **kw)
+        self.default_factory = default_factory
+
+    def __getitem__(self, key):
+        try:
+            return dict.__getitem__(self, key)
+        except KeyError:
+            return self.__missing__(key)
+
+    def __missing__(self, key):
+        if self.default_factory is None:
+            raise KeyError(key)
+        self[key] = value = self.default_factory()
+        return value
+
+    def __reduce__(self):
+        if self.default_factory is None:
+            args = tuple()
+        else:
+            args = (self.default_factory,)
+        return type(self), args, None, None, self.items()
+
+    def copy(self):
+        return self.__copy__()
+
+    def __copy__(self):
+        return type(self)(self.default_factory, self)
+
+    def __deepcopy__(self, memo):
+        import copy
+
+        return type(self)(
+            self.default_factory, copy.deepcopy(self.items())
+        )
+
+    def __repr__(self):
+        return "defaultdict(%s, %s)" % (
+            self.default_factory,
+            dict.__repr__(self),
+        )
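
A quick illustrative check (not part of the changeset, with the class above in scope) that the backport behaves like the builtin collections.defaultdict:

    counts = defaultdict(int)
    for word in ["gene", "mRNA", "gene"]:
        counts[word] += 1  # __missing__ creates int() == 0 on first access
    print(counts)
    # defaultdict(<class 'int'>, {'gene': 2, 'mRNA': 1})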
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_webserver.py
--- a/jb2_webserver.py Thu Jan 25 07:58:28 2024 +0000
+++ b/jb2_webserver.py Sun Jan 28 06:48:52 2024 +0000
@@ -169,7 +169,9 @@
         help=f"Port to listen on (default: {DEFAULT_PORT})",
     )
     parser.add_argument(
-        "--bind", default="0.0.0.0", help="IP address to bind to (default: 0.0.0.0)"
+        "--bind",
+        default="0.0.0.0",
+        help="IP address to bind to (default: 0.0.0.0)",
     )
     args = parser.parse_args()
 
diff -r 1fe91657bfd6 -r 4c201a3d4755 jbrowse2.py
--- a/jbrowse2.py Thu Jan 25 07:58:28 2024 +0000
+++ b/jbrowse2.py Sun Jan 28 06:48:52 2024 +0000
@@ -458,6 +458,10 @@
                 self.genome_name = (
                     genome_name  # first one for all tracks - other than paf
                 )
+                self.genome_firstcontig = None
+                fl = open(fapath, "r").readline().strip().split(">", 1)
+                if len(fl) > 1:
+                    self.genome_firstcontig = fl[1].strip()
         if self.config_json.get("assemblies", None):
             self.config_json["assemblies"] += assemblies
         else:
@@ -560,7 +564,7 @@
         # can be served - if public.
         # dsId = trackData["metadata"]["dataset_id"]
         # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId)
-        hname = trackData["label"]
+        hname = trackData["name"]
         dest = os.path.join(self.outdir, hname)
         cmd = ["cp", data, dest]
         # these can be very big.
@@ -648,7 +652,10 @@
                     "type": "LinearBasicDisplay",
                     "displayId": "%s-LinearBasicDisplay" % tId,
                 },
-                {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
+                {
+                    "type": "LinearArcDisplay",
+                    "displayId": "%s-LinearArcDisplay" % tId,
+                },
             ],
         }
         style_json = self._prepare_track_style(trackDict)
@@ -717,7 +724,10 @@
                     "type": "LinearBasicDisplay",
                     "displayId": "%s-LinearBasicDisplay" % tId,
                 },
-                {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
+                {
+                    "type": "LinearArcDisplay",
+                    "displayId": "%s-LinearArcDisplay" % tId,
+                },
             ],
         }
         style_json = self._prepare_track_style(trackDict)
@@ -906,7 +916,10 @@
                     "type": "LinearBasicDisplay",
                     "displayId": "%s-LinearBasicDisplay" % tId,
                 },
-                {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
+                {
+                    "type": "LinearArcDisplay",
+                    "displayId": "%s-LinearArcDisplay" % tId,
+                },
             ],
         }
         style_json = self._prepare_track_style(trackDict)
@@ -945,7 +958,10 @@
                     "type": "LinearPileupDisplay",
                     "displayId": "%s-LinearPileupDisplay" % tId,
                 },
-                {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
+                {
+                    "type": "LinearArcDisplay",
+                    "displayId": "%s-LinearArcDisplay" % tId,
+                },
             ],
         }
         style_json = self._prepare_track_style(trackDict)
@@ -983,14 +999,14 @@
                 "assemblyNames": [self.genome_name, pgname],
             },
             # "displays": [
-                # {
-                    # "type": "LinearSyntenyDisplay",
-                    # "displayId": "%s-LinearSyntenyDisplay" % tId,
-                # },
-                # {
-                    # "type": "DotPlotDisplay",
-                    # "displayId": "%s-DotPlotDisplay" % tId,
-                # },
+            # {
+            # "type": "LinearSyntenyDisplay",
+            # "displayId": "%s-LinearSyntenyDisplay" % tId,
+            # },
+            # {
+            # "type": "DotPlotDisplay",
+            # "displayId": "%s-DotPlotDisplay" % tId,
+            # },
             # ],
         }
         style_json = self._prepare_track_style(trackDict)
@@ -1143,13 +1159,17 @@
                 )
             elif dataset_ext == "blastxml":
                 self.add_blastxml(
-                    dataset_path, outputTrackConfig, track["conf"]["options"]["blast"]
+                    dataset_path,
+                    outputTrackConfig,
+                    track["conf"]["options"]["blast"],
                 )
[... a stretch of this diff is truncated in the source dump ...]
                     track["conf"]["options"]["synteny"],
                 )
             else:
                 log.warn("Do not know how to handle %s", dataset_ext)
@@ -1194,43 +1214,42 @@
         view_json = {"type": "LinearGenomeView", "tracks": tracks_data}

         refName = None
+        drdict = {
+            "reversed": False,
+            "assemblyName": self.genome_name,
+            "start": 0,
+            "end": 100000,
+        }
+
         if data.get("defaultLocation", ""):
             ddl = data["defaultLocation"]
-            loc_match = re.search(
-                r"^([^:]+):(\d+)\.+(\d+)$", ddl
-            )
+            loc_match = re.search(r"^([^:]+):(\d*)\.*(\d*)$", ddl)
             if loc_match:
                 refName = loc_match.group(1)
-                start = int(loc_match.group(2))
-                end = int(loc_match.group(3))
+                drdict["refName"] = refName
+                if loc_match.group(2) > "":
+                    drdict["start"] = int(loc_match.group(2))
+                if loc_match.group(3) > "":
+                    drdict["end"] = int(loc_match.group(3))
             else:
                 logging.info(
                     "@@@ regexp could not match contig:start..end in the supplied location %s - please fix"
                     % ddl
                 )
-        elif self.genome_name is not None:
-            start = 0
-            end = 10000  # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708
+        elif self.genome_firstcontig is not None:
+            drdict["refName"] = self.genome_firstcontig
             logging.info(
-                "@@@ no defaultlocation found for default session - please add one"
+                "@@@ no defaultlocation found for default session - using %s as first contig found"
+                % self.genome_firstcontig
             )

-        if refName is not None:
+        if drdict.get("refName", None):
             # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
             view_json["displayedRegions"] = [
-                {
-                    "refName": refName,
-                    "start": start,
-                    "end": end,
-                    "reversed": False,
-                    "assemblyName": self.genome_name,
-                }
+                drdict,
             ]

-            logging.info(
-                "@@@ defaultlocation %s for default session"
-                % view_json["displayedRegions"]
-            )
+            logging.info("@@@ defaultlocation %s for default session" % drdict)
         else:
             logging.info(
                 "@@@ no contig name found for default session - please add one!"
@@ -1307,12 +1326,19 @@
         ]:
             cmd = ["rm", "-rf", os.path.join(self.outdir, fn)]
             self.subprocess_check_call(cmd)
-        cmd = ["cp", os.path.join(INSTALLED_TO, "jb2_webserver.py"), self.outdir]
+        cmd = [
+            "cp",
+            os.path.join(INSTALLED_TO, "jb2_webserver.py"),
+            self.outdir,
+        ]
         self.subprocess_check_call(cmd)


 def parse_style_conf(item):
-    if "type" in item.attrib and item.attrib["type"] in ["boolean", "integer"]:
+    if "type" in item.attrib and item.attrib["type"] in [
+        "boolean",
+        "integer",
+    ]:
         if item.attrib["type"] == "boolean":
             return item.text in ("yes", "true", "True")
         elif item.attrib["type"] == "integer":
@@ -1379,7 +1405,10 @@
             for x in track.findall("files/trackFile"):
                 if is_multi_bigwig:
                     multi_bigwig_paths.append(
-                        (x.attrib["label"], os.path.realpath(x.attrib["path"]))
+                        (
+                            x.attrib["label"],
+                            os.path.realpath(x.attrib["path"]),
+                        )
                     )
                 else:
                     if trackfiles:
diff -r 1fe91657bfd6 -r 4c201a3d4755 jbrowse2.xml
--- a/jbrowse2.xml Thu Jan 25 07:58:28 2024 +0000
+++ b/jbrowse2.xml Sun Jan 28 06:48:52 2024 +0000
@@ -1,4 +1,4 @@
- <tool id="jbrowse2" name="jbrowse2" version="@TOOL_VERSION@+@WRAPPER_VERSION@_1" profile="22.05">
+ <tool id="jbrowse2" name="jbrowse2" version="@TOOL_VERSION@+@WRAPPER_VERSION@.1" profile="22.05">
     <description>genome browser</description>
     <macros>
         <import>macros.xml</import>
@@ -781,38 +781,38 @@
 about how to run the command line tools to format your data, and which
 options need to be supplied and where.
 
-The JBrowse-in-Galaxy tool is maintained by `the Galaxy IUC
-<https://github.com/galaxyproject/tools-iuc/issues>`__, who you can help you
-with missing features or bugs in the tool.
+The JBrowse-in-Galaxy tool has been rejected by `a Galaxy IUC
+<https://github.com/galaxyproject/tools-iuc/issues>`__, reviewer.
+It is maintained by https://github.com/fubar2 who you can help you
+with missing features or bugs in the tool. For the record, he remains unconvinced by the reviewer's logic,
+and disturbed by the distinctly coercive approach to introducing new code,
+compared to the more usual method of providing a working PR.
 
 Options
 -------
 
-The first option you encounter is the **Reference sequence(s)** to use. This option
-now accepts multiple fasta files, allowing you to build JBrowse2
-instances that contain data for multiple genomes or chrosomomes
-(generally known as "landmark features" in gff3 terminology.)
+**Reference or Assembly**
+
+Choose either a built-in reference or select one from your history.
 
-**Track Groups** represent a set of tracks in a single category. These
-can be used to let your users understand relationships between large
-groups of tracks.
+Track coordinates and contig names *must* match this reference precisely
+or they will not display.
+
+**Track Groups** represent a set of tracks in a single category.
 
 Annotation Tracks
 -----------------
 
-There are a few different types of tracks supported, each with their own
-set of options:
-
 GFF3/BED
 ~~~~~~~~
 
-These are standard feature tracks. They usually highlight genes,
-mRNAs and other features of interest along a genomic region.
+Standard feature tracks. They usually highlight genes, mRNAs and other features of interest along a genomic region.
 
 When these contain tens of millions of features, such as repeat regions from a VGP assembly, displaying one at a time leads
 to extremely slow loading times when a large region is in view, unless the "LinearPileupDisplay" display option is
 selected for that track in the styling options section. The default is LinearBasicDisplay, which shows all details and works
-well for relatively sparse bed files.
+well for relatively sparse bed files. A better option is to make a bigwig track using a set of windows based on the
+lengths of each assembly or reference contig.
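
One hedged way to build such a windowed summary (an aside, not part of this tool: it assumes the pyBigWig package, and the contig lengths, window size and feature intervals shown are illustrative):

    import pyBigWig

    contigs = {"chr1": 1_000_000}  # contig name -> length from the assembly
    features = [(0, 500), (100, 900), (10_000, 12_000)]  # (start, end) on chr1
    window = 1_000

    bw = pyBigWig.open("feature_density.bw", "w")
    bw.addHeader(list(contigs.items()))
    for name, length in contigs.items():
        nwin = length // window  # illustrative: length divisible by window
        counts = [0.0] * nwin
        for start, end in features:
            for w in range(start // window, min(nwin, (end - 1) // window + 1)):
                counts[w] += 1.0  # count features overlapping each window
        bw.addEntries(name, 0, values=counts, span=window, step=window)
    bw.close()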
 
 BAM Pileups
 ~~~~~~~~~~~
diff -r 1fe91657bfd6 -r 4c201a3d4755 macros.xml
--- a/macros.xml Thu Jan 25 07:58:28 2024 +0000
+++ b/macros.xml Sun Jan 28 06:48:52 2024 +0000
@@ -24,7 +24,7 @@
         </requirements>
     </xml>
     <token name="@DATA_DIR@">\$GALAXY_JBROWSE_SHARED_DIR</token>
-    <token name="@WRAPPER_VERSION@">galaxy0</token>
+    <token name="@WRAPPER_VERSION@">galaxy2</token>
     <token name="@ATTRIBUTION@"><![CDATA[
 **Attribution**
 This Galaxy tool relies on JBrowse2, maintained by the GMOD Community. The Galaxy wrapper is developed by the IUC
diff -r 1fe91657bfd6 -r 4c201a3d4755 plants.sh
--- a/plants.sh Thu Jan 25 07:58:28 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-planemo shed_update --shed_target toolshed --owner fubar --name jbrowse2 --shed_key 8d01f2f35d48a0405f72d6d37aedde60 ./
diff -r 1fe91657bfd6 -r 4c201a3d4755 readme.rst
--- a/readme.rst Thu Jan 25 07:58:28 2024 +0000
+++ b/readme.rst Sun Jan 28 06:48:52 2024 +0000
@@ -46,7 +46,8 @@
     - works well enough to be useful in workflows such as TreeValGal.
     - JB2 seems to set defaults wisely.
     - not yet ideal for users who need fine grained track control.
-    - synteny works.
+    - synteny (paf + reference) now working
+    - rehomed at https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 while IUC reviews are slowly sorted out.
 
 
 Wrapper License (MIT/BSD Style)
diff -r 1fe91657bfd6 -r 4c201a3d4755 static/images/bam.png
Binary file static/images/bam.png has changed

diff -r 1fe91657bfd6 -r 4c201a3d4755 static/images/bigwig.png
Binary file static/images/bigwig.png has changed

diff -r 1fe91657bfd6 -r 4c201a3d4755 static/images/blast.png
Binary file static/images/blast.png has changed

diff -r 1fe91657bfd6 -r 4c201a3d4755 static/images/opacity.png
Binary file static/images/opacity.png has changed

diff -r 1fe91657bfd6 -r 4c201a3d4755 static/images/sections.png
Binary file static/images/sections.png has changed

diff -r 1fe91657bfd6 -r 4c201a3d4755 static/images/styling.png
Binary file static/images/styling.png has changed