Previous changeset 16:1fe91657bfd6 (2024-01-25) | Next changeset 18:2e6c48910819 (2024-01-29)

Commit message:
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit a37bfdfc108501b11c7b2aa15efb1bd16f0c4b66

modified:
  blastxml_to_gapped_gff3.py
  gff3_rebase.py
  jb2_webserver.py
  jbrowse2.py
  jbrowse2.xml
  macros.xml
  readme.rst
  test-data/merlin.fa

added:
  Dockerfile
  GFFOutput.py
  jb2_GFF/GFFOutput.py
  jb2_GFF/GFFParser.py
  jb2_GFF/__init__.py
  jb2_GFF/__pycache__/GFFOutput.cpython-310.pyc
  jb2_GFF/__pycache__/GFFParser.cpython-310.pyc
  jb2_GFF/__pycache__/__init__.cpython-310.pyc
  jb2_GFF/_utils.py

removed:
  plants.sh
  static/images/bam.png
  static/images/bigwig.png
  static/images/blast.png
  static/images/opacity.png
  static/images/sections.png
  static/images/styling.png
diff -r 1fe91657bfd6 -r 4c201a3d4755 Dockerfile
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/Dockerfile  Sun Jan 28 06:48:52 2024 +0000
@@ -0,0 +1,3 @@
+FROM quay.io/bioconda/base-glibc-busybox-bash:3.0
+
+RUN adduser -u 1000 user1000 -D && adduser -u 1001 user1001 -D
diff -r 1fe91657bfd6 -r 4c201a3d4755 GFFOutput.py
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/GFFOutput.py  Sun Jan 28 06:48:52 2024 +0000
@@ -0,0 +1,213 @@
+"""Output Biopython SeqRecords and SeqFeatures to GFF3 format.
+
+The target format is GFF3, the current GFF standard:
+    http://www.sequenceontology.org/gff3.shtml
+"""
+from six.moves import urllib
+
+from Bio import SeqIO
+
+
+class _IdHandler:
+    """Generate IDs for GFF3 Parent/Child
+    relationships where they don't exist."""
+
+    def __init__(self):
+        self._prefix = "biopygen"
+        self._counter = 1
+        self._seen_ids = []
+
+    def _generate_id(self, quals):
+        """Generate a unique ID not present in our existing IDs."""
+        gen_id = self._get_standard_id(quals)
+        if gen_id is None:
+            while 1:
+                gen_id = "%s%s" % (self._prefix, self._counter)
+                if gen_id not in self._seen_ids:
+                    break
+                self._counter += 1
+        return gen_id
+
+    def _get_standard_id(self, quals):
+        """Retrieve standardized IDs from other sources like NCBI GenBank.
+
+        This tries to find IDs from known key/values when stored differently
+        than GFF3 specifications.
+        """
+        possible_keys = ["transcript_id", "protein_id"]
+        for test_key in possible_keys:
+            if test_key in quals:
+                cur_id = quals[test_key]
+                if isinstance(cur_id, tuple) or isinstance(cur_id, list):
+                    return cur_id[0]
+                else:
+                    return cur_id
+        return None
+
+    def update_quals(self, quals, has_children):
+        """Update a set of qualifiers, adding an ID if necessary."""
+        cur_id = quals.get("ID", None)
+        # if we have an ID, record it
+        if cur_id:
+            if not isinstance(cur_id, list) and not isinstance(cur_id, tuple):
+                cur_id = [cur_id]
+            for add_id in cur_id:
+                self._seen_ids.append(add_id)
+        # if we need one and don't have it, create a new one
+        elif has_children:
+            new_id = self._generate_id(quals)
+            self._seen_ids.append(new_id)
+            quals["ID"] = [new_id]
+        return quals
+
+
+class GFF3Writer:
+    """Write GFF3 files starting with standard Biopython objects."""
+
+    def __init__(self):
+        pass
+
+    def write(self, recs, out_handle, include_fasta=False):
+        """Write the provided records to the given handle in GFF3 format."""
+        id_handler = _IdHandler()
+        self._write_header(out_handle)
+        fasta_recs = []
+        try:
+            recs = iter(recs)
+        except TypeError:
+            recs = [recs]
+        for rec in recs:
+            self._write_rec(rec, out_handle)
+            self._write_annotations(rec.annotations, rec.id, len(rec.seq), out_handle)
+            for sf in rec.features:
+                sf = self._clean_feature(sf)
+                id_handler = self._write_feature(sf, rec.id, out_handle, id_handler)
+            if include_fasta and len(rec.seq) > 0:
+                fasta_recs.append(rec)
+        if len(fasta_recs) > 0:
+            self._write_fasta(fasta_recs, out_handle)
+
+    def _clean_feature(self, feature):
+        quals = {}
+        for key, val in feature.qualifiers.items():
+            if not isinstance(val, (list, tuple)):
+                val = [val]
+            val = [str(x) for x in val]
+            quals[key] = val
+        feature.qualifiers = quals
+        # Support for Biopython 1.68 and above, which removed sub_features
+        if not hasattr(feature, "sub_features"):
+            feature.sub_features = []
+        clean_sub = [self._clean_feature(f) for f in feature.sub_features]
+        feature.sub_features = clean_sub
+        return feature
+
+    def _write_rec(self, rec, out_handle):
+        # if we have a SeqRecord, write out optional directive
+        if len(rec.seq) > 0:
+            out_handle.write("##sequence-region %s 1 %s\n" % (rec.id, len(rec.seq)))
+
+    def _get_phase(self, feature):
+        if "phase" in feature.qualifiers:
+            phase = feature.qualifiers["phase"][0]
+        elif feature.type == "CDS":
+            phase = int(feature.qualifiers.get("codon_start", [1])[0]) - 1
+        else:
+            phase = "."
+        return str(phase)
+
+    def _write_feature(self, feature, rec_id, out_handle, id_handler, parent_id=None):
+        """Write a feature with location information."""
+        if feature.location.strand == 1:
+            strand = "+"
+        elif feature.location.strand == -1:
+            strand = "-"
+        else:
+            strand = "."
+        # remove any standard features from the qualifiers
+        quals = feature.qualifiers.copy()
+        for std_qual in ["source", "score", "phase"]:
+            if std_qual in quals and len(quals[std_qual]) == 1:
+                del quals[std_qual]
+        # add a link to a parent identifier if it exists
+        if parent_id:
+            if "Parent" not in quals:
+                quals["Parent"] = []
+            quals["Parent"].append(parent_id)
+        quals = id_handler.update_quals(quals, len(feature.sub_features) > 0)
+        if feature.type:
+            ftype = feature.type
+        else:
+            ftype = "sequence_feature"
+        parts = [
+            str(rec_id),
+            feature.qualifiers.get("source", ["feature"])[0],
+            ftype,
+            str(feature.location.start + 1),  # 1-based indexing
+            str(feature.location.end),
+            feature.qualifiers.get("score", ["."])[0],
+            strand,
+            self._get_phase(feature),
+            self._format_keyvals(quals),
+        ]
+        out_handle.write("\t".join(parts) + "\n")
+        for sub_feature in feature.sub_features:
+            id_handler = self._write_feature(
+                sub_feature,
+                rec_id,
+                out_handle,
+                id_handler,
+                quals["ID"][0],
+            )
+        return id_handler
+
+    def _format_keyvals(self, keyvals):
+        format_kvs = []
+        for key in sorted(keyvals.keys()):
+            values = keyvals[key]
+            key = key.strip()
+            format_vals = []
+            if not isinstance(values, list) or isinstance(values, tuple):
+                values = [values]
+            for val in values:
+                val = urllib.parse.quote(str(val).strip(), safe=":/ ")
+                if (key and val) and val not in format_vals:
+                    format_vals.append(val)
+            format_kvs.append("%s=%s" % (key, ",".join(format_vals)))
+        return ";".join(format_kvs)
+
+    def _write_annotations(self, anns, rec_id, size, out_handle):
+        """Add annotations which refer to an entire sequence."""
+        format_anns = self._format_keyvals(anns)
+        if format_anns:
+            parts = [
+                rec_id,
+                "annotation",
+                "remark",
+                "1",
+                str(size if size > 1 else 1),
+                ".",
+                ".",
+                ".",
+                format_anns,
+            ]
+            out_handle.write("\t".join(parts) + "\n")
+
+    def _write_header(self, out_handle):
+        """Write out standard header directives."""
+        out_handle.write("##gff-version 3\n")
+
+    def _write_fasta(self, recs, out_handle):
+        """Write sequence records using the ##FASTA directive."""
+        out_handle.write("##FASTA\n")
+        SeqIO.write(recs, out_handle, "fasta")
+
+
+def write(recs, out_handle, include_fasta=False):
+    """High level interface to write GFF3 files from SeqRecords and SeqFeatures.
+
+    If include_fasta is True, the GFF3 file will include sequence information
+    using the ##FASTA directive.
+    """
+    writer = GFF3Writer()
+    return writer.write(recs, out_handle, include_fasta)
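For orientation, a minimal usage sketch for the writer added above — assuming Biopython and six are installed and the file is importable as GFFOutput; the record and feature values here are invented for illustration:

    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord
    from Bio.SeqFeature import SeqFeature, FeatureLocation

    import GFFOutput

    # One hypothetical record carrying a single forward-strand gene feature
    rec = SeqRecord(Seq("ACGTACGTAC"), id="chr1")
    rec.features = [
        SeqFeature(
            FeatureLocation(0, 10, strand=1),
            type="gene",
            qualifiers={"ID": ["gene1"], "source": ["example"]},
        )
    ]

    # Emits ##gff-version 3, a ##sequence-region directive, and one feature line
    with open("out.gff3", "w") as handle:
        GFFOutput.write([rec], handle, include_fasta=False)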
diff -r 1fe91657bfd6 -r 4c201a3d4755 blastxml_to_gapped_gff3.py
--- a/blastxml_to_gapped_gff3.py  Thu Jan 25 07:58:28 2024 +0000
+++ b/blastxml_to_gapped_gff3.py  Sun Jan 28 06:48:52 2024 +0000
@@ -32,7 +32,7 @@
     recid = record.query
     if " " in recid:
-        recid = recid[0: recid.index(" ")]
+        recid = recid[0 : recid.index(" ")]
 
     rec = SeqRecord(Seq("ACTG"), id=recid)
     for idx_hit, hit in enumerate(record.alignments):
@@ -72,7 +72,7 @@
             qualifiers["blast_" + prop] = getattr(hsp, prop, None)
 
         desc = hit.title.split(" >")[0]
-        qualifiers["description"] = desc[desc.index(" "):]
+        qualifiers["description"] = desc[desc.index(" ") :]
 
         # This required a fair bit of sketching out/match to figure out
        # the first time.
@@ -161,9 +161,9 @@
     fm = ""
     fs = ""
     for position in re.finditer("-", query):
-        fq += query[prev: position.start()]
-        fm += match[prev: position.start()]
-        fs += subject[prev: position.start()]
+        fq += query[prev : position.start()]
+        fm += match[prev : position.start()]
+        fs += subject[prev : position.start()]
         prev = position.start() + 1
     fq += query[prev:]
     fm += match[prev:]
@@ -290,7 +290,9 @@
         help="Trim blast hits to be only as long as the parent feature",
     )
     parser.add_argument(
-        "--trim_end", action="store_true", help="Cut blast results off at end of gene"
+        "--trim_end",
+        action="store_true",
+        help="Cut blast results off at end of gene",
     )
     parser.add_argument("--include_seq", action="store_true", help="Include sequence")
     args = parser.parse_args()
diff -r 1fe91657bfd6 -r 4c201a3d4755 gff3_rebase.py
--- a/gff3_rebase.py  Thu Jan 25 07:58:28 2024 +0000
+++ b/gff3_rebase.py  Sun Jan 28 06:48:52 2024 +0000
@@ -65,7 +65,10 @@
 
     if hasattr(feature, "sub_features"):
         for x in feature_lambda(
-            feature.sub_features, test, test_kwargs, subfeatures=subfeatures
+            feature.sub_features,
+            test,
+            test_kwargs,
+            subfeatures=subfeatures,
         ):
             yield x
 
@@ -197,7 +200,9 @@
         help="Child GFF3 annotations to rebase against parent",
     )
     parser.add_argument(
-        "--interpro", action="store_true", help="Interpro specific modifications"
+        "--interpro",
+        action="store_true",
+        help="Interpro specific modifications",
     )
     parser.add_argument(
         "--protein2dna",
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/GFFOutput.py
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/jb2_GFF/GFFOutput.py  Sun Jan 28 06:48:52 2024 +0000
@@ -0,0 +1,233 @@
+"""Output Biopython SeqRecords and SeqFeatures to GFF3 format.
+
+The target format is GFF3, the current GFF standard:
+    http://www.sequenceontology.org/gff3.shtml
+"""
+from six.moves import urllib
+
+from Bio import SeqIO
+
+
+class _IdHandler:
+    """Generate IDs for GFF3 Parent/Child
+    relationships where they don't exist."""
+
+    def __init__(self):
+        self._prefix = "biopygen"
+        self._counter = 1
+        self._seen_ids = []
+
+    def _generate_id(self, quals):
+        """Generate a unique ID not present in our existing IDs."""
+        gen_id = self._get_standard_id(quals)
+        if gen_id is None:
+            while 1:
+                gen_id = "%s%s" % (self._prefix, self._counter)
+                if gen_id not in self._seen_ids:
+                    break
+                self._counter += 1
+        return gen_id
+
+    def _get_standard_id(self, quals):
+        """Retrieve standardized IDs from other sources like NCBI GenBank.
+
+        This tries to find IDs from known key/values when stored differently
+        than GFF3 specifications.
+        """
+        possible_keys = ["transcript_id", "protein_id"]
+        for test_key in possible_keys:
+            if test_key in quals:
+                cur_id = quals[test_key]
+                if isinstance(cur_id, tuple) or isinstance(
+                    cur_id, list
+                ):
+                    return cur_id[0]
+                else:
+                    return cur_id
+        return None
+
+    def update_quals(self, quals, has_children):
+        """Update a set of qualifiers, adding an ID if necessary."""
+        cur_id = quals.get("ID", None)
+        # if we have an ID, record it
+        if cur_id:
+            if not isinstance(cur_id, list) and not isinstance(
+                cur_id, tuple
+            ):
+                cur_id = [cur_id]
+            for add_id in cur_id:
+                self._seen_ids.append(add_id)
+        # if we need one and don't have it, create a new one
+        elif has_children:
+            new_id = self._generate_id(quals)
+            self._seen_ids.append(new_id)
+            quals["ID"] = [new_id]
+        return quals
+
+
+class GFF3Writer:
+    """Write GFF3 files starting with standard Biopython objects."""
+
+    def __init__(self):
+        pass
+
+    def write(self, recs, out_handle, include_fasta=False):
+        """Write the provided records to the given handle in GFF3 format."""
+        id_handler = _IdHandler()
+        self._write_header(out_handle)
+        fasta_recs = []
+        try:
+            recs = iter(recs)
+        except TypeError:
+            recs = [recs]
+        for rec in recs:
+            self._write_rec(rec, out_handle)
+            self._write_annotations(
+                rec.annotations, rec.id, len(rec.seq), out_handle
+            )
+            for sf in rec.features:
+                sf = self._clean_feature(sf)
+                id_handler = self._write_feature(
+                    sf, rec.id, out_handle, id_handler
+                )
+            if include_fasta and len(rec.seq) > 0:
+                fasta_recs.append(rec)
+        if len(fasta_recs) > 0:
+            self._write_fasta(fasta_recs, out_handle)
+
+    def _clean_feature(self, feature):
+        quals = {}
+        for key, val in feature.qualifiers.items():
+            if not isinstance(val, (list, tuple)):
+                val = [val]
+            val = [str(x) for x in val]
+            quals[key] = val
+        feature.qualifiers = quals
+        # Support for Biopython 1.68 and above, which removed sub_features
+        if not hasattr(feature, "sub_features"):
+            feature.sub_features = []
+        clean_sub = [
+            self._clean_feature(f) for f in feature.sub_features
+        ]
+        feature.sub_features = clean_sub
+        return feature
+
+    def _write_rec(self, rec, out_handle):
+        # if we have a SeqRecord, write out optional directive
+        if len(rec.seq) > 0:
+            out_handle.write(
+                "##sequence-region %s 1 %s\n" % (rec.id, len(rec.seq))
+            )
+
+    def _get_phase(self, feature):
+        if "phase" in feature.qualifiers:
+            phase = feature.qualifiers["phase"][0]
+        elif feature.type == "CDS":
+            phase = (
+                int(feature.qualifiers.get("codon_start", [1])[0]) - 1
+            )
+        else:
+            phase = "."
+        return str(phase)
+
+    def _write_feature(
+        self, feature, rec_id, out_handle, id_handler, parent_id=None
+    ):
+        """Write a feature with location information."""
+        if feature.location.strand == 1:
+            strand = "+"
+        elif feature.location.strand == -1:
+            strand = "-"
+        else:
+            strand = "."
+        # remove any standard features from the qualifiers
+        quals = feature.qualifiers.copy()
+        for std_qual in ["source", "score", "phase"]:
+            if std_qual in quals and len(quals[std_qual]) == 1:
+                del quals[std_qual]
+        # add a link to a parent identifier if it exists
+        if parent_id:
+            if "Parent" not in quals:
+                quals["Parent"] = []
+            quals["Parent"].append(parent_id)
+        quals = id_handler.update_quals(
+            quals, len(feature.sub_features) > 0
+        )
+        if feature.type:
+            ftype = feature.type
+        else:
+            ftype = "sequence_feature"
+        parts = [
+            str(rec_id),
+            feature.qualifiers.get("source", ["feature"])[0],
+            ftype,
+            str(feature.location.start + 1),  # 1-based indexing
+            str(feature.location.end),
+            feature.qualifiers.get("score", ["."])[0],
+            strand,
+            self._get_phase(feature),
+            self._format_keyvals(quals),
+        ]
+        out_handle.write("\t".join(parts) + "\n")
+        for sub_feature in feature.sub_features:
+            id_handler = self._write_feature(
+                sub_feature,
+                rec_id,
+                out_handle,
+                id_handler,
+                quals["ID"][0],
+            )
+        return id_handler
+
+    def _format_keyvals(self, keyvals):
+        format_kvs = []
+        for key in sorted(keyvals.keys()):
+            values = keyvals[key]
+            key = key.strip()
+            format_vals = []
+            if not isinstance(values, list) or isinstance(
+                values, tuple
+            ):
+                values = [values]
+            for val in values:
+                val = urllib.parse.quote(str(val).strip(), safe=":/ ")
+                if (key and val) and val not in format_vals:
+                    format_vals.append(val)
+            format_kvs.append("%s=%s" % (key, ",".join(format_vals)))
+        return ";".join(format_kvs)
+
+    def _write_annotations(self, anns, rec_id, size, out_handle):
+        """Add annotations which refer to an entire sequence."""
+        format_anns = self._format_keyvals(anns)
+        if format_anns:
+            parts = [
+                rec_id,
+                "annotation",
+                "remark",
+                "1",
+                str(size if size > 1 else 1),
+                ".",
+                ".",
+                ".",
+                format_anns,
+            ]
+            out_handle.write("\t".join(parts) + "\n")
+
+    def _write_header(self, out_handle):
+        """Write out standard header directives."""
+        out_handle.write("##gff-version 3\n")
+
+    def _write_fasta(self, recs, out_handle):
+        """Write sequence records using the ##FASTA directive."""
+        out_handle.write("##FASTA\n")
+        SeqIO.write(recs, out_handle, "fasta")
+
+
+def write(recs, out_handle, include_fasta=False):
+    """High level interface to write GFF3 files from SeqRecords and SeqFeatures.
+
+    If include_fasta is True, the GFF3 file will include sequence information
+    using the ##FASTA directive.
+    """
+    writer = GFF3Writer()
+    return writer.write(recs, out_handle, include_fasta)
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/GFFParser.py
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/jb2_GFF/GFFParser.py  Sun Jan 28 06:48:52 2024 +0000
@@ -0,0 +1,1099 @@
+"""Parse GFF files into features attached to Biopython SeqRecord objects.
+
+This deals with GFF3 formatted files, a tab delimited format for storing
+sequence features and annotations:
+
+http://www.sequenceontology.org/gff3.shtml
+
+It will also deal with older GFF versions (GTF/GFF2):
+
+http://www.sanger.ac.uk/Software/formats/GFF/GFF_Spec.shtml
+http://mblab.wustl.edu/GTF22.html
+
+The implementation utilizes map/reduce parsing of GFF using Disco. Disco
+(http://discoproject.org) is a Map-Reduce framework for Python utilizing
+Erlang for parallelization. The code works on a single processor without
+Disco using the same architecture.
+"""
+import os
+import copy
+import json
+import re
+import collections
+import io
+import itertools
+import warnings
+import six
+from six.moves import urllib
+
+from Bio.SeqRecord import SeqRecord
+from Bio import SeqFeature
+from Bio import SeqIO
+from Bio import BiopythonDeprecationWarning
+
+import disco
+
+# Make defaultdict compatible with versions of python older than 2.4
+try:
+    collections.defaultdict
+except AttributeError:
+    import _utils
+
+    collections.defaultdict = _utils.defaultdict
+
+unknown_seq_avail = False
+try:
+    from Bio.Seq import UnknownSeq
+
+    unknown_seq_avail = True
+except ImportError:
+    # Starting with biopython 1.81, has been removed
+    from Bio.Seq import _UndefinedSequenceData
+    from Bio.Seq import Seq
+
+
+warnings.simplefilter("ignore", BiopythonDeprecationWarning)
+
+
+def _gff_line_map(line, params):
+    """Map part of Map-Reduce; parses a line of GFF into a dictionary.
+
+    Given an input line from a GFF file, this:
+    - decides if the file passes our filtering limits
+    - if so:
+        - breaks it into component elements
+        - determines the type of attribute (flat, parent, child or annotation)
+        - generates a dictionary of GFF info which can be serialized as JSON
+    """
+
+    def _merge_keyvals(parts):
+        """Merge key-values escaped by quotes
+        that are improperly split at semicolons."""
+        out = []
+        for i, p in enumerate(parts):
+            if (
+                i > 0
+                and len(p) == 1
+                and p[0].endswith('"')
+                and not p[0].startswith('"')
+            ):
+                if out[-1][-1].startswith('"'):
+                    prev_p = out.pop(-1)
+                    to_merge = prev_p[-1]
+                    prev_p[-1] = "%s; %s" % (to_merge, p[0])
+                    out.append(prev_p)
+            else:
+                out.append(p)
+        return out
+
+    gff3_kw_pat = re.compile(r"\w+=")
+
+    def _split_keyvals(keyval_str):
+        """Split key-value pairs in a GFF2, GTF and GFF3 compatible way.
+
+        GFF3 has key value pairs like:
+          count=9;gene=amx-2;sequence=SAGE:aacggagccg
+        GFF2 and GTF have:
+          Sequence "Y74C9A" ; Note "Clone Y74C9A; Genbank AC024206"
+          name "fgenesh1_pg.C_chr_1000003"; transcriptId 869
+        """
+        quals = collections.defaultdict(list)
+        if keyval_str is None:
+            return quals
+        # ensembl GTF has a stray semi-colon at the end
+        if keyval_str[-1] == ";":
+            keyval_str = keyval_str[:-1]
+        # GFF2/GTF has a semi-colon with at least one space after it.
+        # It can have spaces on both sides; wormbase does this.
+        # GFF3 works with no spaces.
+        # Split at the first one we can recognize as working
+        parts = keyval_str.split(" ; ")
+        if len(parts) == 1:
+            parts = [x.strip() for x in keyval_str.split(";")]
+        # check if we have GFF3 style key-vals (with =)
+        is_gff2 = True
+        if gff3_kw_pat.match(parts[0]):
+            is_gff2 = False
+            key_vals = _merge_keyvals([p.split("=") for p in parts])
+        # otherwise, we are separated by a space with a key as the first item
+        else:
+            pieces = []
[...]
+                self.jsonify = False
+
+        params = _LocalParams()
+        params.limit_info = limit_info
+        params.filter_info = self._filter_info
+        return params
+
+    @_file_or_handle
+    def available_limits(self, gff_handle):
+        """Return dictionary information on possible limits for this file.
+
+        This returns a nested dictionary with the following structure:
+
+        keys -- names of items to filter by
+        values -- dictionary with:
+            keys -- filter choice
+            value -- counts of that filter in this file
+
+        Not a parallelized map-reduce implementation.
+        """
+        cur_limits = dict()
+        for filter_key in self._filter_info.keys():
+            cur_limits[filter_key] = collections.defaultdict(int)
+        for line in gff_handle:
+            # when we hit FASTA sequences, we are done with annotations
+            if line.startswith("##FASTA"):
+                break
+            # ignore empty and comment lines
+            if line.strip() and line.strip()[0] != "#":
+                parts = [p.strip() for p in line.split("\t")]
+                assert len(parts) >= 8, line
+                parts = parts[:9]
+                for (
+                    filter_key,
+                    cur_indexes,
+                ) in self._filter_info.items():
+                    cur_id = tuple([parts[i] for i in cur_indexes])
+                    cur_limits[filter_key][cur_id] += 1
+        # get rid of the default dicts
+        final_dict = dict()
+        for key, value_dict in cur_limits.items():
+            if len(key) == 1:
+                key = key[0]
+            final_dict[key] = dict(value_dict)
+        gff_handle.close()
+        return final_dict
+
+    @_file_or_handle
+    def parent_child_map(self, gff_handle):
+        """Provide a mapping of parent to child relationships in the file.
+
+        Returns a dictionary of parent child relationships:
+
+        keys -- tuple of (source, type) for each parent
+        values -- tuple of (source, type) as children of that parent
+
+        Not a parallelized map-reduce implementation.
+        """
+        # collect all of the parent and child types mapped to IDs
+        parent_sts = dict()
+        child_sts = collections.defaultdict(list)
+        for line in gff_handle:
+            # when we hit FASTA sequences, we are done with annotations
+            if line.startswith("##FASTA"):
+                break
+            if line.strip() and not line.startswith("#"):
+                line_type, line_info = _gff_line_map(
+                    line, self._get_local_params()
+                )[0]
+                if line_type == "parent" or (
+                    line_type == "child" and line_info["id"]
+                ):
+                    parent_sts[line_info["id"]] = (
+                        line_info["quals"].get("source", [""])[0],
+                        line_info["type"],
+                    )
+                if line_type == "child":
+                    for parent_id in line_info["quals"]["Parent"]:
+                        child_sts[parent_id].append(
+                            (
+                                line_info["quals"].get(
+                                    "source", [""]
+                                )[0],
+                                line_info["type"],
+                            )
+                        )
+        # print parent_sts, child_sts
+        # generate a dictionary of the unique final type relationships
+        pc_map = collections.defaultdict(list)
+        for parent_id, parent_type in parent_sts.items():
+            for child_type in child_sts[parent_id]:
+                pc_map[parent_type].append(child_type)
+        pc_final_map = dict()
+        for ptype, ctypes in pc_map.items():
+            unique_ctypes = list(set(ctypes))
+            unique_ctypes.sort()
+            pc_final_map[ptype] = unique_ctypes
+        return pc_final_map
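The tail visible above belongs to the examiner class (GFFExaminer, exported from the package __init__ below), which surveys a file without building full records. A hedged usage sketch — it assumes the module's top-level imports (notably disco) resolve in your environment, and annotations.gff3 is a hypothetical input:

    from jb2_GFF.GFFParser import GFFExaminer

    examiner = GFFExaminer()
    with open("annotations.gff3") as handle:
        # counts for each filterable column value, e.g. per (source, type)
        print(examiner.available_limits(handle))
    with open("annotations.gff3") as handle:
        # which child (source, type) pairs hang off which parent types
        print(examiner.parent_child_map(handle))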
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/__init__.py
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/jb2_GFF/__init__.py  Sun Jan 28 06:48:52 2024 +0000
@@ -0,0 +1,6 @@
+"""Top level of GFF parsing providing shortcuts for useful classes.
+"""
+from jb2_GFF.GFFParser import GFFParser, DiscoGFFParser, GFFExaminer, parse, parse_simple
+from jb2_GFF.GFFOutput import GFF3Writer, write
+
+__version__ = "0.6.9"
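A short sketch of the package-level shortcuts in use. The parse signature is assumed from the upstream bcbio-gff 0.6.9 API that this vendored copy appears to track, and the file names are hypothetical:

    from Bio import SeqIO
    import jb2_GFF as GFF

    # Attach GFF3 features to SeqRecords, using the FASTA for the sequences
    seq_dict = SeqIO.to_dict(SeqIO.parse("genome.fa", "fasta"))
    with open("annotations.gff3") as handle:
        for rec in GFF.parse(handle, base_dict=seq_dict):
            print(rec.id, len(rec.features))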
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/__pycache__/GFFOutput.cpython-310.pyc |
Binary file jb2_GFF/__pycache__/GFFOutput.cpython-310.pyc has changed |
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/__pycache__/GFFParser.cpython-310.pyc |
Binary file jb2_GFF/__pycache__/GFFParser.cpython-310.pyc has changed |
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/__pycache__/__init__.cpython-310.pyc |
Binary file jb2_GFF/__pycache__/__init__.cpython-310.pyc has changed |
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_GFF/_utils.py
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/jb2_GFF/_utils.py  Sun Jan 28 06:48:52 2024 +0000
@@ -0,0 +1,49 @@
+class defaultdict(dict):
+    """Back compatible defaultdict:
+    http://code.activestate.com/recipes/523034/"""
+
+    def __init__(self, default_factory=None, *a, **kw):
+        if default_factory is not None and not hasattr(
+            default_factory, "__call__"
+        ):
+            raise TypeError("first argument must be callable")
+        dict.__init__(self, *a, **kw)
+        self.default_factory = default_factory
+
+    def __getitem__(self, key):
+        try:
+            return dict.__getitem__(self, key)
+        except KeyError:
+            return self.__missing__(key)
+
+    def __missing__(self, key):
+        if self.default_factory is None:
+            raise KeyError(key)
+        self[key] = value = self.default_factory()
+        return value
+
+    def __reduce__(self):
+        if self.default_factory is None:
+            args = tuple()
+        else:
+            args = (self.default_factory,)
+        return type(self), args, None, None, self.items()
+
+    def copy(self):
+        return self.__copy__()
+
+    def __copy__(self):
+        return type(self)(self.default_factory, self)
+
+    def __deepcopy__(self, memo):
+        import copy
+
+        return type(self)(
+            self.default_factory, copy.deepcopy(self.items())
+        )
+
+    def __repr__(self):
+        return "defaultdict(%s, %s)" % (
+            self.default_factory,
+            dict.__repr__(self),
+        )
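This backport is only reached on Pythons lacking collections.defaultdict, but its intended semantics are easy to exercise; a minimal sketch:

    from jb2_GFF._utils import defaultdict

    counts = defaultdict(int)
    for word in ["gene", "mRNA", "gene"]:
        counts[word] += 1        # a missing key gets int() == 0 via __missing__
    assert counts["gene"] == 2
    assert counts["exon"] == 0   # lookup of a missing key inserts the default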
diff -r 1fe91657bfd6 -r 4c201a3d4755 jb2_webserver.py
--- a/jb2_webserver.py  Thu Jan 25 07:58:28 2024 +0000
+++ b/jb2_webserver.py  Sun Jan 28 06:48:52 2024 +0000
@@ -169,7 +169,9 @@
         help=f"Port to listen on (default: {DEFAULT_PORT})",
     )
     parser.add_argument(
-        "--bind", default="0.0.0.0", help="IP address to bind to (default: 0.0.0.0)"
+        "--bind",
+        default="0.0.0.0",
+        help="IP address to bind to (default: 0.0.0.0)",
     )
 
     args = parser.parse_args()
diff -r 1fe91657bfd6 -r 4c201a3d4755 jbrowse2.py
--- a/jbrowse2.py  Thu Jan 25 07:58:28 2024 +0000
+++ b/jbrowse2.py  Sun Jan 28 06:48:52 2024 +0000
@@ -458,6 +458,10 @@
         self.genome_name = (
             genome_name  # first one for all tracks - other than paf
         )
+        self.genome_firstcontig = None
+        fl = open(fapath, "r").readline().strip().split(">", 1)
+        if len(fl) > 1:
+            self.genome_firstcontig = fl[1].strip()
         if self.config_json.get("assemblies", None):
             self.config_json["assemblies"] += assemblies
         else:
@@ -560,7 +564,7 @@
         # can be served - if public.
         # dsId = trackData["metadata"]["dataset_id"]
         # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId)
-        hname = trackData["label"]
+        hname = trackData["name"]
         dest = os.path.join(self.outdir, hname)
         cmd = ["cp", data, dest]
         # these can be very big.
@@ -648,7 +652,10 @@
                     "type": "LinearBasicDisplay",
                    "displayId": "%s-LinearBasicDisplay" % tId,
                 },
-                {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
+                {
+                    "type": "LinearArcDisplay",
+                    "displayId": "%s-LinearArcDisplay" % tId,
+                },
             ],
         }
         style_json = self._prepare_track_style(trackDict)
@@ -717,7 +724,10 @@
                     "type": "LinearBasicDisplay",
                     "displayId": "%s-LinearBasicDisplay" % tId,
                 },
-                {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
+                {
+                    "type": "LinearArcDisplay",
+                    "displayId": "%s-LinearArcDisplay" % tId,
+                },
             ],
         }
         style_json = self._prepare_track_style(trackDict)
@@ -906,7 +916,10 @@
                     "type": "LinearBasicDisplay",
                     "displayId": "%s-LinearBasicDisplay" % tId,
                 },
-                {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
+                {
+                    "type": "LinearArcDisplay",
+                    "displayId": "%s-LinearArcDisplay" % tId,
+                },
             ],
         }
         style_json = self._prepare_track_style(trackDict)
@@ -945,7 +958,10 @@
                     "type": "LinearPileupDisplay",
                     "displayId": "%s-LinearPileupDisplay" % tId,
                 },
-                {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
+                {
+                    "type": "LinearArcDisplay",
+                    "displayId": "%s-LinearArcDisplay" % tId,
+                },
             ],
         }
         style_json = self._prepare_track_style(trackDict)
@@ -983,14 +999,14 @@
                 "assemblyNames": [self.genome_name, pgname],
             },
             # "displays": [
-            #    {
-            #        "type": "LinearSyntenyDisplay",
-            #        "displayId": "%s-LinearSyntenyDisplay" % tId,
-            #    },
-            #    {
-            #        "type": "DotPlotDisplay",
-            #        "displayId": "%s-DotPlotDisplay" % tId,
-            #    },
+            #     {
+            #         "type": "LinearSyntenyDisplay",
+            #         "displayId": "%s-LinearSyntenyDisplay" % tId,
+            #     },
+            #     {
+            #         "type": "DotPlotDisplay",
+            #         "displayId": "%s-DotPlotDisplay" % tId,
+            #     },
             # ],
         }
         style_json = self._prepare_track_style(trackDict)
@@ -1143,13 +1159,17 @@
                 )
             elif dataset_ext == "blastxml":
                 self.add_blastxml(
-                    dataset_path, outputTrackConfig, track["conf"]["options"]["blast"]
+                    dataset_path,
+                    outputTrackConfig,
+                    track["conf"]["options"]["blast"],
                 )
[...]
                     track["conf"]["options"]["synteny"],
                 )
             else:
                 log.warn("Do not know how to handle %s", dataset_ext)
@@ -1194,43 +1214,42 @@
         view_json = {"type": "LinearGenomeView", "tracks": tracks_data}
 
         refName = None
+        drdict = {
+            "reversed": False,
+            "assemblyName": self.genome_name,
+            "start": 0,
+            "end": 100000,
+        }
+
         if data.get("defaultLocation", ""):
             ddl = data["defaultLocation"]
-            loc_match = re.search(
-                r"^([^:]+):(\d+)\.+(\d+)$", ddl
-            )
+            loc_match = re.search(r"^([^:]+):(\d*)\.*(\d*)$", ddl)
             if loc_match:
                 refName = loc_match.group(1)
-                start = int(loc_match.group(2))
-                end = int(loc_match.group(3))
+                drdict["refName"] = refName
+                if loc_match.group(2) > "":
+                    drdict["start"] = int(loc_match.group(2))
+                if loc_match.group(3) > "":
+                    drdict["end"] = int(loc_match.group(3))
             else:
                 logging.info(
                     "@@@ regexp could not match contig:start..end in the supplied location %s - please fix"
                     % ddl
                 )
-        elif self.genome_name is not None:
-            start = 0
-            end = 10000  # Booh, hard coded! waiting for https://github.com/GMOD/jbrowse-components/issues/2708
+        elif self.genome_firstcontig is not None:
+            drdict["refName"] = self.genome_firstcontig
             logging.info(
-                "@@@ no defaultlocation found for default session - please add one"
+                "@@@ no defaultlocation found for default session - using %s as first contig found"
+                % self.genome_firstcontig
             )
 
-        if refName is not None:
+        if drdict.get("refName", None):
             # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
             view_json["displayedRegions"] = [
-                {
-                    "refName": refName,
-                    "start": start,
-                    "end": end,
-                    "reversed": False,
-                    "assemblyName": self.genome_name,
-                }
+                drdict,
             ]
 
-            logging.info(
-                "@@@ defaultlocation %s for default session"
-                % view_json["displayedRegions"]
-            )
+            logging.info("@@@ defaultlocation %s for default session" % drdict)
         else:
             logging.info(
                 "@@@ no contig name found for default session - please add one!"
@@ -1307,12 +1326,19 @@
         ]:
             cmd = ["rm", "-rf", os.path.join(self.outdir, fn)]
             self.subprocess_check_call(cmd)
-        cmd = ["cp", os.path.join(INSTALLED_TO, "jb2_webserver.py"), self.outdir]
+        cmd = [
+            "cp",
+            os.path.join(INSTALLED_TO, "jb2_webserver.py"),
+            self.outdir,
+        ]
         self.subprocess_check_call(cmd)
 
 
 def parse_style_conf(item):
-    if "type" in item.attrib and item.attrib["type"] in ["boolean", "integer"]:
+    if "type" in item.attrib and item.attrib["type"] in [
+        "boolean",
+        "integer",
+    ]:
         if item.attrib["type"] == "boolean":
             return item.text in ("yes", "true", "True")
         elif item.attrib["type"] == "integer":
@@ -1379,7 +1405,10 @@
     for x in track.findall("files/trackFile"):
         if is_multi_bigwig:
             multi_bigwig_paths.append(
-                (x.attrib["label"], os.path.realpath(x.attrib["path"]))
+                (
+                    x.attrib["label"],
+                    os.path.realpath(x.attrib["path"]),
+                )
             )
         else:
             if trackfiles:
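The loosened defaultLocation pattern above now tolerates a missing start and/or end, with the drdict defaults of 0 and 100000 applying instead. A quick demonstration using the regex exactly as committed; the example locations are hypothetical:

    import re

    loc_pat = re.compile(r"^([^:]+):(\d*)\.*(\d*)$")

    for ddl in ["chr1:100..200", "chr1:100", "chr1:", "chr1"]:
        m = loc_pat.search(ddl)
        print(ddl, "->", m.groups() if m else "no match")
    # chr1:100..200 -> ('chr1', '100', '200')
    # chr1:100      -> ('chr1', '100', '')
    # chr1:         -> ('chr1', '', '')
    # chr1          -> no match (a colon is still required)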
diff -r 1fe91657bfd6 -r 4c201a3d4755 jbrowse2.xml
--- a/jbrowse2.xml  Thu Jan 25 07:58:28 2024 +0000
+++ b/jbrowse2.xml  Sun Jan 28 06:48:52 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="jbrowse2" name="jbrowse2" version="@TOOL_VERSION@+@WRAPPER_VERSION@_1" profile="22.05">
+<tool id="jbrowse2" name="jbrowse2" version="@TOOL_VERSION@+@WRAPPER_VERSION@.1" profile="22.05">
     <description>genome browser</description>
     <macros>
         <import>macros.xml</import>
@@ -781,38 +781,38 @@
 about how to run the command line tools to format your data, and which
 options need to be supplied and where.
 
-The JBrowse-in-Galaxy tool is maintained by `the Galaxy IUC
-<https://github.com/galaxyproject/tools-iuc/issues>`__, who you can help you
-with missing features or bugs in the tool.
+The JBrowse-in-Galaxy tool has been rejected by `a Galaxy IUC
+<https://github.com/galaxyproject/tools-iuc/issues>`__ reviewer.
+It is maintained by https://github.com/fubar2, who can help you
+with missing features or bugs in the tool. For the record, he remains unconvinced by the reviewer's logic,
+and disturbed by the distinctly coercive approach to introducing new code,
+compared to the more usual method of providing a working PR.
 
 Options
 -------
 
-The first option you encounter is the **Reference sequence(s)** to use. This option
-now accepts multiple fasta files, allowing you to build JBrowse2
-instances that contain data for multiple genomes or chrosomomes
-(generally known as "landmark features" in gff3 terminology.)
+**Reference or Assembly**
+
+Choose either a built-in genome or one from your history.
 
-**Track Groups** represent a set of tracks in a single category. These
-can be used to let your users understand relationships between large
-groups of tracks.
+Track coordinates and contig names *must* match this reference precisely
+or they will not display.
+
+**Track Groups** represent a set of tracks in a single category.
 
 Annotation Tracks
 -----------------
 
-There are a few different types of tracks supported, each with their own
-set of options:
-
 GFF3/BED
 ~~~~~~~~
 
-These are standard feature tracks. They usually highlight genes,
-mRNAs and other features of interest along a genomic region.
+Standard feature tracks. They usually highlight genes, mRNAs and other features of interest along a genomic region.
 When these contain tens of millions of features, such as repeat regions from a VGP assembly, displaying one at a time leads
 to extremely slow loading times when a large region is in view, unless the "LinearPileupDisplay" display option is
 selected for that track in the styling options section. The default is LinearBasicDisplay, which shows all details and works
-well for relatively sparse bed files.
+well for relatively sparse bed files. A better option is to make a bigwig track using a set of windows based on the
+lengths of each assembly or reference contig.
 
 BAM Pileups
 ~~~~~~~~~~~
diff -r 1fe91657bfd6 -r 4c201a3d4755 macros.xml
--- a/macros.xml  Thu Jan 25 07:58:28 2024 +0000
+++ b/macros.xml  Sun Jan 28 06:48:52 2024 +0000
@@ -24,7 +24,7 @@
         </requirements>
     </xml>
     <token name="@DATA_DIR@">\$GALAXY_JBROWSE_SHARED_DIR</token>
-    <token name="@WRAPPER_VERSION@">galaxy0</token>
+    <token name="@WRAPPER_VERSION@">galaxy2</token>
     <token name="@ATTRIBUTION@"><![CDATA[
 **Attribution**
 This Galaxy tool relies on the JBrowse2, maintained by the GMOD Community. The Galaxy wrapper is developed by the IUC
diff -r 1fe91657bfd6 -r 4c201a3d4755 plants.sh
--- a/plants.sh  Thu Jan 25 07:58:28 2024 +0000
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-planemo shed_update --shed_target toolshed --owner fubar --name jbrowse2 --shed_key 8d01f2f35d48a0405f72d6d37aedde60 ./
diff -r 1fe91657bfd6 -r 4c201a3d4755 readme.rst
--- a/readme.rst  Thu Jan 25 07:58:28 2024 +0000
+++ b/readme.rst  Sun Jan 28 06:48:52 2024 +0000
@@ -46,7 +46,8 @@
  - works well enough to be useful in workflows such as TreeValGal.
  - JB2 seems to set defaults wisely.
  - not yet ideal for users who need fine grained track control.
- - synteny works.
+ - synteny (paf + reference) now working
+ - rehomed at https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 while IUC reviews are slowly sorted out.
 
 Wrapper License (MIT/BSD Style)
diff -r 1fe91657bfd6 -r 4c201a3d4755 static/images/bam.png |
Binary file static/images/bam.png has changed |
diff -r 1fe91657bfd6 -r 4c201a3d4755 static/images/bigwig.png |
Binary file static/images/bigwig.png has changed |
diff -r 1fe91657bfd6 -r 4c201a3d4755 static/images/blast.png |
Binary file static/images/blast.png has changed |
diff -r 1fe91657bfd6 -r 4c201a3d4755 static/images/opacity.png |
Binary file static/images/opacity.png has changed |
diff -r 1fe91657bfd6 -r 4c201a3d4755 static/images/sections.png |
Binary file static/images/sections.png has changed |
diff -r 1fe91657bfd6 -r 4c201a3d4755 static/images/styling.png |
Binary file static/images/styling.png has changed |