Previous changeset: 18:2e6c48910819 (2024-01-29)
Next changeset: 20:9c7aa5885721 (2024-01-30)

Commit message:
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 1290bf486bc55c02fecd0327de10a28655a18e81-dirty

modified:
gff3_rebase.py jbrowse2.py jbrowse2.xml macros.xml

added:
autogenJB2.py autogenJB2.xml

removed:
GFFOutput.py jb2_GFF/GFFOutput.py jb2_GFF/GFFParser.py jb2_GFF/__init__.py jb2_GFF/__pycache__/GFFOutput.cpython-310.pyc jb2_GFF/__pycache__/GFFParser.cpython-310.pyc jb2_GFF/__pycache__/__init__.cpython-310.pyc jb2_GFF/_utils.py
diff -r 2e6c48910819 -r bde6b1d09f7d GFFOutput.py
--- a/GFFOutput.py	Mon Jan 29 02:34:43 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,213 +0,0 @@
-"""Output Biopython SeqRecords and SeqFeatures to GFF3 format.
-
-The target format is GFF3, the current GFF standard:
-    http://www.sequenceontology.org/gff3.shtml
-"""
-from six.moves import urllib
-
-from Bio import SeqIO
-
-
-class _IdHandler:
-    """Generate IDs for GFF3 Parent/Child
-    relationships where they don't exist."""
-
-    def __init__(self):
-        self._prefix = "biopygen"
-        self._counter = 1
-        self._seen_ids = []
-
-    def _generate_id(self, quals):
-        """Generate a unique ID not present in our existing IDs."""
-        gen_id = self._get_standard_id(quals)
-        if gen_id is None:
-            while 1:
-                gen_id = "%s%s" % (self._prefix, self._counter)
-                if gen_id not in self._seen_ids:
-                    break
-                self._counter += 1
-        return gen_id
-
-    def _get_standard_id(self, quals):
-        """Retrieve standardized IDs from other sources like NCBI GenBank.
-
-        This tries to find IDs from known key/values when stored differently
-        than GFF3 specifications.
-        """
-        possible_keys = ["transcript_id", "protein_id"]
-        for test_key in possible_keys:
-            if test_key in quals:
-                cur_id = quals[test_key]
-                if isinstance(cur_id, tuple) or isinstance(cur_id, list):
-                    return cur_id[0]
-                else:
-                    return cur_id
-        return None
-
-    def update_quals(self, quals, has_children):
-        """Update a set of qualifiers, adding an ID if necessary."""
-        cur_id = quals.get("ID", None)
-        # if we have an ID, record it
-        if cur_id:
-            if not isinstance(cur_id, list) and not isinstance(cur_id, tuple):
-                cur_id = [cur_id]
-            for add_id in cur_id:
-                self._seen_ids.append(add_id)
-        # if we need one and don't have it, create a new one
-        elif has_children:
-            new_id = self._generate_id(quals)
-            self._seen_ids.append(new_id)
-            quals["ID"] = [new_id]
-        return quals
-
-
-class GFF3Writer:
-    """Write GFF3 files starting with standard Biopython objects."""
-
-    def __init__(self):
-        pass
-
-    def write(self, recs, out_handle, include_fasta=False):
-        """Write the provided records to the given handle in GFF3 format."""
-        id_handler = _IdHandler()
-        self._write_header(out_handle)
-        fasta_recs = []
-        try:
-            recs = iter(recs)
-        except TypeError:
-            recs = [recs]
-        for rec in recs:
-            self._write_rec(rec, out_handle)
-            self._write_annotations(rec.annotations, rec.id, len(rec.seq), out_handle)
-            for sf in rec.features:
-                sf = self._clean_feature(sf)
-                id_handler = self._write_feature(sf, rec.id, out_handle, id_handler)
-            if include_fasta and len(rec.seq) > 0:
-                fasta_recs.append(rec)
-        if len(fasta_recs) > 0:
-            self._write_fasta(fasta_recs, out_handle)
-
-    def _clean_feature(self, feature):
-        quals = {}
-        for key, val in feature.qualifiers.items():
-            if not isinstance(val, (list, tuple)):
-                val = [val]
-            val = [str(x) for x in val]
-            quals[key] = val
-        feature.qualifiers = quals
-        # Support for Biopython 1.68 and above, which removed sub_features
-        if not hasattr(feature, "sub_features"):
-            feature.sub_features = []
-        clean_sub = [self._clean_feature(f) for f in feature.sub_features]
-        feature.sub_features = clean_sub
-        return feature
-
-    def _write_rec(self, rec, out_handle):
-        # if we have a SeqRecord, write out optional directive
-        if len(rec.seq) > 0:
-            out_handle.write("##sequence-region %s 1 %s\n" % (rec.id, len(rec.seq)))
-
-    def _get_phase(self, feature):
-        if "phase" in feature.qualifiers:
-            phase = feature.qualifiers["phase"][0]
-        elif feature.type == "CDS":
-            phase = int(feature.qualifiers.get("codon_start", [1])[0]) - 1
-        else:
-            phase = "."
-        return str(phase)
-
-    def _write_feature(self, feature, rec_id, out_handle, id_handler, parent_id=None):
-        """Write a feature with location information."""
-        if feature.location.strand == 1:
-            strand = "+"
-        elif feature.location.strand == -1:
-            strand = "-"
-        else:
-            strand = "."
-        # remove any standard features from the qualifiers
-        quals = feature.qualifiers.copy()
-        for std_qual in ["source", "score", "phase"]:
-            if std_qual in quals and len(quals[std_qual]) == 1:
-                del quals[std_qual]
-        # add a link to a parent identifier if it exists
-        if parent_id:
-            if "Parent" not in quals:
-                quals["Parent"] = []
-            quals["Parent"].append(parent_id)
-        quals = id_handler.update_quals(quals, len(feature.sub_features) > 0)
-        if feature.type:
-            ftype = feature.type
-        else:
-            ftype = "sequence_feature"
-        parts = [
-            str(rec_id),
-            feature.qualifiers.get("source", ["feature"])[0],
-            ftype,
-            str(feature.location.start + 1),  # 1-based indexing
-            str(feature.location.end),
-            feature.qualifiers.get("score", ["."])[0],
-            strand,
-            self._get_phase(feature),
-            self._format_keyvals(quals),
-        ]
-        out_handle.write("\t".join(parts) + "\n")
-        for sub_feature in feature.sub_features:
-            id_handler = self._write_feature(
-                sub_feature,
-                rec_id,
-                out_handle,
-                id_handler,
-                quals["ID"][0],
-            )
-        return id_handler
-
-    def _format_keyvals(self, keyvals):
-        format_kvs = []
-        for key in sorted(keyvals.keys()):
-            values = keyvals[key]
-            key = key.strip()
-            format_vals = []
-            if not isinstance(values, list) or isinstance(values, tuple):
-                values = [values]
-            for val in values:
-                val = urllib.parse.quote(str(val).strip(), safe=":/ ")
-                if (key and val) and val not in format_vals:
-                    format_vals.append(val)
-            format_kvs.append("%s=%s" % (key, ",".join(format_vals)))
-        return ";".join(format_kvs)
-
-    def _write_annotations(self, anns, rec_id, size, out_handle):
-        """Add annotations which refer to an entire sequence."""
-        format_anns = self._format_keyvals(anns)
-        if format_anns:
-            parts = [
-                rec_id,
-                "annotation",
-                "remark",
-                "1",
-                str(size if size > 1 else 1),
-                ".",
-                ".",
-                ".",
-                format_anns,
-            ]
-            out_handle.write("\t".join(parts) + "\n")
-
-    def _write_header(self, out_handle):
-        """Write out standard header directives."""
-        out_handle.write("##gff-version 3\n")
-
-    def _write_fasta(self, recs, out_handle):
-        """Write sequence records using the ##FASTA directive."""
-        out_handle.write("##FASTA\n")
-        SeqIO.write(recs, out_handle, "fasta")
-
-
-def write(recs, out_handle, include_fasta=False):
-    """High level interface to write GFF3 files from SeqRecords and SeqFeatures.
-
-    If include_fasta is True, the GFF3 file will include sequence information
-    using the ##FASTA directive.
-    """
-    writer = GFF3Writer()
-    return writer.write(recs, out_handle, include_fasta)
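Note: the deleted module's only public entry point is the module-level write() helper shown above. A minimal usage sketch, assuming the file is still importable as GFFOutput; the record and feature here are made up::

    from io import StringIO

    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord
    from Bio.SeqFeature import SeqFeature, FeatureLocation

    from GFFOutput import write  # the helper defined at the bottom of the module

    # Hypothetical single-gene record; any SeqRecord carrying SeqFeatures works.
    rec = SeqRecord(Seq("ACGTACGTACGT"), id="chr1")
    rec.features = [
        SeqFeature(FeatureLocation(2, 8, strand=1), type="gene",
                   qualifiers={"ID": ["gene1"], "source": ["test"]})
    ]

    out = StringIO()
    write([rec], out, include_fasta=True)
    print(out.getvalue())  # ##gff-version 3, ##sequence-region, one feature line, ##FASTA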
diff -r 2e6c48910819 -r bde6b1d09f7d autogenJB2.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/autogenJB2.py	Tue Jan 30 06:05:03 2024 +0000
@@ -0,0 +1,88 @@
+import argparse
+import os
+
+from jbrowse2 import jbrowseConnector as jbC
+
+
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="", epilog="")
+    parser.add_argument("--yaml", help="Track Configuration")
+    parser.add_argument("--outdir", help="Output directory", default="out")
+    parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.0.1")
+    args = parser.parse_args()
+    if os.path.exists(args.outdir):
+        root = args.outdir
+        dirList = os.scandir(root)
+        subdirs = [f.path for f in dirList if f.is_dir()]
+        genome_paths = [f.path for f in dirList if f.name.startswith('genome') and f.is_file()]
+        if len(genome_paths) > 0:
+            genome_fnames = [os.path.basename(x).split('_')[2:] for x in genome_paths]  # expect genome_1_genomename.fasta etc
+            jc = jbC(
+                outdir=args.outdir,
+                genomes=[
+                    {
+                        "path": x,
+                        "meta": {"name" : genome_fnames[i], },
+                    }
+                    for i, x in enumerate(genome_paths)
+                ],
+            )
+            jc.process_genomes()
+            # .add_default_view() replace from https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py
+            default_session_data = {
+                "visibility": {
+                    "default_on": [],
+                    "default_off": [],
+                },
+                "style": {},
+                "style_labels": {},
+            }
+
+            track_paths = [x for x in genome_paths if not x.startswith('genome') and x.is_file()]
+            for i, track in enumerate(track_paths):
+                track_conf = {}
+                track_conf['format'] = os.path.basename(track).split('_')[0]
+                track_conf["name"] = os.path.basename(track).split('_')[2:]  # expect genome_1_genomename.fasta etc
+                fext = os.path.splitext(os.path.basename(track)).replace('.', '')
+                track_conf["label"] = "%s_%i" % (os.path.basename(track), i)
+                track_conf["trackfiles"] = []
+                keys = jc.process_annotations(track_conf)
+
+                if keys:
+                    for key in keys:
+                        default_session_data["visibility"][
+                            track.attrib.get("visibility", "default_off")
+                        ].append(key)
+                    if track_conf.get("style", None):
+                        default_session_data["style"][key] = track_conf[
+                            "style"
+                        ]  # TODO do we need this anymore?
+                    if track_conf.get("style_lables", None):
+                        default_session_data["style_labels"][key] = track_conf.get(
+                            "style_labels", None
+                        )
+            # default_session_data["defaultLocation"] = root.find(
+            #     "metadata/general/defaultLocation"
+            # ).text
+            # default_session_data["session_name"] = root.find(
+            #     "metadata/general/session_name"
+            # ).text
+            # general_data = {
+            #     "analytics": root.find("metadata/general/analytics").text,
+            #     "primary_color": root.find("metadata/general/primary_color").text,
+            #     "secondary_color": root.find("metadata/general/secondary_color").text,
+            #     "tertiary_color": root.find("metadata/general/tertiary_color").text,
+            #     "quaternary_color": root.find("metadata/general/quaternary_color").text,
+            #     "font_size": root.find("metadata/general/font_size").text,
+            # }
+            # jc.add_general_configuration(general_data)
+            trackconf = jc.config_json.get("tracks", None)
+            if trackconf:
+                jc.config_json["tracks"].update(jc.tracksToAdd)
+            else:
+                jc.config_json["tracks"] = jc.tracksToAdd
+            jc.write_config()
+            jc.add_default_session(default_session_data)
+            # jc.text_index() not sure what broke here.
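Note: as committed, this script has several apparent bugs: the os.scandir() iterator is exhausted by the first comprehension, track_paths is filtered from genome_paths (so it can only ever contain genome files) and calls .startswith()/.is_file() on plain strings, os.path.splitext() returns a tuple so .replace() fails, and track is a string with no .attrib. A minimal sketch of what the filename-convention scan appears to intend; scan_collection and its return shape are hypothetical, not part of the commit::

    import os

    def scan_collection(root):
        """Split a staged collection into genomes and tracks by filename,
        assuming the convention in the comments above:
        genome_1_mygenome.fasta, bed_2_mytrack.bed, ..."""
        entries = list(os.scandir(root))  # materialise: scandir iterators are single-use
        genomes, tracks = [], []
        for f in entries:
            if not f.is_file():
                continue
            fmt = f.name.split("_")[0]                # leading token is the format
            name = "_".join(f.name.split("_")[2:])    # trailing tokens are the label
            name = os.path.splitext(name)[0]          # splitext returns (root, ext)
            if fmt == "genome":
                genomes.append({"path": f.path, "meta": {"name": name}})
            else:
                tracks.append({"path": f.path, "format": fmt, "name": name})
        return genomes, tracks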
diff -r 2e6c48910819 -r bde6b1d09f7d autogenJB2.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/autogenJB2.xml	Tue Jan 30 06:05:03 2024 +0000
@@ -0,0 +1,148 @@
+<tool id="autogenjb2" name="autogenjb2" version="2.10.0_0" profile="22.05">
+    <description>Files to JBrowse2</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edamInc"/>
+    <xrefs>
+        <xref type="bio.tools">jbrowse2</xref>
+    </xrefs>
+    <expand macro="requirements"/>
+    <version_command>python '${__tool_directory__}/autogenJB2.py' --version</version_command>
+    <command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/autogenJB2.py'
+--outdir '$jbrowseme'
+
+&&
+
+cp '$output.files_path/index.html' '$output'
+
+## Ugly testing hack since I cannot get <extra_files> to test the files I want to test. Hmph.
+#if str($uglyTestingHack) == "enabled":
+  && cp '$trackxml' '$output'
+#end if
+    ]]></command>
+    <inputs>
+        <param
+            label="Collection of files specially named to become tracks"
+            name="jbrowseme"
+            type="data_collection">
+        </param>
+        <param type="hidden" name="uglyTestingHack" value="" />
+    </inputs>
+    <outputs>
+        <data format="html" name="output" label="JBrowse2 on $reference_genome.genome.element_identifier"/>
+    </outputs>
+
+    <help><![CDATA[
+
+JBrowse2-in-Galaxy
+==================
+
+JBrowse2-in-Galaxy offers a highly configurable, workflow-compatible
+alternative to JBrowse1-in-Galaxy and Trackster.
+
+Compared to JBrowse1-in-Galaxy, there is no support for alternative codons for unusual genomes,
+and detailed track styling is not yet implemented. Send code.
+JBrowse1 development has now ceased in favour of JBrowse2.
+
+Use and local viewing
+=====================
+
+A JBrowse2 history item can be opened by viewing it (the "eye" icon).
+
+The same browser data and setup can also be downloaded as a compressed zip archive by clicking the download ("floppy disk") icon in the history.
+This can be shared and viewed without Galaxy.
+
+Outside Galaxy, a web server is required to serve the browser. A local python web server can be started using a script included in each archive,
+assuming that Python3 is already working on your desktop - if not, you will have to install it first. Unzip the archive (*unzip [filename].zip*) and change
+directory to the first level in that zip archive. It contains a file named *jb2_webserver.py*.
+
+With python3 installed,
+
+*python3 jb2_webserver.py*
+
+will serve the unarchived JBrowse2 configuration from the same directory as the python script automatically. If a new browser window does not open,
+but the script appears to be running, try pointing your web browser to the default of *localhost:8080*.
+
+Overview
+--------
+
+JBrowse is a fast, embeddable genome browser built completely with
+JavaScript and HTML5.
+
+The JBrowse-in-Galaxy (JiG) tool was written to help build complex
+JBrowse installations straight from Galaxy. It allows you to build up a JBrowse instance without worrying
+about how to run the command line tools to format your data, and which
+options need to be supplied and where.
+
+The JBrowse-in-Galaxy tool has been rejected by a `Galaxy IUC
+<https://github.com/galaxyproject/tools-iuc/issues>`__ reviewer.
+It is maintained by https://github.com/fubar2, who can help you
+with missing features or bugs in the tool. For the record, he remains unconvinced by the reviewer's logic,
+and disturbed by the distinctly coercive approach to introducing new code,
+compared to the more usual method of providing a working PR.
+
+Options
+-------
+
+**Reference or Assembly**
+
+Choose either a built-in genome or select one from your history.
+
+Track coordinates and contig names *must* match this reference precisely
+or they will not display.
+
+**Track Groups** represent a set of tracks in a single category.
+
+Annotation Tracks
+-----------------
+
+GFF3/BED
+~~~~~~~~
+
+Standard feature tracks. They usually highlight genes, mRNAs and other features of interest along a genomic region.
+
+When these contain tens of millions of features, such as repeat regions from a VGP assembly, displaying one at a time leads
+to extremely slow loading times when a large region is in view, unless the "LinearPileupDisplay" display option is
+selected for that track in the styling options section. The default is LinearBasicDisplay, which shows all details and works
+well for relatively sparse bed files. A better option is to make a bigwig track using a set of windows based on the
+lengths of each assembly or reference contig.
+
+BAM Pileups
+~~~~~~~~~~~
+
+We support BAM files and can automatically generate SNP tracks based on
+that bam data.
+
+
+BlastXML
+~~~~~~~~
+
+JiG now supports both blastn and blastp datasets. JiG internally uses a
+blastXML to gapped GFF3 tool to convert your blastxml datasets into a
+format amenable to visualization in JBrowse. This tool is also
+available separately from the IUC on the toolshed.
+
+**Minimum Gap Size** reflects how long a gap must be before it becomes a
+real gap in the processed gff3 file. If the minimum gap size were set much higher, say
+100nt, many of the smaller gaps would disappear, and the features on
+both sides would be merged into one, longer feature. This setting is
+inversely proportional to runtime and output file size. *Do not set this
+to a low value for large datasets*. By setting this number lower, you
+will have extremely large outputs and extremely long runtimes. The
+default was configured based on the author's experience, but the
+author only works on small viruses. It is *strongly* recommended that
+you filter your blast results before display, e.g. picking out the top
+10 hits or so.
+
+**Protein blast search** option merely informs underlying tools that
+they should adjust feature locations by 3x.
+
+
+@ATTRIBUTION@
+]]></help>
+    <expand macro="citations"/>
+</tool>
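Note: the bundled *jb2_webserver.py* is not part of this diff. As a rough illustration of what the help above describes, a stand-in static server using only the Python standard library would look like this (this is not the actual script)::

    # Serve the unzipped archive directory on the default localhost:8080
    # mentioned in the help text above. Run from inside the unzipped folder.
    import http.server
    import socketserver

    PORT = 8080  # the default port the help tells users to try

    handler = http.server.SimpleHTTPRequestHandler  # serves the current directory
    with socketserver.TCPServer(("", PORT), handler) as httpd:
        print("Serving JBrowse2 at http://localhost:%d" % PORT)
        httpd.serve_forever()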
diff -r 2e6c48910819 -r bde6b1d09f7d gff3_rebase.py
--- a/gff3_rebase.py	Mon Jan 29 02:34:43 2024 +0000
+++ b/gff3_rebase.py	Tue Jan 30 06:05:03 2024 +0000
@@ -117,7 +117,7 @@
         start *= 3
         end *= 3
 
-    if parent.location.strand >= 0:
+    if parent.location.strand !=None and parent.location.strand >= 0:
         ns = parent.location.start + start
         ne = parent.location.start + end
         st = +1
@@ -136,7 +136,8 @@
         ns %= 3
     if ne < 0:
         ne %= 3
-
+    if ns > ne:
+        ne, ns = ns, ne  # dunno why but sometimes happens
     feature.location = FeatureLocation(ns, ne, strand=st)
 
     if hasattr(feature, "sub_features"):
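Note: the first hunk guards against a strand of None (Biopython parses a GFF3 strand of "." to None, and None >= 0 raises TypeError on Python 3); the second normalises occasionally inverted coordinates. A worked sketch of the rebasing arithmetic, with hypothetical values, and assuming the minus-strand branch (not shown in the hunk) mirrors the plus-strand one by counting back from the parent end::

    # A protein-space hit at [10, 50) on a minus-strand parent at [1000, 2000):
    start, end = 10 * 3, 50 * 3           # protein -> nucleotide coordinates
    parent_start, parent_end = 1000, 2000
    ns = parent_end - end                 # 1850: minus strand counts from parent end
    ne = parent_end - start               # 1970
    if ns > ne:                           # the new guard: keep start <= end
        ns, ne = ne, ns
    assert (ns, ne) == (1850, 1970)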
diff -r 2e6c48910819 -r bde6b1d09f7d jb2_GFF/GFFOutput.py
--- a/jb2_GFF/GFFOutput.py	Mon Jan 29 02:34:43 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,233 +0,0 @@
-"""Output Biopython SeqRecords and SeqFeatures to GFF3 format.
-
-The target format is GFF3, the current GFF standard:
-    http://www.sequenceontology.org/gff3.shtml
-"""
-from six.moves import urllib
-
-from Bio import SeqIO
-
-
-class _IdHandler:
-    """Generate IDs for GFF3 Parent/Child
-    relationships where they don't exist."""
-
-    def __init__(self):
-        self._prefix = "biopygen"
-        self._counter = 1
-        self._seen_ids = []
-
-    def _generate_id(self, quals):
-        """Generate a unique ID not present in our existing IDs."""
-        gen_id = self._get_standard_id(quals)
-        if gen_id is None:
-            while 1:
-                gen_id = "%s%s" % (self._prefix, self._counter)
-                if gen_id not in self._seen_ids:
-                    break
-                self._counter += 1
-        return gen_id
-
-    def _get_standard_id(self, quals):
-        """Retrieve standardized IDs from other sources like NCBI GenBank.
-
-        This tries to find IDs from known key/values when stored differently
-        than GFF3 specifications.
-        """
-        possible_keys = ["transcript_id", "protein_id"]
-        for test_key in possible_keys:
-            if test_key in quals:
-                cur_id = quals[test_key]
-                if isinstance(cur_id, tuple) or isinstance(
-                    cur_id, list
-                ):
-                    return cur_id[0]
-                else:
-                    return cur_id
-        return None
-
-    def update_quals(self, quals, has_children):
-        """Update a set of qualifiers, adding an ID if necessary."""
-        cur_id = quals.get("ID", None)
-        # if we have an ID, record it
-        if cur_id:
-            if not isinstance(cur_id, list) and not isinstance(
-                cur_id, tuple
-            ):
-                cur_id = [cur_id]
-            for add_id in cur_id:
-                self._seen_ids.append(add_id)
-        # if we need one and don't have it, create a new one
-        elif has_children:
-            new_id = self._generate_id(quals)
-            self._seen_ids.append(new_id)
-            quals["ID"] = [new_id]
-        return quals
-
-
-class GFF3Writer:
-    """Write GFF3 files starting with standard Biopython objects."""
-
-    def __init__(self):
-        pass
-
-    def write(self, recs, out_handle, include_fasta=False):
-        """Write the provided records to the given handle in GFF3 format."""
-        id_handler = _IdHandler()
-        self._write_header(out_handle)
-        fasta_recs = []
-        try:
-            recs = iter(recs)
-        except TypeError:
-            recs = [recs]
-        for rec in recs:
-            self._write_rec(rec, out_handle)
-            self._write_annotations(
-                rec.annotations, rec.id, len(rec.seq), out_handle
-            )
-            for sf in rec.features:
-                sf = self._clean_feature(sf)
-                id_handler = self._write_feature(
-                    sf, rec.id, out_handle, id_handler
-                )
-            if include_fasta and len(rec.seq) > 0:
-                fasta_recs.append(rec)
-        if len(fasta_recs) > 0:
-            self._write_fasta(fasta_recs, out_handle)
-
-    def _clean_feature(self, feature):
-        quals = {}
-        for key, val in feature.qualifiers.items():
-            if not isinstance(val, (list, tuple)):
-                val = [val]
-            val = [str(x) for x in val]
-            quals[key] = val
-        feature.qualifiers = quals
-        # Support for Biopython 1.68 and above, which removed sub_features
-        if not hasattr(feature, "sub_features"):
-            feature.sub_features = []
-        clean_sub = [
-            self._clean_feature(f) for f in feature.sub_features
-        ]
-        feature.sub_features = clean_sub
-        return feature
-
-    def _write_rec(self, rec, out_handle):
-        # if we have a SeqRecord, write out optional directive
-        if len(rec.seq) > 0:
-            out_handle.write(
-                "##sequence-region %s 1 %s\n" % (rec.id, len(rec.seq))
-            )
-
-    def _get_phase(self, feature):
-        if "phase" in feature.qualifiers:
-            phase = feature.qualifiers["phase"][0]
-        elif feature.type == "CDS":
-            phase = (
-                int(feature.qualifiers.get("codon_start", [1])[0]) - 1
-            )
-        else:
-            phase = "."
-        return str(phase)
-
-    def _write_feature(
-        self, feature, rec_id, out_handle, id_handler, parent_id=None
-    ):
-        """Write a feature with location information."""
-        if feature.location.strand == 1:
-            strand = "+"
-        elif feature.location.strand == -1:
-            strand = "-"
-        else:
-            strand = "."
-        # remove any standard features from the qualifiers
-        quals = feature.qualifiers.copy()
-        for std_qual in ["source", "score", "phase"]:
-            if std_qual in quals and len(quals[std_qual]) == 1:
-                del quals[std_qual]
-        # add a link to a parent identifier if it exists
-        if parent_id:
-            if "Parent" not in quals:
-                quals["Parent"] = []
-            quals["Parent"].append(parent_id)
-        quals = id_handler.update_quals(
-            quals, len(feature.sub_features) > 0
-        )
-        if feature.type:
-            ftype = feature.type
-        else:
-            ftype = "sequence_feature"
-        parts = [
-            str(rec_id),
-            feature.qualifiers.get("source", ["feature"])[0],
-            ftype,
-            str(feature.location.start + 1),  # 1-based indexing
-            str(feature.location.end),
-            feature.qualifiers.get("score", ["."])[0],
-            strand,
-            self._get_phase(feature),
-            self._format_keyvals(quals),
-        ]
-        out_handle.write("\t".join(parts) + "\n")
-        for sub_feature in feature.sub_features:
-            id_handler = self._write_feature(
-                sub_feature,
-                rec_id,
-                out_handle,
-                id_handler,
-                quals["ID"][0],
-            )
-        return id_handler
-
-    def _format_keyvals(self, keyvals):
-        format_kvs = []
-        for key in sorted(keyvals.keys()):
-            values = keyvals[key]
-            key = key.strip()
-            format_vals = []
-            if not isinstance(values, list) or isinstance(
-                values, tuple
-            ):
-                values = [values]
-            for val in values:
-                val = urllib.parse.quote(str(val).strip(), safe=":/ ")
-                if (key and val) and val not in format_vals:
-                    format_vals.append(val)
-            format_kvs.append("%s=%s" % (key, ",".join(format_vals)))
-        return ";".join(format_kvs)
-
-    def _write_annotations(self, anns, rec_id, size, out_handle):
-        """Add annotations which refer to an entire sequence."""
-        format_anns = self._format_keyvals(anns)
-        if format_anns:
-            parts = [
-                rec_id,
-                "annotation",
-                "remark",
-                "1",
-                str(size if size > 1 else 1),
-                ".",
-                ".",
-                ".",
-                format_anns,
-            ]
-            out_handle.write("\t".join(parts) + "\n")
-
-    def _write_header(self, out_handle):
-        """Write out standard header directives."""
-        out_handle.write("##gff-version 3\n")
-
-    def _write_fasta(self, recs, out_handle):
-        """Write sequence records using the ##FASTA directive."""
-        out_handle.write("##FASTA\n")
-        SeqIO.write(recs, out_handle, "fasta")
-
-
-def write(recs, out_handle, include_fasta=False):
-    """High level interface to write GFF3 files from SeqRecords and SeqFeatures.
-
-    If include_fasta is True, the GFF3 file will include sequence information
-    using the ##FASTA directive.
-    """
-    writer = GFF3Writer()
-    return writer.write(recs, out_handle, include_fasta)
diff -r 2e6c48910819 -r bde6b1d09f7d jb2_GFF/GFFParser.py
--- a/jb2_GFF/GFFParser.py	Mon Jan 29 02:34:43 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1099 +0,0 @@
-"""Parse GFF files into features attached to Biopython SeqRecord objects.
-
-This deals with GFF3 formatted files, a tab delimited format for storing
-sequence features and annotations:
-
-http://www.sequenceontology.org/gff3.shtml
-
-It will also deal with older GFF versions (GTF/GFF2):
-
-http://www.sanger.ac.uk/Software/formats/GFF/GFF_Spec.shtml
-http://mblab.wustl.edu/GTF22.html
-
-The implementation utilizes map/reduce parsing of GFF using Disco. Disco
-(http://discoproject.org) is a Map-Reduce framework for Python utilizing
-Erlang for parallelization. The code works on a single processor without
-Disco using the same architecture.
-"""
-import os
-import copy
-import json
-import re
-import collections
-import io
-import itertools
-import warnings
-import six
-from six.moves import urllib
-
-from Bio.SeqRecord import SeqRecord
-from Bio import SeqFeature
-from Bio import SeqIO
-from Bio import BiopythonDeprecationWarning
-
-import disco
-
-# Make defaultdict compatible with versions of python older than 2.4
-try:
-    collections.defaultdict
-except AttributeError:
-    import _utils
-
-    collections.defaultdict = _utils.defaultdict
-
-unknown_seq_avail = False
-try:
-    from Bio.Seq import UnknownSeq
-
-    unknown_seq_avail = True
-except ImportError:
-    # Starting with biopython 1.81, has been removed
-    from Bio.Seq import _UndefinedSequenceData
-    from Bio.Seq import Seq
-
-
-warnings.simplefilter("ignore", BiopythonDeprecationWarning)
-
-
-def _gff_line_map(line, params):
-    """Map part of Map-Reduce; parses a line of GFF into a dictionary.
-
-    Given an input line from a GFF file, this:
-    - decides if the file passes our filtering limits
-    - if so:
-        - breaks it into component elements
-        - determines the type of attribute (flat, parent, child or annotation)
-        - generates a dictionary of GFF info which can be serialized as JSON
-    """
-
-    def _merge_keyvals(parts):
-        """Merge key-values escaped by quotes
-        that are improperly split at semicolons."""
-        out = []
-        for i, p in enumerate(parts):
-            if (
-                i > 0
-                and len(p) == 1
-                and p[0].endswith('"')
-                and not p[0].startswith('"')
-            ):
-                if out[-1][-1].startswith('"'):
-                    prev_p = out.pop(-1)
-                    to_merge = prev_p[-1]
-                    prev_p[-1] = "%s; %s" % (to_merge, p[0])
-                    out.append(prev_p)
-            else:
-                out.append(p)
-        return out
-
-    gff3_kw_pat = re.compile(r"\w+=")
-
-    def _split_keyvals(keyval_str):
-        """Split key-value pairs in a GFF2, GTF and GFF3 compatible way.
-
-        GFF3 has key value pairs like:
-          count=9;gene=amx-2;sequence=SAGE:aacggagccg
-        GFF2 and GTF have:
-          Sequence "Y74C9A" ; Note "Clone Y74C9A; Genbank AC024206"
-          name "fgenesh1_pg.C_chr_1000003"; transcriptId 869
-        """
-        quals = collections.defaultdict(list)
-        if keyval_str is None:
-            return quals
-        # ensembl GTF has a stray semi-colon at the end
-        if keyval_str[-1] == ";":
-            keyval_str = keyval_str[:-1]
-        # GFF2/GTF has a semi-colon with at least one space after it.
-        # It can have spaces on both sides; wormbase does this.
-        # GFF3 works with no spaces.
-        # Split at the first one we can recognize as working
-        parts = keyval_str.split(" ; ")
-        if len(parts) == 1:
-            parts = [x.strip() for x in keyval_str.split(";")]
-        # check if we have GFF3 style key-vals (with =)
-        is_gff2 = True
-        if gff3_kw_pat.match(parts[0]):
-            is_gff2 = False
-            key_vals = _merge_keyvals([p.split("=") for p in parts])
-        # otherwise, we are separated by a space with a key as the first item
-        else:
-            pieces = []
[...]
-            self.jsonify = False
-
-        params = _LocalParams()
-        params.limit_info = limit_info
-        params.filter_info = self._filter_info
-        return params
-
-    @_file_or_handle
-    def available_limits(self, gff_handle):
-        """Return dictionary information on possible limits for this file.
-
-        This returns a nested dictionary with the following structure:
-
-        keys -- names of items to filter by
-        values -- dictionary with:
-            keys -- filter choice
-            value -- counts of that filter in this file
-
-        Not a parallelized map-reduce implementation.
-        """
-        cur_limits = dict()
-        for filter_key in self._filter_info.keys():
-            cur_limits[filter_key] = collections.defaultdict(int)
-        for line in gff_handle:
-            # when we hit FASTA sequences, we are done with annotations
-            if line.startswith("##FASTA"):
-                break
-            # ignore empty and comment lines
-            if line.strip() and line.strip()[0] != "#":
-                parts = [p.strip() for p in line.split("\t")]
-                assert len(parts) >= 8, line
-                parts = parts[:9]
-                for (
-                    filter_key,
-                    cur_indexes,
-                ) in self._filter_info.items():
-                    cur_id = tuple([parts[i] for i in cur_indexes])
-                    cur_limits[filter_key][cur_id] += 1
-        # get rid of the default dicts
-        final_dict = dict()
-        for key, value_dict in cur_limits.items():
-            if len(key) == 1:
-                key = key[0]
-            final_dict[key] = dict(value_dict)
-        gff_handle.close()
-        return final_dict
-
-    @_file_or_handle
-    def parent_child_map(self, gff_handle):
-        """Provide a mapping of parent to child relationships in the file.
-
-        Returns a dictionary of parent child relationships:
-
-        keys -- tuple of (source, type) for each parent
-        values -- tuple of (source, type) as children of that parent
-
-        Not a parallelized map-reduce implementation.
-        """
-        # collect all of the parent and child types mapped to IDs
-        parent_sts = dict()
-        child_sts = collections.defaultdict(list)
-        for line in gff_handle:
-            # when we hit FASTA sequences, we are done with annotations
-            if line.startswith("##FASTA"):
-                break
-            if line.strip() and not line.startswith("#"):
-                line_type, line_info = _gff_line_map(
-                    line, self._get_local_params()
-                )[0]
-                if line_type == "parent" or (
-                    line_type == "child" and line_info["id"]
-                ):
-                    parent_sts[line_info["id"]] = (
-                        line_info["quals"].get("source", [""])[0],
-                        line_info["type"],
-                    )
-                if line_type == "child":
-                    for parent_id in line_info["quals"]["Parent"]:
-                        child_sts[parent_id].append(
-                            (
-                                line_info["quals"].get(
-                                    "source", [""]
-                                )[0],
-                                line_info["type"],
-                            )
-                        )
-        # print parent_sts, child_sts
-        # generate a dictionary of the unique final type relationships
-        pc_map = collections.defaultdict(list)
-        for parent_id, parent_type in parent_sts.items():
-            for child_type in child_sts[parent_id]:
-                pc_map[parent_type].append(child_type)
-        pc_final_map = dict()
-        for ptype, ctypes in pc_map.items():
-            unique_ctypes = list(set(ctypes))
-            unique_ctypes.sort()
-            pc_final_map[ptype] = unique_ctypes
-        return pc_final_map
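Note: the deleted parser's _split_keyvals handles both GFF3 key=value attributes and GFF2/GTF quoted-value attributes, per its docstring. A toy, self-contained illustration of that split (a simplified re-implementation, not the deleted code)::

    import collections
    import re

    def split_attrs(keyval_str):
        """Illustrate the GFF3 vs GFF2/GTF attribute split described above."""
        quals = collections.defaultdict(list)
        keyval_str = keyval_str.rstrip(";")         # ensembl GTF stray semi-colon
        if re.match(r"\w+=", keyval_str):           # GFF3: count=9;gene=amx-2
            pairs = [p.split("=") for p in keyval_str.split(";")]
        else:                                       # GTF: gene_id "amx-2"; exon_number 1
            pairs = [p.strip().split(" ", 1) for p in keyval_str.split(";")]
        for key, val in pairs:
            quals[key.strip()].append(val.strip().strip('"'))
        return dict(quals)

    print(split_attrs("count=9;gene=amx-2"))        # {'count': ['9'], 'gene': ['amx-2']}
    print(split_attrs('gene_id "amx-2"; exon_number 1'))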
diff -r 2e6c48910819 -r bde6b1d09f7d jb2_GFF/__init__.py
--- a/jb2_GFF/__init__.py	Mon Jan 29 02:34:43 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-"""Top level of GFF parsing providing shortcuts for useful classes.
-"""
-from jb2_GFF.GFFParser import GFFParser, DiscoGFFParser, GFFExaminer, parse, parse_simple
-from jb2_GFF.GFFOutput import GFF3Writer, write
-
-__version__ = "0.6.9"
diff -r 2e6c48910819 -r bde6b1d09f7d jb2_GFF/__pycache__/GFFOutput.cpython-310.pyc |
Binary file jb2_GFF/__pycache__/GFFOutput.cpython-310.pyc has changed |
diff -r 2e6c48910819 -r bde6b1d09f7d jb2_GFF/__pycache__/GFFParser.cpython-310.pyc |
Binary file jb2_GFF/__pycache__/GFFParser.cpython-310.pyc has changed |
diff -r 2e6c48910819 -r bde6b1d09f7d jb2_GFF/__pycache__/__init__.cpython-310.pyc |
Binary file jb2_GFF/__pycache__/__init__.cpython-310.pyc has changed |
diff -r 2e6c48910819 -r bde6b1d09f7d jb2_GFF/_utils.py
--- a/jb2_GFF/_utils.py	Mon Jan 29 02:34:43 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-class defaultdict(dict):
-    """Back compatible defaultdict:
-    http://code.activestate.com/recipes/523034/"""
-
-    def __init__(self, default_factory=None, *a, **kw):
-        if default_factory is not None and not hasattr(
-            default_factory, "__call__"
-        ):
-            raise TypeError("first argument must be callable")
-        dict.__init__(self, *a, **kw)
-        self.default_factory = default_factory
-
-    def __getitem__(self, key):
-        try:
-            return dict.__getitem__(self, key)
-        except KeyError:
-            return self.__missing__(key)
-
-    def __missing__(self, key):
-        if self.default_factory is None:
-            raise KeyError(key)
-        self[key] = value = self.default_factory()
-        return value
-
-    def __reduce__(self):
-        if self.default_factory is None:
-            args = tuple()
-        else:
-            args = (self.default_factory,)
-        return type(self), args, None, None, self.items()
-
-    def copy(self):
-        return self.__copy__()
-
-    def __copy__(self):
-        return type(self)(self.default_factory, self)
-
-    def __deepcopy__(self, memo):
-        import copy
-
-        return type(self)(
-            self.default_factory, copy.deepcopy(self.items())
-        )
-
-    def __repr__(self):
-        return "defaultdict(%s, %s)" % (
-            self.default_factory,
-            dict.__repr__(self),
-        )
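Note: this shim reproduces the stdlib collections.defaultdict contract for pre-2.4 Pythons; GFFParser.py above falls back to it when collections.defaultdict is missing. A quick check of the behaviour it emulates (works identically with the shim or the stdlib class)::

    d = defaultdict(list)
    d["Parent"].append("gene1")   # missing key -> default_factory() inserted
    print(d)                      # defaultdict(<class 'list'>, {'Parent': ['gene1']})
    print(defaultdict().get("x"))  # no factory: plain dict.get still returns None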
diff -r 2e6c48910819 -r bde6b1d09f7d jbrowse2.py
--- a/jbrowse2.py	Mon Jan 29 02:34:43 2024 +0000
+++ b/jbrowse2.py	Tue Jan 30 06:05:03 2024 +0000
@@ -3,7 +3,6 @@
 import argparse
 import binascii
 import datetime
-import hashlib
 import json
 import logging
 import os
@@ -23,7 +22,7 @@
 
 TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
 GALAXY_INFRASTRUCTURE_URL = None
-JB2REL = "v2.10.0"
+JB2REL = "v2.10.1"
 # version pinned for cloning
 
 mapped_chars = {
@@ -232,7 +231,9 @@
         elif "scaling" in track:
             if track["scaling"]["method"] == "ignore":
                 if track["scaling"]["scheme"]["color"] != "__auto__":
-                    trackConfig["style"]["color"] = track["scaling"]["scheme"]["color"]
+                    trackConfig["style"]["color"] = track["scaling"]["scheme"][
+                        "color"
+                    ]
                 else:
                     trackConfig["style"]["color"] = self.hex_from_rgb(
                         *self._get_colours()
@@ -259,13 +260,18 @@
                             "blue": blue,
                         }
                     )
-                    trackConfig["style"]["color"] = color_function.replace("\n", "")
+                    trackConfig["style"]["color"] = color_function.replace(
+                        "\n", ""
+                    )
             elif trackFormat == "gene_calls":
                 # Default values, based on GFF3 spec
                 min_val = 0
                 max_val = 1000
                 # Get min/max and build a scoring function since JBrowse doesn't
-                if scales["type"] == "automatic" or scales["type"] == "__auto__":
+                if (
+                    scales["type"] == "automatic"
+                    or scales["type"] == "__auto__"
+                ):
                     min_val, max_val = self.min_max_gff(gff3)
                 else:
                     min_val = scales.get("min", 0)
@@ -273,7 +279,9 @@
 
                 if scheme["color"] == "__auto__":
                     user_color = "undefined"
-                    auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
+                    auto_color = "'%s'" % self.hex_from_rgb(
+                        *self._get_colours()
+                    )
                 elif scheme["color"].startswith("#"):
                     user_color = "'%s'" % self.hex_from_rgb(
                         *self.rgb_from_hex(scheme["color"][1:])
@@ -281,7 +289,9 @@
                     auto_color = "undefined"
                 else:
                     user_color = "undefined"
-                    auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())
+                    auto_color = "'%s'" % self.hex_from_rgb(
+                        *self._get_colours()
+                    )
 
                 color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(
                     **{
@@ -293,7 +303,9 @@
                     }
                 )
 
-                trackConfig["style"]["color"] = color_function.replace("\n", "")
+                trackConfig["style"]["color"] = color_function.replace(
+                    "\n", ""
+                )
         return trackConfig
 
 
@@ -336,40 +348,41 @@
     for (key, value) in node.findall("dataset")[0].attrib.items():
         metadata["dataset_%s" % key] = value
 
-    for (key, value) in node.findall("history")[0].attrib.items():
-        metadata["history_%s" % key] = value
-
-    for (key, value) in node.findall("metadata")[0].attrib.items():
-        metadata["metadata_%s" % key] = value
-
-    for (key, value) in node.findall("tool")[0].attrib.items():
-        metadata["tool_%s" % key] = value
+    if node.findall("history"):
+        for (key, value) in node.findall("history")[0].attrib.items():
+            metadata["history_%s" % key] = value
 
-    # Additional Mappings applied:
-    metadata[
-        "dataset_edam_format"
-    ] = '<a target="_blank" href="http://edamonto
[...]
-                    # </bam_indices>
-                    #
-                    # The above will result in the 'bam_index' key containing a
-                    # string. If there are two or more indices, the container
-                    # becomes a list. Fun!
                         real_indexes = [real_indexes]
 
                     self.add_bam(
@@ -1157,6 +1203,19 @@
                         track["conf"]["options"]["pileup"],
                         bam_index=real_indexes[i],
                    )
+                elif dataset_ext == "cram":
+                    real_indexes = track["conf"]["options"]["cram"][
+                        "cram_indices"
+                    ]["cram_index"]
+                    if not isinstance(real_indexes, list):
+                        real_indexes = [real_indexes]
+
+                    self.add_cram(
+                        dataset_path,
+                        outputTrackConfig,
+                        track["conf"]["options"]["cram"],
+                        cram_index=real_indexes[i],
+                    )
                 elif dataset_ext == "blastxml":
                     self.add_blastxml(
                         dataset_path,
@@ -1290,14 +1349,18 @@
         config_json.update(self.config_json)
         config_data = {}
 
-        config_data["disableAnalytics"] = data.get("analytics", "false") == "true"
+        config_data["disableAnalytics"] = (
+            data.get("analytics", "false") == "true"
+        )
 
         config_data["theme"] = {
             "palette": {
                 "primary": {"main": data.get("primary_color", "#0D233F")},
                 "secondary": {"main": data.get("secondary_color", "#721E63")},
                 "tertiary": {"main": data.get("tertiary_color", "#135560")},
-                "quaternary": {"main": data.get("quaternary_color", "#FFB11D")},
+                "quaternary": {
+                    "main": data.get("quaternary_color", "#FFB11D")
+                },
             },
             "typography": {"fontSize": int(data.get("font_size", 10))},
         }
@@ -1351,9 +1414,10 @@
     parser = argparse.ArgumentParser(description="", epilog="")
     parser.add_argument("--xml", help="Track Configuration")
    parser.add_argument("--outdir", help="Output directory", default="out")
-    parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1")
+    parser.add_argument(
+        "--version", "-V", action="version", version="%(prog)s 2.0.1"
+    )
     args = parser.parse_args()
-
     tree = ET.parse(args.xml)
     root = tree.getroot()
 
@@ -1448,7 +1512,8 @@
             track_conf["format"] = track.attrib["format"]
             if track.find("options/style"):
                 track_conf["style"] = {
-                    item.tag: parse_style_conf(item) for item in track.find("options/style")
+                    item.tag: parse_style_conf(item)
+                    for item in track.find("options/style")
                 }
             if track.find("options/style_labels"):
                 track_conf["style_labels"] = {
@@ -1461,7 +1526,9 @@
             track_conf["format"] = track.attrib["format"]
             try:
                 # Only pertains to gff3 + blastxml. TODO?
-                track_conf["style"] = {t.tag: t.text for t in track.find("options/style")}
+                track_conf["style"] = {
+                    t.tag: t.text for t in track.find("options/style")
+                }
             except TypeError:
                 track_conf["style"] = {}
                 pass
@@ -1492,7 +1559,9 @@
         "primary_color": root.find("metadata/general/primary_color").text,
         "secondary_color": root.find("metadata/general/secondary_color").text,
         "tertiary_color": root.find("metadata/general/tertiary_color").text,
-        "quaternary_color": root.find("metadata/general/quaternary_color").text,
+        "quaternary_color": root.find(
+            "metadata/general/quaternary_color"
+        ).text,
         "font_size": root.find("metadata/general/font_size").text,
     }
     jc.add_general_configuration(general_data)
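Note: the new cram branch mirrors the bam handling visible above it. As the surviving bam_indices comment explains, the XML-derived index entry is a bare string when one index is supplied and a list when there are several, so it is normalised to a list before indexing with [i]. A minimal standalone sketch of that normalisation (function name and values hypothetical)::

    def as_list(value):
        """Galaxy's XML-to-dict conversion yields a bare string for a single
        <cram_index> element and a list for several; normalise to a list."""
        return value if isinstance(value, list) else [value]

    assert as_list("reads.cram.crai") == ["reads.cram.crai"]
    assert as_list(["a.crai", "b.crai"]) == ["a.crai", "b.crai"]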
diff -r 2e6c48910819 -r bde6b1d09f7d jbrowse2.xml
--- a/jbrowse2.xml	Mon Jan 29 02:34:43 2024 +0000
+++ b/jbrowse2.xml	Tue Jan 30 06:05:03 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="jbrowse2" name="jbrowse2" version="@TOOL_VERSION@+@WRAPPER_VERSION@_2" profile="22.05">
+<tool id="jbrowse2" name="jbrowse2" version="@TOOL_VERSION@+@WRAPPER_VERSION@_3" profile="22.05">
     <description>genome browser</description>
     <macros>
         <import>macros.xml</import>
@@ -31,17 +31,20 @@
         <metadata>
             <genomes>
                 #if str($reference_genome.genome_type_select) == "indexed":
-                <genome path="${reference_genome.genomes.fields.path}" label="${reference_genome.genomes.fields.name}">
-                    <metadata />
+                <genome path="${reference_genome.genome.fields.path}" label="${reference_genome.genome.fields.name}">
+                    <metadata>
+                        <dataset
+                            dname = "${reference_genome.genome.name}" />
+                    </metadata>
                 </genome>
                 #else
-                <genome path="$reference_genome.genome" label="${reference_genome.genome.element_identifier}">
+                <genome path="$reference_genome.genome" label="${reference_genome.genome.name}">
                     <metadata>
                         <dataset id="${__app__.security.encode_id($reference_genome.genome.id)}" hid="${reference_genome.genome.hid}"
                             size="${reference_genome.genome.get_size(nice_size=True)}"
                             edam_format="${reference_genome.genome.datatype.edam_format}"
                             file_ext="${reference_genome.genome.ext}"
-                            dname = "${reference_genome.genome.element_identifier}" />
+                            dname = "${reference_genome.genome.name}" />
                         <history id="${__app__.security.encode_id($reference_genome.genome.history_id)}"
                             #if $reference_genome.genome.history.user:
                             user_email="${reference_genome.genome.history.user.email}"
@@ -165,7 +168,7 @@
             #else if str($track.data_format.data_format_select) == "synteny":
                 <synteny>
                     <genome>${track.data_format.synteny_genome}</genome>
-                    <genome_label>${track.data_format.synteny_genome.element_identifier}</genome_label>
+                    <genome_label>${track.data_format.synteny_genome.name}</genome_label>
                 </synteny>
             #else if str($track.data_format.data_format_select) == "hic":
                 <hic>
@@ -227,15 +230,15 @@
                     <param type="select" label="Track Type" name="data_format_select">
                         <option value="pileup">BAM Pileup track</option>
                         <option value="wiggle">BigWig track</option>
-                        <!-- <option value="blast">Blast XML track - converted to GFF</option> -->
+                        <option value="blast">Blast XML track - converted to GFF</option>
                         <option value="cram">CRAM</option>
                         <option value="gene_calls" selected="true">GFF/GFF3/BED feature track</option>
                         <option value="hic">HiC (compressed binary) track. Existing cool format must be converted to binary hic - hic_matrix will NOT work.</option>
+                        <option value="maf">Multiple alignment format track. Reference name must match the MAF name exactly to work correctly</option>
                         <option value="sparql">SPARQL</option>
                         <option value="synteny">Synteny track with PAF data</option>
                         <option value="vcf">VCF SNP annotation</option>
                     </param>
-                    <!--
                     <when value="blast">
                         <expand macro="input_conditional" label="BlastXML Track Data" format="blastxml" />
@@ -258,7 +261,7 @@
                             truevalue="true"
                             falsevalue="false" />
                         <expand macro="track_visibility" />
-                    </when> -->
+                    </when>
                     <when value="vcf">
                         <expand macro="input_conditional" label="SNP Track Data" format="vcf,vcf_bgzip" />
                         <expand macro="track_styling_vcf"/>
@@ -291,24 +294,24 @@
                         <expand macro="input_conditional" label="CRAM Track Data" format="cram" />
                         <expand macro="track_visibility" />
                     </when>
+                    <when value="maf">
+                        <expand macro="input_conditional" label="MAF Track Data" format="maf" />
+                        <expand macro="track_visibility" />
+                    </when>
                     <when value="wiggle">
                         <expand macro="input_conditional" label="BigWig Track Data" format="bigwig" />
                         <expand macro="track_visibility" />
-                    </when>
-
                     <when value="synteny">
                         <param label="Comparison genome sequence"
                             help="Paf must use this as the reference to map the real reference sequence"
                             format="fasta"
                             name="synteny_genome"
                             type="data" />
-
                         <expand macro="input_conditional" label="Synteny data" format="paf" help="Make paf with minimap2 mapping real reference onto desired syntenic reference"/>
                         <expand macro="track_visibility" />
                     </when>
                     <when value="hic">
-                        <!-- TODO no hic datatype by default, but input for hicConvertFormat? hic_matrix datatype on .eu -->
                         <expand macro="input_conditional" label="HiC data" format="hic" />
                         <expand macro="track_visibility" />
                     </when>
@@ -371,7 +374,7 @@
         <param type="hidden" name="uglyTestingHack" value="" />
     </inputs>
     <outputs>
-        <data format="html" name="output" label="JBrowse2 on $reference_genome.genome.element_identifier"/>
+        <data format="html" name="output" label="JBrowse2 on $reference_genome.genome.name"/>
     </outputs>
     <tests>
         <test>
diff -r 2e6c48910819 -r bde6b1d09f7d macros.xml
--- a/macros.xml	Mon Jan 29 02:34:43 2024 +0000
+++ b/macros.xml	Tue Jan 30 06:05:03 2024 +0000
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">2.10.0</token>
+    <token name="@TOOL_VERSION@">2.10.1</token>
     <xml name = "edamInc">
         <edam_topics>
             <edam_topic>topic_3307</edam_topic>
@@ -14,8 +14,8 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">jbrowse2</requirement>
-            <requirement type="package" version="1.82">biopython</requirement>
-            <requirement type="package" version="0.7.0">bcbio-gff</requirement>
+            <requirement type="package" version="1.81">biopython</requirement>
+            <requirement type="package" version="0.7.1">bcbio-gff</requirement>
             <requirement type="package" version="1.19">samtools</requirement>
             <requirement type="package" version="6.0.1">pyyaml</requirement>
             <requirement type="package" version="1.11">tabix</requirement>