Mercurial > repos > fubar > jbrowse2
changeset 30:8f02a84ee278 draft
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 48bc917d34af182e9158915862c8a35723660919
author | fubar |
---|---|
date | Wed, 21 Feb 2024 02:57:30 +0000 |
parents | f728cf0df71d |
children | cb4b32ca9968 |
files | autogenJB2.py autogenJB2.xml jbrowse2.py jbrowse2.xml macros.xml |
diffstat | 5 files changed, 296 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
"""autogenJB2.py - build a JBrowse2 configuration from a collection of files.

Every ``--collection`` value describes one collection element as
``name,path,ext``.  Elements whose name starts with ``REFERENCE_`` are
registered as reference genomes; every other element becomes a track.
"""
import argparse
import re
import sys

# Collection elements whose name starts with this prefix are reference genomes.
REFERENCE_PREFIX = "REFERENCE_"


def makeDefaultLocation(jc, defLoc=None):
    """Return the default-session location dict, or None if no contig is known.

    Parameters:
        jc: connector object; ``jc.genome_name`` and ``jc.genome_firstcontig``
            are read and messages are written with ``jc.logging.info``.
        defLoc: optional location string of the form ``contig:start..end``.
            Commas inside the numbers (``100,000``) are ignored.  Either
            number may be omitted, in which case the default (0 / 100000)
            is kept.

    Returns:
        A dict with the keys ``reversed``, ``assemblyName``, ``start``,
        ``end`` and ``refName``, or ``None`` when no contig name could be
        determined (unparseable ``defLoc``, or no first contig on ``jc``).
    """
    drdict = {
        "reversed": False,
        "assemblyName": jc.genome_name,
        "start": 0,
        "end": 100000,
    }

    if defLoc:
        loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", defLoc)
        if loc_match:
            drdict["refName"] = loc_match.group(1)
            # Strip the thousands separators *before* testing for emptiness so
            # that a field holding only commas does not reach int("").
            start = loc_match.group(2).replace(",", "")
            end = loc_match.group(3).replace(",", "")
            if start:
                drdict["start"] = int(start)
            if end:
                drdict["end"] = int(end)
        else:
            jc.logging.info(
                "@@@ regexp could not match contig:start..end in the supplied location %s - please fix"
                % defLoc
            )
    else:
        drdict["refName"] = jc.genome_firstcontig

    if drdict.get("refName"):
        jc.logging.info("@@@ defaultlocation %s for default session" % drdict)
        return drdict
    jc.logging.info("@@@ no contig name found for default session - please add one!")
    return None


def _parse_collection_entry(entry):
    """Split one ``name,path,ext`` collection entry into its three fields.

    The name ends at the first comma and the extension starts after the last
    one, so a path that itself contains commas is kept intact.

    Raises:
        ValueError: if the entry has fewer than three comma separated fields.
    """
    name, first_sep, rest = entry.partition(",")
    path, last_sep, ext = rest.rpartition(",")
    if not (first_sep and last_sep):
        raise ValueError("expected 'name,path,ext' but got %r" % entry)
    return name, path, ext


def main():
    """Parse the command line and write the JBrowse2 configuration."""
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument(
        "--sessname",
        "--sessName",  # camel-case alias kept for callers that pass --sessName
        dest="sessname",
        help="Session name",
        default="AutoJBrowse",
    )
    parser.add_argument(
        "--collection",
        help="Collection element as 'name,path,ext' for JBrowse2; may be repeated",
        default=[],
        # "extend" without nargs would extend the list with the individual
        # characters of the value; nargs="+" makes each value one list item.
        action="extend",
        nargs="+",
    )
    parser.add_argument("--outdir", help="Output directory", default="out")
    parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.0.1")
    args = parser.parse_args()

    if not args.collection:
        sys.stderr.write("!! empty collection supplied - nothing to process\n")
        return

    try:
        flistList = [_parse_collection_entry(x) for x in args.collection]
    except ValueError as err:
        parser.error(str(err))

    listgenomes = [f for f in flistList if f[0].startswith(REFERENCE_PREFIX)]
    if not listgenomes:
        sys.stderr.write(
            "!! no collection element named %s* found - a reference genome is required\n"
            % REFERENCE_PREFIX
        )
        sys.exit(2)

    # Imported only once the arguments are known to be usable, so that
    # --help/--version and makeDefaultLocation do not need the jbrowse2 module.
    from jbrowse2 import jbrowseConnector as jbC

    jc = jbC(
        outdir=args.outdir,
        genomes=[
            {
                "path": path,
                "meta": {
                    "name": name[len(REFERENCE_PREFIX):],
                },
            }
            for name, path, _ext in listgenomes
        ],
    )
    jc.process_genomes()

    default_session_data = {
        "visibility": {
            "default_on": [],
            "default_off": [],
        },
        "style": {},
        "style_labels": {},
    }
    defLoc = makeDefaultLocation(jc)

    listtracks = [f for f in flistList if not f[0].startswith(REFERENCE_PREFIX)]
    for name, _path, ext in listtracks:
        track_conf = {}
        if ext == "paf":
            # foo.paf must have a matching element named foo_paf.fasta
            refname = name + "_paf.fasta"
            refdat = [t[1] for t in listtracks if t[0] == refname]
            if not refdat:
                jc.logging.warning(
                    "!! No reference file %s corresponding to paf file %s found. Not building - there must be a corresponding fasta for each paf"
                    % (refname, name)
                )
                sys.exit(3)
            track_conf["conf"] = {
                "options": {"paf": {"genome": refdat, "genome_label": name}}
            }
        track_conf["format"] = ext
        track_conf["name"] = name
        track_conf["label"] = name
        # NOTE(review): trackfiles is left empty, exactly as before; presumably
        # process_annotations expects one entry per file here - confirm the
        # expected entry layout against jbrowse2.py and fill it in.
        track_conf["trackfiles"] = []
        keys = jc.process_annotations(track_conf)

        if keys:
            # Collection elements carry no visibility setting (they are plain
            # name/path/ext triples), so every track starts switched off.
            default_session_data["visibility"]["default_off"].extend(keys)

    # NOTE(review): .update() assumes the existing "tracks" value is a
    # mapping - verify against how jbrowse2.py builds config_json.
    trackconf = jc.config_json.get("tracks", None)
    if trackconf:
        jc.config_json["tracks"].update(jc.tracksToAdd)
    else:
        jc.config_json["tracks"] = jc.tracksToAdd
    jc.write_config()

    # Pass the visibility/style data collected above together with the
    # location and session name; the two original keys are unchanged.
    default_session_data["defaultLocation"] = defLoc
    default_session_data["session_name"] = args.sessname
    jc.add_default_session(default_session_data)
    # TODO: jc.text_index() is disabled - not sure what broke here.


if __name__ == "__main__":
    main()
<!-- autogenJB2.xml: Galaxy tool wrapper that turns a specially named collection into a JBrowse2 instance. -->
<tool id="autogenjb2" name="autogenjb2" version="2.10.0_0" profile="22.05">
    <description>Files to JBrowse2</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="edamInc"/>
    <xrefs>
        <xref type="bio.tools">jbrowse2</xref>
    </xrefs>
    <expand macro="requirements"/>
    <version_command>python '${__tool_directory__}/autogenJB2.py' --version</version_command>
    <!-- One collection option is emitted per collection element, as 'name,path,ext'.
         The session name option must be spelled exactly as the script declares it (all lower case).
         NOTE(review): no output location is passed to the script and nothing here writes the
         declared html output; confirm how the result is meant to reach the history. -->
    <command detect_errors="aggressive"><![CDATA[
python '$__tool_directory__/autogenJB2.py'
#for $key in $jbrowseme.keys():
--collection '$key,$jbrowseme[$key],$jbrowseme[$key].ext'
#end for
--sessname "Autogen JBrowse"
    ]]></command>
    <inputs>
        <param
            label="Collection of files specially named to become tracks"
            name="jbrowseme"
            type="data_collection">
        </param>
    </inputs>
    <outputs>
        <data format="html" name="output" label="JBrowse2"/>
    </outputs>

    <help><![CDATA[

JBrowse2-in-Galaxy
==================

JBrowse2-in-Galaxy offers a highly configurable, workflow-compatible
alternative to JBrowse1-in-Galaxy and Trackster.

Compared to JBrowse1-in-Galaxy, there is no support for alternative codons for unusual genomes,
and detailed track styling is not yet implemented. Send code.
JBrowse1 development has now ceased in favour of JBrowse2.

Use and local viewing
=====================


A JBrowse2 history item can be opened by viewing it (the "eye" icon).

The same browser data and setup can also be downloaded as a compressed zip archive by clicking the download ("floppy disk") icon in the history.
This can be shared and viewed without Galaxy.

A replacement application to serve the browser is required without Galaxy. A local python web server can be started using a script included in each archive,
assuming that Python3 is already working on your desktop - if not you will have to install it first. Unzip the archive (*unzip [filename].zip*) and change
directory to the first level in that zip archive. It contains a file named *jb2_webserver.py*

With python3 installed,

*python3 jb2_webserver.py*

will serve the unarchived JBrowse2 configuration from the same directory as the python script automatically. If a new browser window does not open,
but the script appears to be running, try pointing your web browser to the default of *localhost:8080*

Overview
--------

JBrowse is a fast, embeddable genome browser built completely with
JavaScript and HTML5.

The JBrowse-in-Galaxy (JiG) tool was written to help build complex
JBrowse installations straight from Galaxy. It allows you to build up a JBrowse instance without worrying
about how to run the command line tools to format your data, and which
options need to be supplied and where.

The JBrowse-in-Galaxy tool has been rejected by `a Galaxy IUC
<https://github.com/galaxyproject/tools-iuc/issues>`__ reviewer.
It is maintained by https://github.com/fubar2 who can help you
with missing features or bugs in the tool. For the record, he remains unconvinced by the reviewer's logic,
and disturbed by the distinctly coercive approach to introducing new code,
compared to the more usual method of providing a working PR.

Options
-------

**Reference or Assembly**

Choose either a built-in or select one from your history.

Track coordinates and contig names *must* match this reference precisely
or they will not display.

**Track Groups** represent a set of tracks in a single category.

Annotation Tracks
-----------------

GFF3/BED
~~~~~~~~

Standard feature tracks. They usually highlight genes, mRNAs and other features of interest along a genomic region.

When these contain tens of millions of features, such as repeat regions from a VGP assembly, displaying one at a time leads
to extremely slow loading times when a large region is in view, unless the "LinearPileupDisplay" display option is
selected for that track in the styling options section. The default is LinearBasicDisplay, which shows all details and works
well for relatively sparse bed files. A better option is to make a bigwig track using a set of windows based on the
lengths of each assembly or reference contig.

BAM Pileups
~~~~~~~~~~~

We support BAM files and can automatically generate SNP tracks based on
that bam data.


BlastXML
~~~~~~~~

JiG now supports both blastn and blastp datasets. JiG internally uses a
blastXML to gapped GFF3 tool to convert your blastxml datasets into a
format amenable to visualization in JBrowse. This tool is also
available separately from the IUC on the toolshed.

**Minimum Gap Size** reflects how long a gap must be before it becomes a
real gap in the processed gff3 file. In the picture above, various sizes
of gaps can be seen. If the minimum gap size was set much higher, say
100nt, many of the smaller gaps would disappear, and the features on
both sides would be merged into one, longer feature. This setting is
inversely proportional to runtime and output file size. *Do not set this
to a low value for large datasets*. By setting this number lower, you
will have extremely large outputs and extremely long runtimes. The
default was configured based off of the author's experience, but the
author only works on small viruses. It is *strongly* recommended that
you filter your blast results before display, e.g. picking out the top
10 hits or so.

**Protein blast search** option merely informs underlying tools that
they should adjust feature locations by 3x.


@ATTRIBUTION@
]]></help>
    <expand macro="citations"/>
</tool>
--- a/jbrowse2.py Fri Feb 16 00:04:37 2024 +0000 +++ b/jbrowse2.py Wed Feb 21 02:57:30 2024 +0000 @@ -1119,6 +1119,7 @@ # Unsanitize labels (element_identifiers are always sanitized by Galaxy) for key, value in mapped_chars.items(): track_human_label = track_human_label.replace(value, key) + track_human_label = track_human_label.replace(" ","_") outputTrackConfig = { "category": category, "style": {}, @@ -1141,13 +1142,13 @@ dataset_ext, outputTrackConfig, ) - elif dataset_ext in ("hic",): + elif dataset_ext in ("hic","juicebox_hic"): self.add_hic( dataset_path, outputTrackConfig, ) elif dataset_ext in ("cool", "mcool", "scool"): - hic_url = "%s_%d.hic" % (track_human_label, i) + hic_url = "%s_%d.juicebox_hic" % (track_human_label, i) hic_path = os.path.join(self.outdir, hic_url) self.subprocess_check_call( [
--- a/jbrowse2.xml Fri Feb 16 00:04:37 2024 +0000 +++ b/jbrowse2.xml Wed Feb 21 02:57:30 2024 +0000 @@ -93,7 +93,7 @@ <track cat="${tg.category}" format="${track.data_format.data_format_select}" visibility="${track.data_format.track_visibility}"> #if $track.data_format.data_format_select != "sparql": <files> - #for $dataset in $track.data_format.annotation: + #set dataset = $track.data_format.annotation <trackFile path="${dataset}" ext="${dataset.ext}" label="${dataset.element_identifier}"> <metadata> <dataset id="${__app__.security.encode_id($dataset.id)}" hid="${dataset.hid}" @@ -128,7 +128,6 @@ /> </metadata> </trackFile> - #end for </files> #end if @@ -138,17 +137,13 @@ #if str($track.data_format.data_format_select) == "pileup": <pileup> <bam_indices> - #for $dataset in $track.data_format.annotation: <bam_index>${dataset.metadata.bam_index}</bam_index> - #end for </bam_indices> </pileup> #else if str($track.data_format.data_format_select) == "cram": <cram> <cram_indices> - #for $dataset in $track.data_format.annotation: <cram_index>${dataset.metadata.cram_index}</cram_index> - #end for </cram_indices> </cram> #else if str($track.data_format.data_format_select) == "blast": @@ -234,11 +229,11 @@ <option value="pileup">BAM Pileup track</option> <option value="wiggle">BigWig track</option> <option value="blast">Blast XML track - converted to GFF</option> - <option value="cool">cool/mcool/scool data in hdf5 data</option> + <option value="cool">HiC as cool/mcool/scool format files</option> <option value="cram">CRAM</option> <option value="gene_calls" selected="true">GFF/GFF3/BED feature track</option> - <option value="hic">HiC (compressed binary) track. Existing cool format must be converted to binary hic - hic_matrix will NOT work.</option> - <option value="maf">Multiple alignment format track. Reference name must match the MAF name exactly to work correctly</option> + <option value="hic">HiC as juicebox_hic format file. 
Tabular hic_matrix will NOT work.</option> + <option value="maf">Multiple alignment format. Reference name must match the MAF name exactly to work correctly</option> <option value="paf">PAF - approximate mapping positions between two set of sequences</option> <option value="sparql">SPARQL</option> <option value="vcf">VCF SNP annotation</option>
--- a/macros.xml Fri Feb 16 00:04:37 2024 +0000 +++ b/macros.xml Wed Feb 21 02:57:30 2024 +0000 @@ -504,7 +504,7 @@ <xml name="input_conditional" token_label="Track Data" token_format="data"> - <param label="@LABEL@" format="@FORMAT@" name="annotation" type="data" multiple="True"/> + <param label="@LABEL@" format="@FORMAT@" name="annotation" type="data" multiple="False"/> </xml> <xml name="citations"> <citations>