Mercurial > repos > fubar > jbrowse2dev
changeset 7:234cf4490901 draft
Uploaded
author | fubar |
---|---|
date | Fri, 05 Jan 2024 04:31:35 +0000 |
parents | 88b9b105c09b |
children | 1e6128ccc82b |
files | jbrowse2/blastxml_to_gapped_gff3.py jbrowse2/jbrowse2.py jbrowse2/jbrowse2.xml jbrowse2/servejb2.py jbrowse2/test-data/bam/merlin-sample.bam.gz |
diffstat | 5 files changed, 243 insertions(+), 168 deletions(-) [+] |
line wrap: on
line diff
--- a/jbrowse2/blastxml_to_gapped_gff3.py Fri Jan 05 01:58:02 2024 +0000 +++ b/jbrowse2/blastxml_to_gapped_gff3.py Fri Jan 05 04:31:35 2024 +0000 @@ -32,7 +32,7 @@ recid = record.query if " " in recid: - recid = recid[0 : recid.index(" ")] + recid = recid[0: recid.index(" ")] rec = SeqRecord(Seq("ACTG"), id=recid) for idx_hit, hit in enumerate(record.alignments): @@ -72,7 +72,7 @@ qualifiers["blast_" + prop] = getattr(hsp, prop, None) desc = hit.title.split(" >")[0] - qualifiers["description"] = desc[desc.index(" ") :] + qualifiers["description"] = desc[desc.index(" "):] # This required a fair bit of sketching out/match to figure out # the first time. @@ -161,9 +161,9 @@ fm = "" fs = "" for position in re.finditer("-", query): - fq += query[prev : position.start()] - fm += match[prev : position.start()] - fs += subject[prev : position.start()] + fq += query[prev: position.start()] + fm += match[prev: position.start()] + fs += subject[prev: position.start()] prev = position.start() + 1 fq += query[prev:] fm += match[prev:]
--- a/jbrowse2/jbrowse2.py Fri Jan 05 01:58:02 2024 +0000 +++ b/jbrowse2/jbrowse2.py Fri Jan 05 04:31:35 2024 +0000 @@ -108,7 +108,7 @@ class JbrowseConnector(object): - def __init__(self, jbrowse, outdir, genomes, standalone=None): + def __init__(self, jbrowse, outdir, genomes): self.debug = False self.usejson = True self.giURL = GALAXY_INFRASTRUCTURE_URL @@ -116,7 +116,6 @@ self.outdir = outdir os.makedirs(self.outdir, exist_ok=True) self.genome_paths = genomes - self.standalone = standalone self.trackIdlist = [] self.tracksToAdd = [] self.config_json = {} @@ -174,46 +173,27 @@ if self.debug: log.info("genome_node=%s" % str(genome_node)) genome_name = genome_node["meta"]["dataset_dname"] - dsId = genome_node["meta"]["dataset_id"] fapath = genome_node["path"] - if self.standalone == "complete": - faname = genome_name + ".fa.gz" - fadest = os.path.realpath(os.path.join(self.outdir, faname)) - cmd = "bgzip -i -c %s > %s && samtools faidx %s" % ( - fapath, - fadest, - fadest, - ) - self.subprocess_popen(cmd) - adapter = { - "type": "BgzipFastaAdapter", - "fastaLocation": { - "uri": faname, - }, - "faiLocation": { - "uri": faname + ".fai", - }, - "gziLocation": { - "uri": faname + ".gzi", - }, - } - else: - faurl = "%s/api/datasets/%s/display" % (self.giURL, dsId) - faname = genome_name + ".fa.fai" - fastalocation = { - "uri": faurl, - } - failocation = { + faname = genome_name + ".fa.gz" + fadest = os.path.realpath(os.path.join(self.outdir, faname)) + cmd = "bgzip -i -c %s > %s && samtools faidx %s" % ( + fapath, + fadest, + fadest, + ) + self.subprocess_popen(cmd) + adapter = { + "type": "BgzipFastaAdapter", + "fastaLocation": { "uri": faname, - } - adapter = { - "type": "IndexedFastaAdapter", - "fastaLocation": fastalocation, - "faiLocation": failocation, - } - - cmd = ["samtools", "faidx", fapath, "--fai-idx", faname] - self.subprocess_check_call(cmd) + }, + "faiLocation": { + "uri": faname + ".fai", + }, + "gziLocation": { + "uri": faname + ".gzi", + }, + } trackDict = { "name": genome_name, "sequence": { @@ -228,40 +208,23 @@ if self.usejson: self.config_json["assemblies"] = assemblies else: - if self.standalone == "complete": - cmd = [ - "jbrowse", - "add-assembly", - faname, - "-t", - "bgzipFasta", - "-n", - genome_name, - "--load", - "inPlace", - "--faiLocation", - faname + ".fai", - "--gziLocation", - faname + ".gzi", - "--target", - self.outdir, - ] - else: - cmd = [ - "jbrowse", - "add-assembly", - faname, - "-t", - "indexedFasta", - "-n", - genome_name, - "--load", - "inPlace", - "--faiLocation", - faname + ".fai", - "--target", - self.outdir, - ] + cmd = [ + "jbrowse", + "add-assembly", + faname, + "-t", + "bgzipFasta", + "-n", + genome_name, + "--load", + "inPlace", + "--faiLocation", + faname + ".fai", + "--gziLocation", + faname + ".gzi", + "--target", + self.outdir, + ] self.subprocess_check_call(cmd) def add_default_view(self): @@ -279,7 +242,7 @@ "-v", " LinearGenomeView", ] - if True or self.debug: + if self.debug: log.info("### calling set-default-session with cmd=%s" % " ".join(cmd)) self.subprocess_check_call(cmd) @@ -311,19 +274,13 @@ dsId, ) hname = trackData["name"] - if self.standalone == "complete": - dest = os.path.realpath(os.path.join(self.outdir, hname)) - url = hname - cmd = ["cp", data, dest] - self.subprocess_check_call(cmd) - floc = { - "uri": hname, - } - else: - url = "%s/api/datasets/%s/display?to_ext=hic" % (self.giURL, dsId) - floc = { - "uri": url, - } + dest = os.path.realpath(os.path.join(self.outdir, hname)) + url = hname + cmd = ["cp", data, dest] + self.subprocess_check_call(cmd) + floc = { + "uri": hname, + } trackDict = { "type": "HicTrack", "trackId": tId, @@ -509,15 +466,10 @@ def add_bigwig(self, data, trackData): url = "%s.bw" % trackData["name"] - if self.standalone == "complete": - dest = os.path.realpath(os.path.join(self.outdir, url)) - cmd = ["cp", data, dest] - self.subprocess_check_call(cmd) - bwloc = {"uri": url} - else: - dsId = trackData["metadata"]["dataset_id"] - url = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId) - bwloc = {"uri": url} + dest = os.path.realpath(os.path.join(self.outdir, url)) + cmd = ["cp", data, dest] + self.subprocess_check_call(cmd) + bwloc = {"uri": url} tId = trackData["label"] trackDict = { "type": "QuantitativeTrack", @@ -562,15 +514,10 @@ tId = trackData["label"] fname = "%s.bam" % trackData["label"] dest = os.path.realpath("%s/%s" % (self.outdir, fname)) - if self.standalone == "complete": - url = fname - self.subprocess_check_call(["cp", data, dest]) - log.info("### copied %s to %s" % (data, dest)) - bloc = {"uri": url} - else: - dsId = trackData["metadata"]["dataset_id"] - url = "%s/api/datasets/%s/display?to_ext=bam" % (self.giURL, dsId) - bloc = {"uri": url} + url = fname + self.subprocess_check_call(["cp", data, dest]) + log.info("### copied %s to %s" % (data, dest)) + bloc = {"uri": url} if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest self.subprocess_check_call( @@ -926,6 +873,8 @@ ]: cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] self.subprocess_check_call(cmd) + cmd = ['cp', os.path.join(INSTALLED_TO, "servejb2.py"), self.outdir] + self.subprocess_check_call(cmd) if __name__ == "__main__": @@ -934,11 +883,6 @@ parser.add_argument("--jbrowse", help="Folder containing a jbrowse release") parser.add_argument("--outdir", help="Output directory", default="out") - parser.add_argument( - "--standalone", - choices=["complete", "minimal", "data"], - help="Standalone mode includes a copy of JBrowse", - ) parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.8.0") args = parser.parse_args() @@ -963,7 +907,6 @@ } for x in root.findall("metadata/genomes/genome") ], - standalone=args.standalone, ) jc.process_genomes()
--- a/jbrowse2/jbrowse2.xml Fri Jan 05 01:58:02 2024 +0000 +++ b/jbrowse2/jbrowse2.xml Fri Jan 05 04:31:35 2024 +0000 @@ -21,16 +21,11 @@ python '$__tool_directory__/jbrowse2.py' --jbrowse \${JBROWSE_SOURCE_DIR} ---standalone '$standalone' --outdir '$output.files_path' '$trackxml' && -#if str($standalone) != "data": - cp '$output.files_path/index.html' '$output' -#else: - cp '$dummyIndex' '$output' -#end if +cp '$output.files_path/index.html' '$output' ## Ugly testing hack since I cannot get <extra_files> to test the files I want to test. Hmph. #if str($uglyTestingHack) == "enabled": @@ -212,14 +207,6 @@ </when> </conditional> - <param name="standalone" label="Include all reference and track data in the JBrowse2 object" type="select" - help="Default is efficient but will not work offline. Including reference sequences, tracks and indexes will allow standalone viewing, at the cost of copying and moving all data" > - <option value="complete">Complete: Choose ONLY if need to view offline, or if history cannot be published. WARNING: produces bloated downloads storing redundant copies of all data! - </option> - <option value="minimal" selected="true">Sufficient: Uses URLs for Galaxy data. Requires internet access and a published history to download, share and view remotely. - </option> - </param> - <repeat name="track_groups" title="Track Group"> <param label="Track Category" name="category" @@ -230,12 +217,12 @@ <repeat name="data_tracks" title="Annotation Track"> <conditional name="data_format" label="Track Options"> <param type="select" label="Track Type" name="data_format_select"> - <option value="blast">Blast XML</option> - <option value="gene_calls">GFF/GFF3/BED Features</option> - <option value="hic">HiC data (convert .cool with hicexplorer)</option> - <option value="pileup">BAM Pileups</option> - <option value="vcf">VCF SNPs</option> - <option value="wiggle">BigWig XY</option> + <option value="blast">Blast XML track - converted to GFF with actual gaps between hits</option> + <option value="gene_calls" selected="true">GFF/GFF3/BED feature tracks</option> + <option value="hic">HiC binary data. Existing cool format must be converted to binary hic - hic_matrix will NOT work.</option> + <option value="pileup">BAM Pileup track</option> + <option value="vcf">VCF SNP annotation track</option> + <option value="wiggle">BigWig XY track</option> </param> <when value="hic"> <expand macro="input_conditional" label="HiC Track Data" format="hic" help="Cool files must be converted first with hicexplorer" /> @@ -316,13 +303,12 @@ <param type="hidden" name="uglyTestingHack" value="" /> </inputs> <outputs> - <data format="html" name="output" label="JBrowse2 on $reference_genome.genome.element_identifier - $standalone"/> + <data format="html" name="output" label="JBrowse2 on $reference_genome.genome.element_identifier"/> </outputs> <tests> <test> <param name="reference_genome|genome_type_select" value="history"/> <param name="reference_genome|genome" value="merlin.fa"/> - <param name="standalone" value="minimal" /> <param name="uglyTestingHack" value="enabled" /> <output name="output"> <assert_contents> @@ -337,7 +323,6 @@ <test> <param name="reference_genome|genome_type_select" value="history"/> <param name="reference_genome|genome" value="merlin.fa"/> - <param name="standalone" value="minimal" /> <repeat name="track_groups"> <param name="category" value="Default" /> <repeat name="data_tracks"> @@ -364,7 +349,6 @@ <param name="genome_type_select" value="history"/> <param name="genome" value="merlin.fa"/> </conditional> - <param name="standalone" value="minimal" /> <repeat name="track_groups"> <param name="category" value="Auto Coloured" /> <repeat name="data_tracks"> @@ -388,7 +372,6 @@ <test> <param name="reference_genome|genome_type_select" value="history"/> <param name="reference_genome|genome" value="merlin.fa"/> - <param name="standalone" value="minimal" /> <param name="uglyTestingHack" value="enabled" /> <output name="output"> <assert_contents> @@ -410,6 +393,19 @@ and detailed track styling is not yet implemented. Send code. JBrowse1 development has now ceased in favour of JBrowse2. +Use and local viewing +===================== + +A JBrowse2 history item can be opened by viewing it (the "eye" icon). +They can also be downloaded as archives ("floppy disk" icon) to share and for local viewing. +One extra step is required before they can be viewed. A local python web server must be started using a script included in each archive. +Unpack the archive (tar -xvzf [filename].tgz) and the first level directory will contain a file named "servejb2.py" + +Assuming you have python3 installed, running + +*python3 servjb2.py* + +will serve the unarchived JBrowse2 configuration, so it can be browsed by pointing a web browser to localhost:8080 Overview -------- @@ -436,9 +432,7 @@ The first option you encounter is the **Fasta Sequence(s)**. This option now accepts multiple fasta files, allowing you to build JBrowse instances that contain data for multiple genomes or chrosomomes -(generally known as "landmark features" in gff3 terminology.) Up to 30 -will be shown from the dropdown selector within JBrowse, this is a known -issue. +(generally known as "landmark features" in gff3 terminology.) **Track Groups** represent a set of tracks in a single category. These can be used to let your users understand relationships between large @@ -449,25 +443,14 @@ Annotation Tracks ----------------- -Within Track Groups, you have one or more **Annotation Tracks**. Each -Annotation Track is a groups of datasets which have similar styling. -This allows you to rapidly build up JBrowse instances without having to -configure tracks individually. A massive improvement over previous -versions. For example, if you have five different GFF3 files from -various gene callers that you wish to display, you can take advantage of -this feature to style all of them similarly. - There are a few different types of tracks supported, each with their own set of options: GFF3/BED ~~~~~~~~ -These are your standard feature tracks. They usually highlight genes, -mRNAs and other features of interest along a genomic region. The -underlying tool and this help documentation focus primarily on GFF3 -data, and have not been tested extensively with other formats. Automatic -min/max detection will fail under BED datasets. +These are standard feature tracks. They usually highlight genes, +mRNAs and other features of interest along a genomic region. BAM Pileups ~~~~~~~~~~~ @@ -475,11 +458,6 @@ We support BAM files and can automatically generate SNP tracks based on that bam data. -.. image:: bam.png - -This is *strongly discouraged* for high coverage density datasets. -Unfortunately there are no other configuration options exposed for bam -files. BlastXML ~~~~~~~~ @@ -512,12 +490,7 @@ .. image:: bigwig.png -**XYPlot** -BigWig tracks can be displayed as a "density" plot which is a continuous -line which varies in colour, or as an "XYplot." XYplots are preferable -for users to visually identify specific features in a bigwig track, -however density tracks are more visually compact. VCFs/SNPs ~~~~~~~~~
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/jbrowse2/servejb2.py Fri Jan 05 04:31:35 2024 +0000 @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 + +# spec: simplest python web server with range support and multithreading that takes root path, +# port and bind address as command line arguments; by default uses the current dir as webroot, +# port 8000 and bind address of 0.0.0.0 +# borrowed from https://github.com/danvk/RangeHTTPServer +# and reborrowed from https://gist.github.com/glowinthedark/b99900abe935e4ab4857314d647a9068 + + +import argparse +import functools +import os +import re +import socketserver +import webbrowser +from http.server import SimpleHTTPRequestHandler + + +DEFAULT_PORT = 8080 + + +def copy_byte_range(infile, outfile, start=None, stop=None, bufsize=16 * 1024): + """Like shutil.copyfileobj, but only copy a range of the streams. + + Both start and stop are inclusive. + """ + if start is not None: + infile.seek(start) + while 1: + to_read = min(bufsize, stop + 1 - infile.tell() if stop else bufsize) + buf = infile.read(to_read) + if not buf: + break + outfile.write(buf) + + +BYTE_RANGE_RE = re.compile(r"bytes=(\d+)-(\d+)?$") + + +def parse_byte_range(byte_range): + """Returns the two numbers in 'bytes=123-456' or throws ValueError. + + The last number or both numbers may be None. + """ + if byte_range.strip() == "": + return None, None + + m = BYTE_RANGE_RE.match(byte_range) + if not m: + raise ValueError("Invalid byte range %s" % byte_range) + + first, last = [x and int(x) for x in m.groups()] + if last and last < first: + raise ValueError("Invalid byte range %s" % byte_range) + return first, last + + +class RangeRequestHandler(SimpleHTTPRequestHandler): + """Adds support for HTTP 'Range' requests to SimpleHTTPRequestHandler + + The approach is to: + - Override send_head to look for 'Range' and respond appropriately. + - Override copyfile to only transmit a range when requested. + """ + + def handle(self): + try: + SimpleHTTPRequestHandler.handle(self) + except Exception: + # ignored, thrown whenever the client aborts streaming (broken pipe) + pass + + def send_head(self): + if "Range" not in self.headers: + self.range = None + return SimpleHTTPRequestHandler.send_head(self) + try: + self.range = parse_byte_range(self.headers["Range"]) + except ValueError: + self.send_error(400, "Invalid byte range") + return None + first, last = self.range + + # Mirroring SimpleHTTPServer.py here + path = self.translate_path(self.path) + f = None + ctype = self.guess_type(path) + try: + f = open(path, "rb") + except IOError: + self.send_error(404, "File not found") + return None + + fs = os.fstat(f.fileno()) + file_len = fs[6] + if first >= file_len: + self.send_error(416, "Requested Range Not Satisfiable") + return None + + self.send_response(206) + self.send_header("Content-type", ctype) + + if last is None or last >= file_len: + last = file_len - 1 + response_length = last - first + 1 + + self.send_header("Content-Range", "bytes %s-%s/%s" % (first, last, file_len)) + self.send_header("Content-Length", str(response_length)) + self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) + self.end_headers() + return f + + def end_headers(self): + self.send_header("Accept-Ranges", "bytes") + return SimpleHTTPRequestHandler.end_headers(self) + + def copyfile(self, source, outputfile): + if not self.range: + return SimpleHTTPRequestHandler.copyfile(self, source, outputfile) + + # SimpleHTTPRequestHandler uses shutil.copyfileobj, which doesn't let + # you stop the copying before the end of the file. + start, stop = self.range # set in send_head() + copy_byte_range(source, outputfile, start, stop) + + +class ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer): + allow_reuse_address = True + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Simple Python Web Server with Range Support" + ) + parser.add_argument( + "--root", + default=os.getcwd(), + help="Root path to serve files from (default: current working directory)", + ) + parser.add_argument( + "--port", + type=int, + default=DEFAULT_PORT, + help=f"Port to listen on (default: {DEFAULT_PORT})", + ) + parser.add_argument( + "--bind", default="0.0.0.0", help="IP address to bind to (default: 0.0.0.0)" + ) + args = parser.parse_args() + + handler = functools.partial(RangeRequestHandler, directory=args.root) + + webbrowser.open(f"http://{args.bind}:{args.port}") + + with ThreadedTCPServer((args.bind, args.port), handler) as httpd: + print( + f"Serving HTTP on {args.bind} port {args.port} (http://{args.bind}:{args.port}/)" + ) + httpd.serve_forever()