Mercurial > repos > fubar > jbrowse2dev
diff jbrowse2/jbrowse2.py @ 6:88b9b105c09b draft
Uploaded
author | fubar |
---|---|
date | Fri, 05 Jan 2024 01:58:02 +0000 |
parents | 42ca8804cd93 |
children | 234cf4490901 |
line wrap: on
line diff
--- a/jbrowse2/jbrowse2.py Thu Jan 04 02:18:18 2024 +0000 +++ b/jbrowse2/jbrowse2.py Fri Jan 05 01:58:02 2024 +0000 @@ -110,6 +110,7 @@ class JbrowseConnector(object): def __init__(self, jbrowse, outdir, genomes, standalone=None): self.debug = False + self.usejson = True self.giURL = GALAXY_INFRASTRUCTURE_URL self.jbrowse = jbrowse self.outdir = outdir @@ -118,20 +119,9 @@ self.standalone = standalone self.trackIdlist = [] self.tracksToAdd = [] - self.config_json = { - "configuration": { - "rpc": { - "defaultDriver": "WebWorkerRpcDriver", - "drivers": {"MainThreadRpcDriver": {}, "WebWorkerRpcDriver": {}}, - }, - "logoPath": {"locationType": "UriLocation", "uri": ""}, - } - } - self.config_json_file = os.path.join(outdir, "config.json") - if standalone == "complete": - self.clone_jbrowse(self.jbrowse, self.outdir) - elif standalone == "minimal": - self.clone_jbrowse(self.jbrowse, self.outdir, minimal=True) + self.config_json = {} + self.config_json_file = os.path.realpath(os.path.join(outdir, "config.json")) + self.clone_jbrowse(self.jbrowse, self.outdir) def subprocess_check_call(self, command, output=None): if output: @@ -181,44 +171,98 @@ def process_genomes(self): assemblies = [] for i, genome_node in enumerate(self.genome_paths): - log.info("genome_node=%s" % str(genome_node)) - # We only expect one input genome per run. This for loop is just - # easier to write than the alternative / catches any possible - # issues. + if self.debug: + log.info("genome_node=%s" % str(genome_node)) genome_name = genome_node["meta"]["dataset_dname"] dsId = genome_node["meta"]["dataset_id"] fapath = genome_node["path"] - faname = genome_name + ".fasta" - faind = os.path.realpath(os.path.join(self.outdir, faname + ".fai")) if self.standalone == "complete": - faurl = faname + faname = genome_name + ".fa.gz" fadest = os.path.realpath(os.path.join(self.outdir, faname)) - cmd = ["cp", fapath, fadest] - self.subprocess_check_call(cmd) + cmd = "bgzip -i -c %s > %s && samtools faidx %s" % ( + fapath, + fadest, + fadest, + ) + self.subprocess_popen(cmd) + adapter = { + "type": "BgzipFastaAdapter", + "fastaLocation": { + "uri": faname, + }, + "faiLocation": { + "uri": faname + ".fai", + }, + "gziLocation": { + "uri": faname + ".gzi", + }, + } else: - faurl = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId) - cmd = ["samtools", "faidx", fapath, "--fai-idx", faind] - self.subprocess_check_call(cmd) + faurl = "%s/api/datasets/%s/display" % (self.giURL, dsId) + faname = genome_name + ".fa.fai" + fastalocation = { + "uri": faurl, + } + failocation = { + "uri": faname, + } + adapter = { + "type": "IndexedFastaAdapter", + "fastaLocation": fastalocation, + "faiLocation": failocation, + } + + cmd = ["samtools", "faidx", fapath, "--fai-idx", faname] + self.subprocess_check_call(cmd) trackDict = { "name": genome_name, "sequence": { "type": "ReferenceSequenceTrack", "trackId": genome_name, - "adapter": { - "type": "IndexedFastaAdapter", - "fastaLocation": {"uri": faurl, "locationType": "UriLocation"}, - "faiLocation": { - "uri": faname + ".fai", - "locationType": "UriLocation", - }, - }, + "adapter": adapter, }, + "rendering": {"type": "DivSequenceRenderer"}, } assemblies.append(trackDict) - self.config_json["assemblies"] = assemblies self.genome_name = genome_name - self.genome_path = faurl - self.genome_fai_path = faname + ".fai" + if self.usejson: + self.config_json["assemblies"] = assemblies + else: + if self.standalone == "complete": + cmd = [ + "jbrowse", + "add-assembly", + faname, + "-t", + "bgzipFasta", + "-n", + genome_name, + "--load", + "inPlace", + "--faiLocation", + faname + ".fai", + "--gziLocation", + faname + ".gzi", + "--target", + self.outdir, + ] + else: + cmd = [ + "jbrowse", + "add-assembly", + faname, + "-t", + "indexedFasta", + "-n", + genome_name, + "--load", + "inPlace", + "--faiLocation", + faname + ".fai", + "--target", + self.outdir, + ] + self.subprocess_check_call(cmd) def add_default_view(self): cmd = [ @@ -229,10 +273,14 @@ "-t", ",".join(self.trackIdlist), "-n", - "Default", + "JBrowse2 in Galaxy", "--target", - self.outdir, - ] # + self.config_json_file, + "-v", + " LinearGenomeView", + ] + if True or self.debug: + log.info("### calling set-default-session with cmd=%s" % " ".join(cmd)) self.subprocess_check_call(cmd) def write_config(self): @@ -268,8 +316,14 @@ url = hname cmd = ["cp", data, dest] self.subprocess_check_call(cmd) + floc = { + "uri": hname, + } else: url = "%s/api/datasets/%s/display?to_ext=hic" % (self.giURL, dsId) + floc = { + "uri": url, + } trackDict = { "type": "HicTrack", "trackId": tId, @@ -277,11 +331,29 @@ "assemblyNames": [self.genome_name], "adapter": { "type": "HicAdapter", - "hicLocation": {"uri": url, "locationType": "UriLocation"}, + "hicLocation": floc, }, } - self.tracksToAdd.append(trackDict) - self.trackIdlist.append(tId) + if self.usejson: + self.tracksToAdd.append(trackDict) + self.trackIdlist.append(tId) + else: + cmd = [ + "jbrowse", + "add-track", + url, + "-t", + "HicTrack", + "-a", + self.genome_name, + "-n", + hname, + "--load", + "inPlace", + "--target", + self.outdir, + ] + self.subprocess_check_call(cmd) def add_maf(self, data, trackData): """ @@ -333,9 +405,13 @@ "adapter": { "type": "MafTabixAdapter", "samples": samples, - "bedGzLocation": {"uri": fname + ".sorted.bed.gz"}, + "bedGzLocation": { + "uri": fname + ".sorted.bed.gz", + }, "index": { - "location": {"uri": fname + ".sorted.bed.gz.tbi"}, + "location": { + "uri": fname + ".sorted.bed.gz.tbi", + }, }, }, "assemblyNames": [self.genome_name], @@ -390,9 +466,13 @@ "assemblyNames": [self.genome_name], "adapter": { "type": "Gff3TabixAdapter", - "gffGzLocation": {"locationType": "UriLocation", "uri": url}, + "gffGzLocation": { + "uri": url, + }, "index": { - "location": {"locationType": "UriLocation", "uri": url + ".tbi"} + "location": { + "uri": url + ".tbi", + } }, }, "displays": [ @@ -403,31 +483,52 @@ {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId}, ], } - self.tracksToAdd.append(trackDict) - self.trackIdlist.append(tId) + if self.usejson: + self.tracksToAdd.append(trackDict) + self.trackIdlist.append(tId) + else: + cmd = [ + "jbrowse", + "add-track", + url, + "-t", + "FeatureTrack", + "-a", + self.genome_name, + "--indexFile", + url + ".tbi", + "-n", + trackData["name"], + "--load", + "inPlace", + "--target", + self.outdir, + ] + self.subprocess_check_call(cmd) os.unlink(gff3) def add_bigwig(self, data, trackData): - fname = trackData["name"] + url = "%s.bw" % trackData["name"] if self.standalone == "complete": - dest = os.path.realpath(os.path.join(self.outdir, fname)) - url = fname + dest = os.path.realpath(os.path.join(self.outdir, url)) cmd = ["cp", data, dest] self.subprocess_check_call(cmd) + bwloc = {"uri": url} else: dsId = trackData["metadata"]["dataset_id"] url = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId) + bwloc = {"uri": url} tId = trackData["label"] trackDict = { "type": "QuantitativeTrack", "trackId": tId, - "name": fname, + "name": url, "assemblyNames": [ self.genome_name, ], "adapter": { "type": "BigWigAdapter", - "bigWigLocation": {"locationType": "UriLocation", "uri": url}, + "bigWigLocation": bwloc, }, "displays": [ { @@ -436,19 +537,40 @@ } ], } - self.tracksToAdd.append(trackDict) - self.trackIdlist.append(tId) + if self.usejson: + self.tracksToAdd.append(trackDict) + self.trackIdlist.append(tId) + else: + cmd = [ + "jbrowse", + "add-track", + url, + "-t", + "QuantitativeTrack", + "-a", + self.genome_name, + "-n", + trackData["name"], + "--load", + "inPlace", + "--target", + self.outdir, + ] + self.subprocess_check_call(cmd) def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): tId = trackData["label"] fname = "%s.bam" % trackData["label"] dest = os.path.realpath("%s/%s" % (self.outdir, fname)) - if self.standalone == "minimal": + if self.standalone == "complete": + url = fname + self.subprocess_check_call(["cp", data, dest]) + log.info("### copied %s to %s" % (data, dest)) + bloc = {"uri": url} + else: dsId = trackData["metadata"]["dataset_id"] url = "%s/api/datasets/%s/display?to_ext=bam" % (self.giURL, dsId) - else: - url = fname - self.symlink_or_copy(data, dest) + bloc = {"uri": url} if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest self.subprocess_check_call( @@ -470,29 +592,36 @@ "assemblyNames": [self.genome_name], "adapter": { "type": "BamAdapter", - "bamLocation": {"locationType": "UriLocation", "uri": url}, + "bamLocation": bloc, "index": { - "location": {"locationType": "UriLocation", "uri": fname + ".bai"} - }, - "sequenceAdapter": { - "type": "IndexedFastaAdapter", - "fastaLocation": { - "locationType": "UriLocation", - "uri": self.genome_path, - }, - "faiLocation": { - "locationType": "UriLocation", - "uri": self.genome_fai_path, - }, - "metadataLocation": { - "locationType": "UriLocation", - "uri": "/path/to/fa.metadata.yaml", - }, + "location": { + "uri": fname + ".bai", + } }, }, } - self.tracksToAdd.append(trackDict) - self.trackIdlist.append(tId) + if self.usejson: + self.tracksToAdd.append(trackDict) + self.trackIdlist.append(tId) + else: + cmd = [ + "jbrowse", + "add-track", + fname, + "-t", + "AlignmentsTrack", + "-l", + "inPlace", + "-a", + self.genome_name, + "--indexFile", + fname + ".bai", + "-n", + trackData["name"], + "--target", + self.outdir, + ] + self.subprocess_check_call(cmd) def add_vcf(self, data, trackData): tId = trackData["label"] @@ -513,9 +642,13 @@ "assemblyNames": [self.genome_name], "adapter": { "type": "VcfTabixAdapter", - "vcfGzLocation": {"uri": url, "locationType": "UriLocation"}, + "vcfGzLocation": { + "uri": url, + }, "index": { - "location": {"uri": url + ".tbi", "locationType": "UriLocation"} + "location": { + "uri": url + ".tbi", + } }, }, "displays": [ @@ -533,8 +666,28 @@ }, ], } - self.tracksToAdd.append(trackDict) - self.trackIdlist.append(tId) + if self.usejson: + self.tracksToAdd.append(trackDict) + self.trackIdlist.append(tId) + else: + cmd = [ + "jbrowse", + "add-track", + url, + "-t", + "VariantTrack", + "-a", + self.genome_name, + "--indexFile", + url + ".tbi", + "-n", + trackData["name"], + "--load", + "inPlace", + "--target", + self.outdir, + ] + self.subprocess_check_call(cmd) def _sort_gff(self, data, dest): # Only index if not already done @@ -567,9 +720,13 @@ "assemblyNames": [self.genome_name], "adapter": { "type": "Gff3TabixAdapter", - "gffGzLocation": {"locationType": "UriLocation", "uri": url}, + "gffGzLocation": { + "uri": url, + }, "index": { - "location": {"uri": url + ".tbi", "locationType": "UriLocation"} + "location": { + "uri": url + ".tbi", + } }, }, "displays": [ @@ -580,8 +737,26 @@ {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId}, ], } - self.tracksToAdd.append(trackDict) - self.trackIdlist.append(tId) + if self.usejson: + self.tracksToAdd.append(trackDict) + self.trackIdlist.append(tId) + else: + cmd = [ + "jbrowse", + "add-track", + url, + "-t", + "FeatureTrack", + "-a", + self.genome_name, + "-n", + trackData["name"], + "--load", + "inPlace", + "--target", + self.outdir, + ] + self.subprocess_check_call(cmd) def add_bed(self, data, ext, trackData): url = "%s.%s" % (trackData["label"], ext) @@ -596,9 +771,13 @@ "assemblyNames": [self.genome_name], "adapter": { "type": "BedTabixAdapter", - "bedGzLocation": {"locationType": "UriLocation", "uri": url}, + "bedGzLocation": { + "uri": url, + }, "index": { - "location": {"uri": url + ".tbi", "locationType": "UriLocation"} + "location": { + "uri": url + ".tbi", + } }, }, "displays": [ @@ -609,8 +788,28 @@ {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId}, ], } - self.tracksToAdd.append(trackDict) - self.trackIdlist.append(tId) + if self.usejson: + self.tracksToAdd.append(trackDict) + self.trackIdlist.append(tId) + else: + cmd = [ + "jbrowse", + "add-track", + url, + "-t", + "FeatureTrack", + "-a", + self.genome_name, + "--indexFile", + url + ".tbi", + "-n", + trackData["name"], + "--load", + "inPlace", + "--target", + self.outdir, + ] + self.subprocess_check_call(cmd) def process_annotations(self, track): category = track["category"].replace("__pd__date__pd__", TODAY) @@ -713,7 +912,7 @@ else: log.warn("Do not know how to handle %s", dataset_ext) - def clone_jbrowse(self, jbrowse_dir, destination, minimal=False): + def clone_jbrowse(self, jbrowse_dir, destination): """Clone a JBrowse directory into a destination directory.""" cmd = ["jbrowse", "create", "-f", self.outdir] self.subprocess_check_call(cmd) @@ -842,5 +1041,6 @@ str(jc.config_json), ) jc.config_json["tracks"] = jc.tracksToAdd - jc.write_config() + if jc.usejson: + jc.write_config() jc.add_default_view()