Mercurial > repos > fubar > jbrowse2dev
view jbrowse2/jbrowse2_json.py @ 9:6a41f87b5d7f draft
Uploaded
author | fubar |
---|---|
date | Fri, 05 Jan 2024 05:06:21 +0000 |
parents | 88b9b105c09b |
children |
line wrap: on
line source
#!/usr/bin/env python
# change to accumulating all configuration for config.json based on the default from the clone
"""Build a JBrowse2 instance from a Galaxy track-configuration XML file.

The script parses the XML given on the command line, prepares each dataset
(copy / compress / index) inside the output directory and registers the
assembly and tracks by shelling out to the ``jbrowse`` command line tool.
External programs invoked: ``jbrowse``, ``bgzip``, ``tabix``, ``samtools``,
``sort``, ``cp``, ``ln``, ``rm``, ``bash`` and ``python``.
"""
import argparse
import datetime
import hashlib
import json
import logging
import os
import shlex
import shutil
import subprocess
import tempfile
import xml.etree.ElementTree as ET
from collections import defaultdict

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("jbrowse")
TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
# Filled in by the __main__ section from the XML before any connector is built.
GALAXY_INFRASTRUCTURE_URL = None
# Maps a literal character to the placeholder found in sanitized labels;
# process_annotations() applies it in reverse (placeholder -> character).
mapped_chars = {
    ">": "__gt__",
    "<": "__lt__",
    "'": "__sq__",
    '"': "__dq__",
    "[": "__ob__",
    "]": "__cb__",
    "{": "__oc__",
    "}": "__cc__",
    "@": "__at__",
    "#": "__pd__",
    "": "__cn__",
}


def etree_to_dict(t):
    """Convert an ElementTree element into nested dicts.

    Attributes are stored under ``"@name"`` keys, text next to attributes or
    children under ``"#text"``, and repeated child tags are collected into a
    list. ``None`` yields an empty dict.
    """
    if t is None:
        return {}

    d = {t.tag: {} if t.attrib else None}
    children = list(t)
    if children:
        dd = defaultdict(list)
        for dc in map(etree_to_dict, children):
            for k, v in dc.items():
                dd[k].append(v)
        # A tag seen once stays a scalar, a repeated tag becomes a list.
        d = {t.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}}
    if t.attrib:
        d[t.tag].update(("@" + k, v) for k, v in t.attrib.items())
    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            if text:
                d[t.tag]["#text"] = text
        else:
            d[t.tag] = text
    return d


INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))


def metadata_from_node(node):
    """Flatten the attributes of a ``<metadata>`` node into one dict.

    Attributes of the ``dataset``, ``history``, ``metadata`` and ``tool``
    children are copied with a ``dataset_`` / ``history_`` / ``metadata_`` /
    ``tool_`` prefix, then a few values are rewritten as HTML links.

    Returns an empty dict when ``node`` is unusable (e.g. ``None``) or does
    not contain exactly one ``dataset`` child. Missing sibling elements or
    attributes still raise ``IndexError`` / ``KeyError``.
    """
    metadata = {}
    try:
        if len(node.findall("dataset")) != 1:
            # exit early
            return metadata
    except Exception:
        return {}

    for (key, value) in node.findall("dataset")[0].attrib.items():
        metadata["dataset_%s" % key] = value

    for (key, value) in node.findall("history")[0].attrib.items():
        metadata["history_%s" % key] = value

    for (key, value) in node.findall("metadata")[0].attrib.items():
        metadata["metadata_%s" % key] = value

    for (key, value) in node.findall("tool")[0].attrib.items():
        metadata["tool_%s" % key] = value

    # Additional Mappings applied:
    metadata[
        "dataset_edam_format"
    ] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
        metadata["dataset_edam_format"], metadata["dataset_file_ext"]
    )
    metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
        metadata["history_user_email"]
    )
    # Keep the plain name before the display name is turned into a link.
    metadata["hist_name"] = metadata["history_display_name"]
    metadata[
        "history_display_name"
    ] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
        galaxy=GALAXY_INFRASTRUCTURE_URL,
        encoded_hist_id=metadata["history_id"],
        hist_name=metadata["history_display_name"],
    )
    metadata[
        "tool_tool"
    ] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}</a>'.format(
        galaxy=GALAXY_INFRASTRUCTURE_URL,
        encoded_id=metadata["dataset_id"],
        tool_id=metadata["tool_tool_id"],
        # tool_version=metadata['tool_tool_version'],
    )
    return metadata


class JbrowseConnector(object):
    """Prepare data files in ``outdir`` and register them with the jbrowse CLI."""

    def __init__(self, jbrowse, outdir, genomes, standalone=None):
        """Create the output directory and optionally populate it with JBrowse.

        jbrowse: folder containing a jbrowse release.
        outdir: output directory, created when missing.
        genomes: list of dicts with ``"path"`` and ``"meta"`` keys.
        standalone: ``"complete"`` or ``"minimal"`` triggers clone_jbrowse().
        """
        self.debug = False
        self.giURL = GALAXY_INFRASTRUCTURE_URL
        self.jbrowse = jbrowse
        self.outdir = outdir
        os.makedirs(self.outdir, exist_ok=True)
        self.genome_paths = genomes
        self.standalone = standalone
        self.trackIdlist = []
        self.tracksToAdd = []
        self.config_json = {}
        self.config_json_file = os.path.realpath(os.path.join(outdir, "config.json"))
        if standalone == "complete":
            self.clone_jbrowse(self.jbrowse, self.outdir)
        elif standalone == "minimal":
            self.clone_jbrowse(self.jbrowse, self.outdir, minimal=True)

    def subprocess_check_call(self, command, output=None):
        """Run an argument-list command with ``outdir`` as working directory.

        ``output`` is passed as the child's stdout when given. Raises
        ``subprocess.CalledProcessError`` on a non-zero exit status.
        """
        if output:
            if self.debug:
                log.debug("cd %s && %s >  %s", self.outdir, " ".join(command), output)
            subprocess.check_call(command, cwd=self.outdir, stdout=output)
        else:
            log.debug("cd %s && %s", self.outdir, " ".join(command))
            subprocess.check_call(command, cwd=self.outdir)

    def subprocess_popen(self, command):
        """Run a shell command string; raise ``RuntimeError`` on failure.

        The command goes through the shell (pipes and redirects work), so
        callers must quote any path they interpolate. Note that the process
        is NOT started in ``outdir`` even though the log line suggests it.
        """
        if self.debug:
            log.debug("cd %s && %s", self.outdir, command)
        p = subprocess.Popen(
            command,
            shell=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        output, err = p.communicate()
        retcode = p.returncode
        if retcode != 0:
            log.error("cd %s && %s", self.outdir, command)
            log.error(output)
            log.error(err)
            raise RuntimeError("Command failed with exit code %s" % (retcode))

    def subprocess_check_output(self, command):
        """Run an argument-list command in ``outdir`` and return its stdout."""
        if self.debug:
            log.debug("cd %s && %s", self.outdir, " ".join(command))
        return subprocess.check_output(command, cwd=self.outdir)

    def _jbrowse_bin(self, command):
        """Return the absolute path of ``command`` inside ``<jbrowse>/bin``."""
        return os.path.realpath(os.path.join(self.jbrowse, "bin", command))

    def symlink_or_copy(self, src, dest):
        """Symlink ``src`` to ``dest`` or copy it.

        A symlink is used when the GALAXY_JBROWSE_SYMLINKS environment
        variable is set to any non-empty string (even "0" or "false").
        """
        if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool(
            os.environ["GALAXY_JBROWSE_SYMLINKS"]
        ):
            cmd = ["ln", "-s", src, dest]
        else:
            cmd = ["cp", src, dest]

        return self.subprocess_check_call(cmd)

    def _add_track(self, track_data):
        """Register a track described by ``track_data`` via ``jbrowse add-track``.

        Reads the ``path``, ``type``, ``name`` and ``label`` keys plus the
        optional ``indexfile`` and ``category`` (iterable) keys. An empty
        ``track_data`` is a no-op.
        """
        if len(track_data) == 0:
            return
        cmd = [
            "jbrowse",
            "add-track",
            track_data["path"],
            "-t",
            track_data["type"],
            "-n",
            track_data["name"],
            "-l",
            "move",
            "--trackId",
            track_data["label"],
            "--target",
            self.outdir,
        ]
        if track_data.get("indexfile"):
            cmd += ["--indexFile", track_data["indexfile"]]
        if track_data.get("category"):
            for c in track_data["category"]:
                cmd += ["--category", c]
        # The command used to be assembled and then dropped; actually run it.
        self.subprocess_check_call(cmd)

    def process_genomes(self):
        """Compress, index and register every reference genome.

        Each genome is bgzip-compressed into ``outdir``, indexed with
        ``samtools faidx`` and added with ``jbrowse add-assembly``.
        ``self.genome_name`` ends up holding the name of the last genome.
        """
        assemblies = []
        for genome_node in self.genome_paths:
            log.info("genome_node=%s" % str(genome_node))
            # We only expect one input genome per run. This for loop is just
            # easier to write than the alternative / catches any possible
            # issues.
            genome_name = genome_node["meta"]["dataset_dname"]
            fapath = genome_node["path"]
            faname = genome_name + ".fa.gz"
            # NOTE(review): the remote (non-copy) branch is switched off by
            # the "True or" below; kept for the planned minimal mode.
            if True or self.standalone == "complete":
                fadest = os.path.realpath(os.path.join(self.outdir, faname))
                # Quote the paths: genome names may contain spaces or shell
                # metacharacters and this command goes through the shell.
                cmd = "bgzip -i -c %s > %s && samtools faidx %s" % (
                    shlex.quote(fapath),
                    shlex.quote(fadest),
                    shlex.quote(fadest),
                )
                self.subprocess_popen(cmd)
                adapter = {
                    "type": "BgzipFastaAdapter",
                    "fastaLocation": {
                        "uri": faname,
                    },
                    "faiLocation": {
                        "uri": faname + ".fai",
                    },
                    "gziLocation": {
                        "uri": faname + ".gzi",
                    },
                }
            else:
                dsId = genome_node["meta"]["dataset_id"]
                faind = os.path.realpath(os.path.join(self.outdir, faname + ".gzi"))
                faurl = "%s/api/datasets/%s/display" % (self.giURL, dsId)
                fastalocation = {
                    "uri": faurl,
                }
                failocation = {
                    "uri": faname + ".fai",
                }
                adapter = {
                    "type": "IndexedFastaAdapter",
                    "fastaLocation": fastalocation,
                    "faiLocation": failocation,
                }

                cmd = ["samtools", "faidx", fapath, "--fai-idx", faind]
                self.subprocess_check_call(cmd)
            trackDict = {
                "name": genome_name,
                "sequence": {
                    "type": "ReferenceSequenceTrack",
                    "trackId": genome_name,
                    "adapter": adapter,
                },
                "rendering": {"type": "DivSequenceRenderer"},
            }
            assemblies.append(trackDict)
            # self.config_json["assemblies"] = assemblies
            self.genome_name = genome_name
            # Registered per genome so an empty genome list does not hit
            # undefined names and several genomes are all added.
            cmd = [
                "jbrowse",
                "add-assembly",
                faname,
                "-t",
                "bgzipFasta",
                "-n",
                genome_name,
                "--load",
                "inPlace",
                "--faiLocation",
                faname + ".fai",
                "--gziLocation",
                faname + ".gzi",
                "--target",
                self.outdir,
            ]
            self.subprocess_check_call(cmd)

    def add_default_view(self):
        """Call ``jbrowse set-default-session`` for the collected track ids."""
        cmd = [
            "jbrowse",
            "set-default-session",
            "-s",
            self.config_json_file,
            "-t",
            ",".join(self.trackIdlist),
            "-n",
            "JBrowse2 in Galaxy",
            "--target",
            self.config_json_file,
            "-v",
            # The view type must not carry stray whitespace.
            "LinearGenomeView",
        ]
        log.info("### calling set-default-session with cmd=%s" % " ".join(cmd))
        self.subprocess_check_call(cmd)

    def write_config(self):
        """Dump ``self.config_json`` to the config.json path."""
        with open(self.config_json_file, "w") as fp:
            json.dump(self.config_json, fp)

    def add_hic(self, data, trackData):
        """
        HiC adapter.
        https://github.com/aidenlab/hic-format/blob/master/HiCFormatV9.md
        for testing locally, these work:
        HiC data is from https://s3.amazonaws.com/igv.broadinstitute.org/data/hic/intra_nofrag_30.hic
        using hg19 reference track as a
        'BgzipFastaAdapter'
            fastaLocation:
            uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz',
            faiLocation:
            uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai',
            gziLocation:
            uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi',
        Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438

        The file ``data`` is copied into ``outdir`` under the track's
        human-readable name and added as a ``HicTrack``.
        """
        log.info("#### trackData=%s" % trackData)
        tId = trackData["label"]
        hname = trackData["name"]
        # NOTE(review): remote branch disabled by "True or"; kept on purpose.
        if True or self.standalone == "complete":
            dest = os.path.realpath(os.path.join(self.outdir, hname))
            url = hname
            cmd = ["cp", data, dest]
            self.subprocess_check_call(cmd)
            floc = {
                "uri": hname,
            }
        else:
            dsId = trackData["metadata"]["dataset_id"]
            url = "%s/api/datasets/%s/display?to_ext=hic" % (self.giURL, dsId)
            floc = {
                "uri": url,
            }
        # Not used yet: kept for the planned switch to writing config.json.
        trackDict = {
            "type": "HicTrack",
            "trackId": tId,
            "name": hname,
            "assemblyNames": [self.genome_name],
            "adapter": {
                "type": "HicAdapter",
                "hicLocation": floc,
            },
        }
        # self.tracksToAdd.append(trackDict)
        # self.trackIdlist.append(tId)
        cmd = [
            "jbrowse",
            "add-track",
            url,
            "-t",
            "HicTrack",
            "-a",
            self.genome_name,
            "-n",
            hname,
            "--load",
            "inPlace",
            "--target",
            self.outdir,
        ]
        self.subprocess_check_call(cmd)

    @staticmethod
    def _maf_samples(maf_path):
        """Return the sorted, unique species names found in a MAF file.

        The species is the part before the first ``.`` of the source name on
        each ``s`` line (``s hg38.chr1 ...`` -> ``hg38``).
        """
        samples = set()
        with open(maf_path, encoding="utf-8", errors="replace") as handle:
            for line in handle:
                if not line.startswith("s "):
                    continue
                fields = line.split()
                if len(fields) > 1:
                    samples.add(fields[1].split(".")[0])
        return sorted(samples)

    def add_maf(self, data, trackData):
        """
        from https://github.com/cmdcolin/maf2bed
        Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and
        chromosome name e.g. hg38.chr1 in the sequence identifiers.
        need the reference id - eg hg18, for maf2bed.pl as the first parameter

        Runs convertMAF.sh, queues a ``MafTrack`` definition in
        ``self.tracksToAdd`` and registers the MafViewer plugin in
        ``self.config_json``.
        """
        mafPlugin = {
            "plugins": [
                {
                    "name": "MafViewer",
                    "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js",
                }
            ]
        }
        tId = trackData["label"]
        fname = "%s.bed" % tId
        dest = os.path.realpath("%s/%s" % (self.outdir, fname))
        # self.symlink_or_copy(data, dest)
        # Process MAF to bed-like. Need build to munge chromosomes
        gname = self.genome_name
        cmd = [
            "bash",
            os.path.join(INSTALLED_TO, "convertMAF.sh"),
            data,
            gname,
            INSTALLED_TO,
            dest,
        ]
        self.subprocess_check_call(cmd)
        log.info("### convertMAF.sh called as %s" % " ".join(cmd))
        # Construct samples list
        # We could get this from galaxy metadata, not sure how easily.
        # Species are de-duplicated: a MAF covering several chromosomes
        # would otherwise list the same species once per chromosome.
        samples = self._maf_samples(data)
        if self.debug:
            log.info("### got samples = %s " % (samples))
        trackDict = {
            "type": "MafTrack",
            "trackId": tId,
            "name": trackData["name"],
            "adapter": {
                "type": "MafTabixAdapter",
                "samples": samples,
                "bedGzLocation": {
                    "uri": fname + ".sorted.bed.gz",
                },
                "index": {
                    "location": {
                        "uri": fname + ".sorted.bed.gz.tbi",
                    },
                },
            },
            "assemblyNames": [self.genome_name],
        }
        self.tracksToAdd.append(trackDict)
        self.trackIdlist.append(tId)
        # mafPlugin is a dict, so the plugin entry lives under "plugins";
        # indexing it with [0] raised KeyError on the second MAF track.
        plugin = mafPlugin["plugins"][0]
        registered = self.config_json.get("plugins", None)
        if registered:
            if plugin not in registered:
                registered.append(plugin)
        else:
            self.config_json.update(mafPlugin)

    def _blastxml_to_gff3(self, xml, min_gap=10):
        """Convert BLAST XML to GFF3 in a temp file and return its path.

        The caller is responsible for deleting the returned file.
        """
        cmd = [
            "python",
            os.path.join(INSTALLED_TO, "blastxml_to_gapped_gff3.py"),
            "--trim",
            "--trim_end",
            "--include_seq",
            "--min_gap",
            str(min_gap),
            xml,
        ]
        # delete=False: the file must outlive the handle, only close it here.
        with tempfile.NamedTemporaryFile(delete=False) as gff3_unrebased:
            subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
        return gff3_unrebased.name

    def add_blastxml(self, data, trackData, blastOpts, **kwargs):
        """Convert BLAST XML to a sorted, indexed GFF3 ``FeatureTrack``.

        ``blastOpts`` must hold ``min_gap``; when ``parent`` is set (and not
        the string "None") the features are rebased with gff3_rebase.py,
        using ``--protein2dna`` if ``protein`` equals "true".
        """
        gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"])

        if "parent" in blastOpts and blastOpts["parent"] != "None":
            cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")]
            if blastOpts.get("protein", "false") == "true":
                cmd.append("--protein2dna")
            cmd.extend([os.path.realpath(blastOpts["parent"]), gff3])
            with tempfile.NamedTemporaryFile(delete=False) as gff3_rebased:
                subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)

            # Replace original gff3 file
            shutil.copy(gff3_rebased.name, gff3)
            os.unlink(gff3_rebased.name)
        url = "%s.gff3" % trackData["label"]
        dest = os.path.realpath("%s/%s" % (self.outdir, url))
        self._sort_gff(gff3, dest)
        url = url + ".gz"
        tId = trackData["label"]
        # Not used yet: kept for the planned switch to writing config.json.
        trackDict = {
            "type": "FeatureTrack",
            "trackId": tId,
            "name": trackData["name"],
            "assemblyNames": [self.genome_name],
            "adapter": {
                "type": "Gff3TabixAdapter",
                "gffGzLocation": {
                    "uri": url,
                },
                "index": {
                    "location": {
                        "uri": url + ".tbi",
                    }
                },
            },
            "displays": [
                {
                    "type": "LinearBasicDisplay",
                    "displayId": "%s-LinearBasicDisplay" % tId,
                },
                {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
            ],
        }
        # self.tracksToAdd.append(trackDict)
        # self.trackIdlist.append(tId)
        cmd = [
            "jbrowse",
            "add-track",
            url,
            "-t",
            "FeatureTrack",
            "-a",
            self.genome_name,
            "--indexFile",
            url + ".tbi",
            "-n",
            trackData["name"],
            "--load",
            "inPlace",
            "--target",
            self.outdir,
        ]
        self.subprocess_check_call(cmd)
        os.unlink(gff3)

    def add_bigwig(self, data, trackData):
        """Copy a bigWig into ``outdir`` and add it as a ``QuantitativeTrack``.

        The copied file is named after the track's human-readable name.
        """
        url = "%s.bw" % trackData["name"]
        # NOTE(review): remote branch disabled by "True or"; kept on purpose.
        if True or self.standalone == "complete":
            dest = os.path.realpath(os.path.join(self.outdir, url))
            cmd = ["cp", data, dest]
            self.subprocess_check_call(cmd)
            bwloc = {"uri": url}
        else:
            dsId = trackData["metadata"]["dataset_id"]
            url = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId)
            bwloc = {"uri": url}
        tId = trackData["label"]
        # Not used yet: kept for the planned switch to writing config.json.
        trackDict = {
            "type": "QuantitativeTrack",
            "trackId": tId,
            "name": url,
            "assemblyNames": [
                self.genome_name,
            ],
            "adapter": {
                "type": "BigWigAdapter",
                "bigWigLocation": bwloc,
            },
            "displays": [
                {
                    "type": "LinearWiggleDisplay",
                    "displayId": "%s-LinearWiggleDisplay" % tId,
                }
            ],
        }
        # self.tracksToAdd.append(trackDict)
        # self.trackIdlist.append(tId)
        cmd = [
            "jbrowse",
            "add-track",
            url,
            "-t",
            "QuantitativeTrack",
            "-a",
            self.genome_name,
            "-n",
            trackData["name"],
            "--load",
            "inPlace",
            "--target",
            self.outdir,
        ]
        self.subprocess_check_call(cmd)

    def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
        """Copy a BAM (and its index when one is found) and add the track.

        The index is taken from ``bam_index`` when that file exists,
        otherwise from ``<data>.bai``; a warning is logged if neither exists.
        ``bamOpts`` is accepted but not read here.
        """
        tId = trackData["label"]
        fname = "%s.bam" % trackData["label"]
        dest = os.path.realpath("%s/%s" % (self.outdir, fname))
        # NOTE(review): remote branch disabled by "True or"; kept on purpose.
        if True or self.standalone == "complete":
            url = fname
            self.subprocess_check_call(["cp", data, dest])
            log.info("### copied %s to %s" % (data, dest))
            bloc = {"uri": url}
        else:
            dsId = trackData["metadata"]["dataset_id"]
            url = "%s/api/datasets/%s/display?to_ext=bam" % (self.giURL, dsId)
            bloc = {"uri": url}
        if bam_index is not None and os.path.exists(os.path.realpath(bam_index)):
            # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
            self.subprocess_check_call(
                ["cp", os.path.realpath(bam_index), dest + ".bai"]
            )
        else:
            # Can happen in exotic condition
            # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam
            #      => no index generated by galaxy, but there might be one next to the symlink target
            #      this trick allows to skip the bam sorting made by galaxy if already done outside
            if os.path.exists(os.path.realpath(data) + ".bai"):
                self.symlink_or_copy(os.path.realpath(data) + ".bai", dest + ".bai")
            else:
                log.warning("Could not find a bam index (.bai file) for %s", data)
        # Not used yet: kept for the planned switch to writing config.json.
        trackDict = {
            "type": "AlignmentsTrack",
            "trackId": tId,
            "name": trackData["name"],
            "assemblyNames": [self.genome_name],
            "adapter": {
                "type": "BamAdapter",
                "bamLocation": bloc,
                "index": {
                    "location": {
                        "uri": fname + ".bai",
                    }
                },
            },
        }
        # self.tracksToAdd.append(trackDict)
        # self.trackIdlist.append(tId)
        cmd = [
            "jbrowse",
            "add-track",
            fname,
            "-t",
            "AlignmentsTrack",
            "-l",
            "inPlace",
            "-a",
            self.genome_name,
            "--indexFile",
            fname + ".bai",
            "-n",
            trackData["name"],
            "--target",
            self.outdir,
        ]
        self.subprocess_check_call(cmd)

    def add_vcf(self, data, trackData):
        """bgzip + tabix a VCF into ``outdir`` and add it as a ``VariantTrack``."""
        tId = trackData["label"]
        url = "%s.vcf.gz" % tId
        dest = os.path.realpath("%s/%s" % (self.outdir, url))
        # Shell string because of the redirect: quote both paths.
        cmd = "bgzip -c %s  > %s" % (shlex.quote(data), shlex.quote(dest))
        self.subprocess_popen(cmd)
        cmd = ["tabix", "-p", "vcf", dest]
        self.subprocess_check_call(cmd)
        # Not used yet: kept for the planned switch to writing config.json.
        trackDict = {
            "type": "VariantTrack",
            "trackId": tId,
            "name": trackData["name"],
            "assemblyNames": [self.genome_name],
            "adapter": {
                "type": "VcfTabixAdapter",
                "vcfGzLocation": {
                    "uri": url,
                },
                "index": {
                    "location": {
                        "uri": url + ".tbi",
                    }
                },
            },
            "displays": [
                {
                    "type": "LinearVariantDisplay",
                    "displayId": "%s-LinearVariantDisplay" % tId,
                },
                {
                    "type": "ChordVariantDisplay",
                    "displayId": "%s-ChordVariantDisplay" % tId,
                },
                {
                    "type": "LinearPairedArcDisplay",
                    "displayId": "%s-LinearPairedArcDisplay" % tId,
                },
            ],
        }
        # self.tracksToAdd.append(trackDict)
        # self.trackIdlist.append(tId)
        cmd = [
            "jbrowse",
            "add-track",
            url,
            "-t",
            "VariantTrack",
            "-a",
            self.genome_name,
            "--indexFile",
            url + ".tbi",
            "-n",
            trackData["name"],
            "--load",
            "inPlace",
            "--target",
            self.outdir,
        ]
        self.subprocess_check_call(cmd)

    def _sort_gff(self, data, dest):
        """Write ``dest + ".gz"`` (sorted, bgzipped, tabix-indexed GFF).

        Nothing happens when ``dest + ".gz"`` already exists.
        """
        # Only index if not already done
        if not os.path.exists(dest + ".gz"):
            cmd = "jbrowse sort-gff %s | bgzip -c > %s" % (
                shlex.quote(data),
                shlex.quote(dest + ".gz"),
            )  # "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'"
            self.subprocess_popen(cmd)
            self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"])

    def _sort_bed(self, data, dest):
        """Write ``dest`` (sorted, bgzipped, tabix-indexed BED).

        Nothing happens when ``dest`` already exists.
        """
        # Only index if not already done
        if not os.path.exists(dest):
            cmd = "sort -k1,1 -k2,2n %s | bgzip -c > %s" % (
                shlex.quote(data),
                shlex.quote(dest),
            )
            self.subprocess_popen(cmd)
            cmd = ["tabix", "-f", "-p", "bed", dest]
            self.subprocess_check_call(cmd)

    def add_gff(self, data, ext, trackData):
        """Sort/index a GFF file and add it as a ``FeatureTrack``."""
        url = "%s.%s" % (trackData["label"], ext)
        dest = os.path.realpath("%s/%s" % (self.outdir, url))
        self._sort_gff(data, dest)
        url = url + ".gz"
        tId = trackData["label"]
        # Not used yet: kept for the planned switch to writing config.json.
        trackDict = {
            "type": "FeatureTrack",
            "trackId": tId,
            "name": trackData["name"],
            "assemblyNames": [self.genome_name],
            "adapter": {
                "type": "Gff3TabixAdapter",
                "gffGzLocation": {
                    "uri": url,
                },
                "index": {
                    "location": {
                        "uri": url + ".tbi",
                    }
                },
            },
            "displays": [
                {
                    "type": "LinearBasicDisplay",
                    "displayId": "%s-LinearBasicDisplay" % tId,
                },
                {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
            ],
        }
        # self.tracksToAdd.append(trackDict)
        # self.trackIdlist.append(tId)
        cmd = [
            "jbrowse",
            "add-track",
            url,
            "-t",
            "FeatureTrack",
            "-a",
            self.genome_name,
            "-n",
            trackData["name"],
            "--load",
            "inPlace",
            "--target",
            self.outdir,
        ]
        self.subprocess_check_call(cmd)

    def add_bed(self, data, ext, trackData):
        """Sort/index a BED file and add it as a ``FeatureTrack``."""
        url = "%s.%s" % (trackData["label"], ext)
        dest = os.path.realpath("%s/%s.gz" % (self.outdir, url))
        self._sort_bed(data, dest)
        tId = trackData["label"]
        url = url + ".gz"
        # Not used yet: kept for the planned switch to writing config.json.
        trackDict = {
            "type": "FeatureTrack",
            "trackId": tId,
            "name": trackData["name"],
            "assemblyNames": [self.genome_name],
            "adapter": {
                "type": "BedTabixAdapter",
                "bedGzLocation": {
                    "uri": url,
                },
                "index": {
                    "location": {
                        "uri": url + ".tbi",
                    }
                },
            },
            "displays": [
                {
                    "type": "LinearBasicDisplay",
                    "displayId": "%s-LinearBasicDisplay" % tId,
                },
                {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
            ],
        }
        # self.tracksToAdd.append(trackDict)
        # self.trackIdlist.append(tId)
        cmd = [
            "jbrowse",
            "add-track",
            url,
            "-t",
            "FeatureTrack",
            "-a",
            self.genome_name,
            "--indexFile",
            url + ".tbi",
            "-n",
            trackData["name"],
            "--load",
            "inPlace",
            "--target",
            self.outdir,
        ]
        self.subprocess_check_call(cmd)

    def process_annotations(self, track):
        """Dispatch every file of ``track`` to the matching ``add_*`` method.

        ``track`` needs the ``category`` and ``trackfiles`` keys; each
        trackfile is a ``(path, ext, label, metadata)`` tuple. ``conf`` is
        read for BAM and BLAST options. Unknown extensions only log a
        warning.
        """
        category = track["category"].replace("__pd__date__pd__", TODAY)
        for i, (
            dataset_path,
            dataset_ext,
            track_human_label,
            extra_metadata,
        ) in enumerate(track["trackfiles"]):
            # Unsanitize labels (element_identifiers are always sanitized by Galaxy)
            for key, value in mapped_chars.items():
                track_human_label = track_human_label.replace(value, key)
            outputTrackConfig = {
                "category": category,
            }
            if self.debug:
                log.info(
                    "Processing category = %s, track_human_label = %s",
                    category,
                    track_human_label,
                )
            # We add extra data to hash for the case of REST + SPARQL.
            if (
                "conf" in track
                and "options" in track["conf"]
                and "url" in track["conf"]["options"]
            ):
                rest_url = track["conf"]["options"]["url"]
            else:
                rest_url = ""

            # I chose to use track['category'] instead of 'category' here. This
            # is intentional. This way re-running the tool on a different date
            # will not generate different hashes and make comparison of outputs
            # much simpler.
            hashData = [
                str(dataset_path),
                track_human_label,
                track["category"],
                rest_url,
            ]
            hashData = "|".join(hashData).encode("utf-8")
            # md5 only serves as a stable identifier here, not for security.
            outputTrackConfig["label"] = hashlib.md5(hashData).hexdigest() + "_%s" % i
            outputTrackConfig["metadata"] = extra_metadata
            outputTrackConfig["name"] = track_human_label

            if dataset_ext in ("gff", "gff3"):
                self.add_gff(
                    dataset_path,
                    dataset_ext,
                    outputTrackConfig,
                )
            elif dataset_ext in ("hic",):
                self.add_hic(
                    dataset_path,
                    outputTrackConfig,
                )
            elif dataset_ext in ("bed",):
                self.add_bed(
                    dataset_path,
                    dataset_ext,
                    outputTrackConfig,
                )
            elif dataset_ext in ("maf",):
                self.add_maf(
                    dataset_path,
                    outputTrackConfig,
                )
            elif dataset_ext == "bigwig":
                self.add_bigwig(
                    dataset_path,
                    outputTrackConfig,
                )
            elif dataset_ext == "bam":
                real_indexes = track["conf"]["options"]["pileup"]["bam_indices"][
                    "bam_index"
                ]
                if not isinstance(real_indexes, list):
                    # <bam_indices>
                    #  <bam_index>/path/to/a.bam.bai</bam_index>
                    # </bam_indices>
                    #
                    # The above will result in the 'bam_index' key containing a
                    # string. If there are two or more indices, the container
                    # becomes a list. Fun!
                    real_indexes = [real_indexes]

                self.add_bam(
                    dataset_path,
                    outputTrackConfig,
                    track["conf"]["options"]["pileup"],
                    bam_index=real_indexes[i],
                )
            elif dataset_ext == "blastxml":
                self.add_blastxml(
                    dataset_path, outputTrackConfig, track["conf"]["options"]["blast"]
                )
            elif dataset_ext == "vcf":
                self.add_vcf(dataset_path, outputTrackConfig)
            else:
                log.warning("Do not know how to handle %s", dataset_ext)

    def clone_jbrowse(self, jbrowse_dir, destination, minimal=False):
        """Clone a JBrowse directory into a destination directory.

        Runs ``jbrowse create -f`` on ``self.outdir`` and removes a fixed
        list of files from it. The three parameters are currently ignored.
        """
        cmd = ["jbrowse", "create", "-f", self.outdir]
        self.subprocess_check_call(cmd)
        for fn in [
            "asset-manifest.json",
            "favicon.ico",
            "robots.txt",
            "umd_plugin.js",
            "version.txt",
            "test_data",
        ]:
            cmd = ["rm", "-rf", os.path.join(self.outdir, fn)]
            self.subprocess_check_call(cmd)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="", epilog="")
    parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration")

    parser.add_argument("--jbrowse", help="Folder containing a jbrowse release")
    parser.add_argument("--outdir", help="Output directory", default="out")
    parser.add_argument(
        "--standalone",
        choices=["complete", "minimal", "data"],
        help="Standalone mode includes a copy of JBrowse",
    )
    parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.8.0")
    args = parser.parse_args()

    tree = ET.parse(args.xml.name)
    root = tree.getroot()

    # This should be done ASAP
    GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text
    # Sometimes this comes as `localhost` without a protocol
    if not GALAXY_INFRASTRUCTURE_URL.startswith("http"):
        # so we'll prepend `http://` and hope for the best. Requests *should*
        # be GET and not POST so it should redirect OK
        GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL

    jc = JbrowseConnector(
        jbrowse=args.jbrowse,
        outdir=args.outdir,
        genomes=[
            {
                "path": os.path.realpath(x.attrib["path"]),
                "meta": metadata_from_node(x.find("metadata")),
            }
            for x in root.findall("metadata/genomes/genome")
        ],
        standalone=args.standalone,
    )
    jc.process_genomes()

    for track in root.findall("tracks/track"):
        track_conf = {}
        track_conf["trackfiles"] = []

        is_multi_bigwig = False
        # NOTE(review): an Element's truth value reflects whether it has
        # children, so this test is false for a leaf <multibigwig> node and
        # the flag is never set today. Left untouched on purpose:
        # process_annotations() has no "bigwig_multiple" handler, so
        # correcting the test to "is not None" would drop those bigwigs
        # instead of adding them one by one. Fix both together.
        try:
            if track.find("options/wiggle/multibigwig") and (
                track.find("options/wiggle/multibigwig").text == "True"
            ):
                is_multi_bigwig = True
                multi_bigwig_paths = []
        except KeyError:
            pass

        trackfiles = track.findall("files/trackFile")
        if trackfiles:
            for x in track.findall("files/trackFile"):
                if is_multi_bigwig:
                    multi_bigwig_paths.append(
                        (x.attrib["label"], os.path.realpath(x.attrib["path"]))
                    )
                else:
                    if trackfiles:
                        metadata = metadata_from_node(x.find("metadata"))
                        track_conf["dataset_id"] = metadata["dataset_id"]
                        track_conf["trackfiles"].append(
                            (
                                os.path.realpath(x.attrib["path"]),
                                x.attrib["ext"],
                                x.attrib["label"],
                                metadata,
                            )
                        )
        else:
            # For tracks without files (rest, sparql)
            track_conf["trackfiles"].append(
                (
                    "",  # N/A, no path for rest or sparql
                    track.attrib["format"],
                    track.find("options/label").text,
                    {},
                )
            )

        if is_multi_bigwig:
            metadata = metadata_from_node(x.find("metadata"))

            track_conf["trackfiles"].append(
                (
                    multi_bigwig_paths,  # Passing an array of paths to represent as one track
                    "bigwig_multiple",
                    "MultiBigWig",  # Giving an hardcoded name for now
                    {},  # No metadata for multiple bigwig
                )
            )
        track_conf["category"] = track.attrib["cat"]
        track_conf["format"] = track.attrib["format"]
        try:
            # Only pertains to gff3 + blastxml. TODO?
            track_conf["style"] = {t.tag: t.text for t in track.find("options/style")}
        except TypeError:
            # find() returned None: the track has no <style> options.
            track_conf["style"] = {}
        track_conf["conf"] = etree_to_dict(track.find("options"))
        jc.process_annotations(track_conf)
        print("## processed", str(track_conf), "trackIdlist", jc.trackIdlist)
    print(
        "###done processing, trackIdlist=",
        jc.trackIdlist,
        "config=",
        str(jc.config_json),
    )
    jc.config_json["tracks"] = jc.tracksToAdd
    # jc.write_config()
    jc.add_default_view()