jbrowse2dev: jbrowse2/jbrowse2_jbrowsenotjson.py comparison

comparison jbrowse2/jbrowse2_jbrowsenotjson.py @ 6:88b9b105c09b draft

Uploaded

author	fubar
date	Fri, 05 Jan 2024 01:58:02 +0000
parents
children

comparison

equal deleted inserted replaced

-:42ca8804cd93
+:88b9b105c09b
+#!/usr/bin/env python
+# change to accumulating all configuration for config.json based on the default from the clone
+import argparse
+import datetime
+import hashlib
+import json
+import logging
+import os
+import shutil
+import subprocess
+import tempfile
+import xml.etree.ElementTree as ET
+from collections import defaultdict
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger("jbrowse")
+TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
+GALAXY_INFRASTRUCTURE_URL = None
+mapped_chars = {
+">": "__gt__",
+"<": "__lt__",
+"'": "__sq__",
+'"': "__dq__",
+"[": "__ob__",
+"]": "__cb__",
+"{": "__oc__",
+"}": "__cc__",
+"@": "__at__",
+"#": "__pd__",
+"": "__cn__",
+}
+def etree_to_dict(t):
+if t is None:
+return {}
+d = {t.tag: {} if t.attrib else None}
+children = list(t)
+if children:
+dd = defaultdict(list)
+for dc in map(etree_to_dict, children):
+for k, v in dc.items():
+dd[k].append(v)
+d = {t.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}}
+if t.attrib:
+d[t.tag].update(("@" + k, v) for k, v in t.attrib.items())
+if t.text:
+text = t.text.strip()
+if children or t.attrib:
+if text:
+d[t.tag]["#text"] = text
+else:
+d[t.tag] = text
+return d
+INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
+def metadata_from_node(node):
+metadata = {}
+try:
+if len(node.findall("dataset")) != 1:
+# exit early
+return metadata
+except Exception:
+return {}
+for (key, value) in node.findall("dataset")[0].attrib.items():
+metadata["dataset_%s" % key] = value
+for (key, value) in node.findall("history")[0].attrib.items():
+metadata["history_%s" % key] = value
+for (key, value) in node.findall("metadata")[0].attrib.items():
+metadata["metadata_%s" % key] = value
+for (key, value) in node.findall("tool")[0].attrib.items():
+metadata["tool_%s" % key] = value
+# Additional Mappings applied:
+metadata[
+"dataset_edam_format"
+] = '<a target="_blank" href="http://edamontology.org/{0}">{1}</a>'.format(
+metadata["dataset_edam_format"], metadata["dataset_file_ext"]
+)
+metadata["history_user_email"] = '<a href="mailto:{0}">{0}</a>'.format(
+metadata["history_user_email"]
+)
+metadata["hist_name"] = metadata["history_display_name"]
+metadata[
+"history_display_name"
+] = '<a target="_blank" href="{galaxy}/history/view/{encoded_hist_id}">{hist_name}</a>'.format(
+galaxy=GALAXY_INFRASTRUCTURE_URL,
+encoded_hist_id=metadata["history_id"],
+hist_name=metadata["history_display_name"],
+)
+metadata[
+"tool_tool"
+] = '<a target="_blank" href="{galaxy}/datasets/{encoded_id}/show_params">{tool_id}</a>'.format(
+galaxy=GALAXY_INFRASTRUCTURE_URL,
+encoded_id=metadata["dataset_id"],
+tool_id=metadata["tool_tool_id"],
+# tool_version=metadata['tool_tool_version'],
+)
+return metadata
+class JbrowseConnector(object):
+def __init__(self, jbrowse, outdir, genomes, standalone=None):
+self.debug = False
+self.usejson = True
+self.giURL = GALAXY_INFRASTRUCTURE_URL
+self.jbrowse = jbrowse
+self.outdir = outdir
+os.makedirs(self.outdir, exist_ok=True)
+self.genome_paths = genomes
+self.standalone = standalone
+self.trackIdlist = []
+self.tracksToAdd = []
+self.config_json = {}
+self.config_json_file = os.path.realpath(os.path.join(outdir, "config.json"))
+if standalone == "complete":
+self.clone_jbrowse(self.jbrowse, self.outdir)
+elif standalone == "minimal":
+self.clone_jbrowse(self.jbrowse, self.outdir, minimal=True)
+def subprocess_check_call(self, command, output=None):
+if output:
+if self.debug:
+log.debug("cd %s && %s >  %s", self.outdir, " ".join(command), output)
+subprocess.check_call(command, cwd=self.outdir, stdout=output)
+else:
+log.debug("cd %s && %s", self.outdir, " ".join(command))
+subprocess.check_call(command, cwd=self.outdir)
+def subprocess_popen(self, command):
+if self.debug:
+log.debug("cd %s && %s", self.outdir, command)
+p = subprocess.Popen(
+command,
+shell=True,
+stdin=subprocess.PIPE,
+stdout=subprocess.PIPE,
+stderr=subprocess.PIPE,
+)
+output, err = p.communicate()
+retcode = p.returncode
+if retcode != 0:
+log.error("cd %s && %s", self.outdir, command)
+log.error(output)
+log.error(err)
+raise RuntimeError("Command failed with exit code %s" % (retcode))
+def subprocess_check_output(self, command):
+if self.debug:
+log.debug("cd %s && %s", self.outdir, " ".join(command))
+return subprocess.check_output(command, cwd=self.outdir)
+def _jbrowse_bin(self, command):
+return os.path.realpath(os.path.join(self.jbrowse, "bin", command))
+def symlink_or_copy(self, src, dest):
+if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool(
+os.environ["GALAXY_JBROWSE_SYMLINKS"]
+):
+cmd = ["ln", "-s", src, dest]
+else:
+cmd = ["cp", src, dest]
+return self.subprocess_check_call(cmd)
+def process_genomes(self):
+assemblies = []
+for i, genome_node in enumerate(self.genome_paths):
+log.info("genome_node=%s" % str(genome_node))
+# We only expect one input genome per run. This for loop is just
+# easier to write than the alternative / catches any possible
+# issues.
+genome_name = genome_node["meta"]["dataset_dname"]
+dsId = genome_node["meta"]["dataset_id"]
+fapath = genome_node["path"]
+faname = genome_name + ".fa.gz"
+faind = os.path.realpath(os.path.join(self.outdir, faname + ".gzi"))
+if self.standalone == "complete":
+fadest = os.path.realpath(os.path.join(self.outdir, faname))
+cmd = "bgzip -i -c %s > %s && samtools faidx %s" % (
+fapath,
+fadest,
+fadest,
+)
+self.subprocess_popen(cmd)
+adapter = {
+"type": "BgzipFastaAdapter",
+"fastaLocation": {
+"uri": faname,
+},
+"faiLocation": {
+"uri": faname + ".fai",
+},
+"gziLocation": {
+"uri": faname + ".gzi",
+},
+}
+else:
+faurl = "%s/api/datasets/%s/display" % (self.giURL, dsId)
+fastalocation = {
+"uri": faurl,
+}
+failocation = {
+"uri": faname + ".fai",
+}
+adapter = {
+"type": "IndexedFastaAdapter",
+"fastaLocation": fastalocation,
+"faiLocation": failocation,
+}
+cmd = ["samtools", "faidx", fapath, "--fai-idx", faind]
+self.subprocess_check_call(cmd)
+trackDict = {
+"name": genome_name,
+"sequence": {
+"type": "ReferenceSequenceTrack",
+"trackId": genome_name,
+"adapter": adapter,
+},
+"rendering": {"type": "DivSequenceRenderer"},
+}
+assemblies.append(trackDict)
+self.genome_name = genome_name
+if self.usejson:
+self.config_json["assemblies"] = assemblies
+else:
+cmd = [
+"jbrowse",
+"add-assembly",
+faname,
+"-t",
+"bgzipFasta",
+"-n",
+genome_name,
+"--load",
+"inPlace",
+"--faiLocation",
+faname + ".fai",
+"--gziLocation",
+faname + ".gzi",
+"--target",
+self.outdir,
+]
+self.subprocess_check_call(cmd)
+def add_default_view(self):
+cmd = [
+"jbrowse",
+"set-default-session",
+"-s",
+self.config_json_file,
+"-t",
+",".join(self.trackIdlist),
+"-n",
+"JBrowse2 in Galaxy",
+"--target",
+self.config_json_file,
+"-v",
+" LinearGenomeView",
+]
+if True or self.debug:
+log.info("### calling set-default-session with cmd=%s" % "  ".join(cmd))
+self.subprocess_check_call(cmd)
+def write_config(self):
+with open(self.config_json_file, "w") as fp:
+json.dump(self.config_json, fp)
+def add_hic(self, data, trackData):
+"""
+HiC adapter.
+https://github.com/aidenlab/hic-format/blob/master/HiCFormatV9.md
+for testing locally, these work:
+HiC data is from https://s3.amazonaws.com/igv.broadinstitute.org/data/hic/intra_nofrag_30.hic
+using hg19 reference track as a
+'BgzipFastaAdapter'
+fastaLocation:
+uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz',
+faiLocation:
+uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.fai',
+gziLocation:
+uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi',
+Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438
+"""
+log.info("#### trackData=%s" % trackData)
+tId = trackData["label"]
+dsId = trackData["metadata"]["dataset_id"]
+url = "%s/api/datasets/%s/display?to_ext=hic " % (
+self.giURL,
+dsId,
+)
+hname = trackData["name"]
+if self.standalone == "complete":
+dest = os.path.realpath(os.path.join(self.outdir, hname))
+url = hname
+cmd = ["cp", data, dest]
+self.subprocess_check_call(cmd)
+floc = {
+"uri": hname,
+}
+else:
+url = "%s/api/datasets/%s/display?to_ext=hic" % (self.giURL, dsId)
+floc = {
+"uri": url,
+}
+trackDict = {
+"type": "HicTrack",
+"trackId": tId,
+"name": hname,
+"assemblyNames": [self.genome_name],
+"adapter": {
+"type": "HicAdapter",
+"hicLocation": floc,
+},
+}
+if self.usejson:
+self.tracksToAdd.append(trackDict)
+self.trackIdlist.append(tId)
+else:
+cmd = [
+"jbrowse",
+"add-track",
+url,
+"-t",
+"HicTrack",
+"-a",
+self.genome_name,
+"-n",
+hname,
+"--load",
+"inPlace",
+"--target",
+self.outdir,
+]
+self.subprocess_check_call(cmd)
+def add_maf(self, data, trackData):
+"""
+from https://github.com/cmdcolin/maf2bed
+Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and chromosome name
+e.g. hg38.chr1 in the sequence identifiers.
+need the reference id - eg hg18, for maf2bed.pl as the first parameter
+"""
+mafPlugin = {
+"plugins": [
+{
+"name": "MafViewer",
+"url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js",
+}
+]
+}
+tId = trackData["label"]
+fname = "%s.bed" % tId
+dest = os.path.realpath("%s/%s" % (self.outdir, fname))
+# self.symlink_or_copy(data, dest)
+# Process MAF to bed-like. Need build to munge chromosomes
+gname = self.genome_name
+cmd = [
+"bash",
+os.path.join(INSTALLED_TO, "convertMAF.sh"),
+data,
+gname,
+INSTALLED_TO,
+dest,
+]
+self.subprocess_check_call(cmd)
+if True or self.debug:
+log.info("### convertMAF.sh called as %s" % " ".join(cmd))
+# Construct samples list
+# We could get this from galaxy metadata, not sure how easily.
+ps = subprocess.Popen(["grep", "^s [^ ]*", "-o", data], stdout=subprocess.PIPE)
+output = subprocess.check_output(("sort", "-u"), stdin=ps.stdout)
+ps.wait()
+outp = output.decode("ascii")
+soutp = outp.split("\n")
+samp = [x.split("s ")[1] for x in soutp if x.startswith("s ")]
+samples = [x.split(".")[0] for x in samp]
+if self.debug:
+log.info("### got samples = %s " % (samples))
+trackDict = {
+"type": "MafTrack",
+"trackId": tId,
+"name": trackData["name"],
+"adapter": {
+"type": "MafTabixAdapter",
+"samples": samples,
+"bedGzLocation": {
+"uri": fname + ".sorted.bed.gz",
+},
+"index": {
+"location": {
+"uri": fname + ".sorted.bed.gz.tbi",
+},
+},
+},
+"assemblyNames": [self.genome_name],
+}
+self.tracksToAdd.append(trackDict)
+self.trackIdlist.append(tId)
+if self.config_json.get("plugins", None):
+self.config_json["plugins"].append(mafPlugin[0])
+else:
+self.config_json.update(mafPlugin)
+def _blastxml_to_gff3(self, xml, min_gap=10):
+gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
+cmd = [
+"python",
+os.path.join(INSTALLED_TO, "blastxml_to_gapped_gff3.py"),
+"--trim",
+"--trim_end",
+"--include_seq",
+"--min_gap",
+str(min_gap),
+xml,
+]
+subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
+gff3_unrebased.close()
+return gff3_unrebased.name
+def add_blastxml(self, data, trackData, blastOpts, **kwargs):
+gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts["min_gap"])
+if "parent" in blastOpts and blastOpts["parent"] != "None":
+gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
+cmd = ["python", os.path.join(INSTALLED_TO, "gff3_rebase.py")]
+if blastOpts.get("protein", "false") == "true":
+cmd.append("--protein2dna")
+cmd.extend([os.path.realpath(blastOpts["parent"]), gff3])
+subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
+gff3_rebased.close()
+# Replace original gff3 file
+shutil.copy(gff3_rebased.name, gff3)
+os.unlink(gff3_rebased.name)
+url = "%s.gff3" % trackData["label"]
+dest = os.path.realpath("%s/%s" % (self.outdir, url))
+self._sort_gff(gff3, dest)
+url = url + ".gz"
+tId = trackData["label"]
+trackDict = {
+"type": "FeatureTrack",
+"trackId": tId,
+"name": trackData["name"],
+"assemblyNames": [self.genome_name],
+"adapter": {
+"type": "Gff3TabixAdapter",
+"gffGzLocation": {
+"uri": url,
+},
+"index": {
+"location": {
+"uri": url + ".tbi",
+}
+},
+},
+"displays": [
+{
+"type": "LinearBasicDisplay",
+"displayId": "%s-LinearBasicDisplay" % tId,
+},
+{"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
+],
+}
+if self.usejson:
+self.tracksToAdd.append(trackDict)
+self.trackIdlist.append(tId)
+else:
+cmd = [
+"jbrowse",
+"add-track",
+url,
+"-t",
+"FeatureTrack",
+"-a",
+self.genome_name,
+"--indexFile",
+url + ".tbi",
+"-n",
+trackData["name"],
+"--load",
+"inPlace",
+"--target",
+self.outdir,
+]
+self.subprocess_check_call(cmd)
+os.unlink(gff3)
+def add_bigwig(self, data, trackData):
+url = "%s.bw" % trackData["name"]
+if self.standalone == "complete":
+dest = os.path.realpath(os.path.join(self.outdir, url))
+cmd = ["cp", data, dest]
+self.subprocess_check_call(cmd)
+bwloc = {"uri": url}
+else:
+dsId = trackData["metadata"]["dataset_id"]
+url = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId)
+bwloc = {"uri": url}
+tId = trackData["label"]
+trackDict = {
+"type": "QuantitativeTrack",
+"trackId": tId,
+"name": url,
+"assemblyNames": [
+self.genome_name,
+],
+"adapter": {
+"type": "BigWigAdapter",
+"bigWigLocation": bwloc,
+},
+"displays": [
+{
+"type": "LinearWiggleDisplay",
+"displayId": "%s-LinearWiggleDisplay" % tId,
+}
+],
+}
+if self.usejson:
+self.tracksToAdd.append(trackDict)
+self.trackIdlist.append(tId)
+else:
+cmd = [
+"jbrowse",
+"add-track",
+url,
+"-t",
+"QuantitativeTrack",
+"-a",
+self.genome_name,
+"-n",
+trackData["name"],
+"--load",
+"inPlace",
+"--target",
+self.outdir,
+]
+self.subprocess_check_call(cmd)
+def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
+tId = trackData["label"]
+fname = "%s.bam" % trackData["label"]
+dest = os.path.realpath("%s/%s" % (self.outdir, fname))
+if self.standalone == "complete":
+url = fname
+self.subprocess_check_call(["cp", data, dest])
+log.info("### copied %s to %s" % (data, dest))
+bloc = {"uri": url}
+else:
+dsId = trackData["metadata"]["dataset_id"]
+url = "%s/api/datasets/%s/display?to_ext=bam" % (self.giURL, dsId)
+bloc = {"uri": url}
+if bam_index is not None and os.path.exists(os.path.realpath(bam_index)):
+# bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
+self.subprocess_check_call(
+["cp", os.path.realpath(bam_index), dest + ".bai"]
+)
+else:
+# Can happen in exotic condition
+# e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam
+#      => no index generated by galaxy, but there might be one next to the symlink target
+#      this trick allows to skip the bam sorting made by galaxy if already done outside
+if os.path.exists(os.path.realpath(data) + ".bai"):
+self.symlink_or_copy(os.path.realpath(data) + ".bai", dest + ".bai")
+else:
+log.warn("Could not find a bam index (.bai file) for %s", data)
+trackDict = {
+"type": "AlignmentsTrack",
+"trackId": tId,
+"name": trackData["name"],
+"assemblyNames": [self.genome_name],
+"adapter": {
+"type": "BamAdapter",
+"bamLocation": bloc,
+"index": {
+"location": {
+"uri": fname + ".bai",
+}
+},
+},
+}
+if self.usejson:
+self.tracksToAdd.append(trackDict)
+self.trackIdlist.append(tId)
+else:
+cmd = [
+"jbrowse",
+"add-track",
+fname,
+"-t",
+"AlignmentsTrack",
+"-l",
+"inPlace",
+"-a",
+self.genome_name,
+"--indexFile",
+fname + ".bai",
+"-n",
+trackData["name"],
+"--target",
+self.outdir,
+]
+self.subprocess_check_call(cmd)
+def add_vcf(self, data, trackData):
+tId = trackData["label"]
+url = "%s/api/datasets/%s/display" % (
+self.giURL,
+trackData["metadata"]["dataset_id"],
+)
+url = "%s.vcf.gz" % tId
+dest = os.path.realpath("%s/%s" % (self.outdir, url))
+cmd = "bgzip -c %s  > %s" % (data, dest)
+self.subprocess_popen(cmd)
+cmd = ["tabix", "-p", "vcf", dest]
+self.subprocess_check_call(cmd)
+trackDict = {
+"type": "VariantTrack",
+"trackId": tId,
+"name": trackData["name"],
+"assemblyNames": [self.genome_name],
+"adapter": {
+"type": "VcfTabixAdapter",
+"vcfGzLocation": {
+"uri": url,
+},
+"index": {
+"location": {
+"uri": url + ".tbi",
+}
+},
+},
+"displays": [
+{
+"type": "LinearVariantDisplay",
+"displayId": "%s-LinearVariantDisplay" % tId,
+},
+{
+"type": "ChordVariantDisplay",
+"displayId": "%s-ChordVariantDisplay" % tId,
+},
+{
+"type": "LinearPairedArcDisplay",
+"displayId": "%s-LinearPairedArcDisplay" % tId,
+},
+],
+}
+if self.usejson:
+self.tracksToAdd.append(trackDict)
+self.trackIdlist.append(tId)
+else:
+cmd = [
+"jbrowse",
+"add-track",
+url,
+"-t",
+"VariantTrack",
+"-a",
+self.genome_name,
+"--indexFile",
+url + ".tbi",
+"-n",
+trackData["name"],
+"--load",
+"inPlace",
+"--target",
+self.outdir,
+]
+self.subprocess_check_call(cmd)
+def _sort_gff(self, data, dest):
+# Only index if not already done
+if not os.path.exists(dest + ".gz"):
+cmd = "jbrowse sort-gff %s | bgzip -c > %s.gz" % (
+data,
+dest,
+)  # "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'"
+self.subprocess_popen(cmd)
+self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"])
+def _sort_bed(self, data, dest):
+# Only index if not already done
+if not os.path.exists(dest):
+cmd = "sort -k1,1 -k2,2n %s | bgzip -c > %s" % (data, dest)
+self.subprocess_popen(cmd)
+cmd = ["tabix", "-f", "-p", "bed", dest]
+self.subprocess_check_call(cmd)
+def add_gff(self, data, ext, trackData):
+url = "%s.%s" % (trackData["label"], ext)
+dest = os.path.realpath("%s/%s" % (self.outdir, url))
+self._sort_gff(data, dest)
+url = url + ".gz"
+tId = trackData["label"]
+trackDict = {
+"type": "FeatureTrack",
+"trackId": tId,
+"name": trackData["name"],
+"assemblyNames": [self.genome_name],
+"adapter": {
+"type": "Gff3TabixAdapter",
+"gffGzLocation": {
+"uri": url,
+},
+"index": {
+"location": {
+"uri": url + ".tbi",
+}
+},
+},
+"displays": [
+{
+"type": "LinearBasicDisplay",
+"displayId": "%s-LinearBasicDisplay" % tId,
+},
+{"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
+],
+}
+if self.usejson:
+self.tracksToAdd.append(trackDict)
+self.trackIdlist.append(tId)
+else:
+cmd = [
+"jbrowse",
+"add-track",
+url,
+"-t",
+"FeatureTrack",
+"-a",
+self.genome_name,
+"-n",
+trackData["name"],
+"--load",
+"inPlace",
+"--target",
+self.outdir,
+]
+self.subprocess_check_call(cmd)
+def add_bed(self, data, ext, trackData):
+url = "%s.%s" % (trackData["label"], ext)
+dest = os.path.realpath("%s/%s.gz" % (self.outdir, url))
+self._sort_bed(data, dest)
+tId = trackData["label"]
+url = url + ".gz"
+trackDict = {
+"type": "FeatureTrack",
+"trackId": tId,
+"name": trackData["name"],
+"assemblyNames": [self.genome_name],
+"adapter": {
+"type": "BedTabixAdapter",
+"bedGzLocation": {
+"uri": url,
+},
+"index": {
+"location": {
+"uri": url + ".tbi",
+}
+},
+},
+"displays": [
+{
+"type": "LinearBasicDisplay",
+"displayId": "%s-LinearBasicDisplay" % tId,
+},
+{"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
+],
+}
+if self.usejson:
+self.tracksToAdd.append(trackDict)
+self.trackIdlist.append(tId)
+else:
+cmd = [
+"jbrowse",
+"add-track",
+url,
+"-t",
+"FeatureTrack",
+"-a",
+self.genome_name,
+"--indexFile",
+url + ".tbi",
+"-n",
+trackData["name"],
+"--load",
+"inPlace",
+"--target",
+self.outdir,
+]
+self.subprocess_check_call(cmd)
+def process_annotations(self, track):
+category = track["category"].replace("__pd__date__pd__", TODAY)
+for i, (
+dataset_path,
+dataset_ext,
+track_human_label,
+extra_metadata,
+) in enumerate(track["trackfiles"]):
+# Unsanitize labels (element_identifiers are always sanitized by Galaxy)
+for key, value in mapped_chars.items():
+track_human_label = track_human_label.replace(value, key)
+outputTrackConfig = {
+"category": category,
+}
+if self.debug:
+log.info(
+"Processing category = %s, track_human_label = %s",
+category,
+track_human_label,
+)
+# We add extra data to hash for the case of REST + SPARQL.
+if (
+"conf" in track
+and "options" in track["conf"]
+and "url" in track["conf"]["options"]
+):
+rest_url = track["conf"]["options"]["url"]
+else:
+rest_url = ""
+# I chose to use track['category'] instead of 'category' here. This
+# is intentional. This way re-running the tool on a different date
+# will not generate different hashes and make comparison of outputs
+# much simpler.
+hashData = [
+str(dataset_path),
+track_human_label,
+track["category"],
+rest_url,
+]
+hashData = "|".join(hashData).encode("utf-8")
+outputTrackConfig["label"] = hashlib.md5(hashData).hexdigest() + "_%s" % i
+outputTrackConfig["metadata"] = extra_metadata
+outputTrackConfig["name"] = track_human_label
+if dataset_ext in ("gff", "gff3"):
+self.add_gff(
+dataset_path,
+dataset_ext,
+outputTrackConfig,
+)
+elif dataset_ext in ("hic",):
+self.add_hic(
+dataset_path,
+outputTrackConfig,
+)
+elif dataset_ext in ("bed",):
+self.add_bed(
+dataset_path,
+dataset_ext,
+outputTrackConfig,
+)
+elif dataset_ext in ("maf",):
+self.add_maf(
+dataset_path,
+outputTrackConfig,
+)
+elif dataset_ext == "bigwig":
+self.add_bigwig(
+dataset_path,
+outputTrackConfig,
+)
+elif dataset_ext == "bam":
+real_indexes = track["conf"]["options"]["pileup"]["bam_indices"][
+"bam_index"
+]
+if not isinstance(real_indexes, list):
+# <bam_indices>
+#  <bam_index>/path/to/a.bam.bai</bam_index>
+# </bam_indices>
+#
+# The above will result in the 'bam_index' key containing a
+# string. If there are two or more indices, the container
+# becomes a list. Fun!
+real_indexes = [real_indexes]
+self.add_bam(
+dataset_path,
+outputTrackConfig,
+track["conf"]["options"]["pileup"],
+bam_index=real_indexes[i],
+)
+elif dataset_ext == "blastxml":
+self.add_blastxml(
+dataset_path, outputTrackConfig, track["conf"]["options"]["blast"]
+)
+elif dataset_ext == "vcf":
+self.add_vcf(dataset_path, outputTrackConfig)
+else:
+log.warn("Do not know how to handle %s", dataset_ext)
+def clone_jbrowse(self, jbrowse_dir, destination, minimal=False):
+"""Clone a JBrowse directory into a destination directory."""
+cmd = ["jbrowse", "create", "-f", self.outdir]
+self.subprocess_check_call(cmd)
+for fn in [
+"asset-manifest.json",
+"favicon.ico",
+"robots.txt",
+"umd_plugin.js",
+"version.txt",
+"test_data",
+]:
+cmd = ["rm", "-rf", os.path.join(self.outdir, fn)]
+self.subprocess_check_call(cmd)
+if __name__ == "__main__":
+parser = argparse.ArgumentParser(description="", epilog="")
+parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration")
+parser.add_argument("--jbrowse", help="Folder containing a jbrowse release")
+parser.add_argument("--outdir", help="Output directory", default="out")
+parser.add_argument(
+"--standalone",
+choices=["complete", "minimal", "data"],
+help="Standalone mode includes a copy of JBrowse",
+)
+parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.8.0")
+args = parser.parse_args()
+tree = ET.parse(args.xml.name)
+root = tree.getroot()
+# This should be done ASAP
+GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text
+# Sometimes this comes as `localhost` without a protocol
+if not GALAXY_INFRASTRUCTURE_URL.startswith("http"):
+# so we'll prepend `http://` and hope for the best. Requests *should*
+# be GET and not POST so it should redirect OK
+GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL
+jc = JbrowseConnector(
+jbrowse=args.jbrowse,
+outdir=args.outdir,
+genomes=[
+{
+"path": os.path.realpath(x.attrib["path"]),
+"meta": metadata_from_node(x.find("metadata")),
+}
+for x in root.findall("metadata/genomes/genome")
+],
+standalone=args.standalone,
+)
+jc.process_genomes()
+for track in root.findall("tracks/track"):
+track_conf = {}
+track_conf["trackfiles"] = []
+is_multi_bigwig = False
+try:
+if track.find("options/wiggle/multibigwig") and (
+track.find("options/wiggle/multibigwig").text == "True"
+):
+is_multi_bigwig = True
+multi_bigwig_paths = []
+except KeyError:
+pass
+trackfiles = track.findall("files/trackFile")
+if trackfiles:
+for x in track.findall("files/trackFile"):
+if is_multi_bigwig:
+multi_bigwig_paths.append(
+(x.attrib["label"], os.path.realpath(x.attrib["path"]))
+)
+else:
+if trackfiles:
+metadata = metadata_from_node(x.find("metadata"))
+track_conf["dataset_id"] = metadata["dataset_id"]
+track_conf["trackfiles"].append(
+(
+os.path.realpath(x.attrib["path"]),
+x.attrib["ext"],
+x.attrib["label"],
+metadata,
+)
+)
+else:
+# For tracks without files (rest, sparql)
+track_conf["trackfiles"].append(
+(
+"",  # N/A, no path for rest or sparql
+track.attrib["format"],
+track.find("options/label").text,
+{},
+)
+)
+if is_multi_bigwig:
+metadata = metadata_from_node(x.find("metadata"))
+track_conf["trackfiles"].append(
+(
+multi_bigwig_paths,  # Passing an array of paths to represent as one track
+"bigwig_multiple",
+"MultiBigWig",  # Giving an hardcoded name for now
+{},  # No metadata for multiple bigwig
+)
+)
+track_conf["category"] = track.attrib["cat"]
+track_conf["format"] = track.attrib["format"]
+try:
+# Only pertains to gff3 + blastxml. TODO?
+track_conf["style"] = {t.tag: t.text for t in track.find("options/style")}
+except TypeError:
+track_conf["style"] = {}
+pass
+track_conf["conf"] = etree_to_dict(track.find("options"))
+jc.process_annotations(track_conf)
+print("## processed", str(track_conf), "trackIdlist", jc.trackIdlist)
+print(
+"###done processing, trackIdlist=",
+jc.trackIdlist,
+"config=",
+str(jc.config_json),
+)
+jc.config_json["tracks"] = jc.tracksToAdd
+if jc.usejson:
+jc.write_config()
+jc.add_default_view()

Mercurial > repos > fubar > jbrowse2dev

comparison jbrowse2/jbrowse2_jbrowsenotjson.py @ 6:88b9b105c09b draft