Mercurial > repos > fubar > jbrowse2dev
comparison jbrowse2/jbrowse2.py @ 10:0db895a99532 draft default tip
Uploaded
| author | fubar |
|---|---|
| date | Fri, 05 Jan 2024 22:26:16 +0000 |
| parents | 6a41f87b5d7f |
| children |
comparison
equal
deleted
inserted
replaced
| 9:6a41f87b5d7f | 10:0db895a99532 |
|---|---|
| 106 ) | 106 ) |
| 107 return metadata | 107 return metadata |
| 108 | 108 |
| 109 | 109 |
| 110 class JbrowseConnector(object): | 110 class JbrowseConnector(object): |
| 111 def __init__(self, jbrowse, outdir, genomes): | 111 def __init__(self, outdir, genomes): |
| 112 self.debug = False | 112 self.debug = False |
| 113 self.usejson = True | 113 self.usejson = True |
| 114 self.giURL = GALAXY_INFRASTRUCTURE_URL | 114 self.giURL = GALAXY_INFRASTRUCTURE_URL |
| 115 self.jbrowse = jbrowse | |
| 116 self.outdir = outdir | 115 self.outdir = outdir |
| 117 os.makedirs(self.outdir, exist_ok=True) | 116 os.makedirs(self.outdir, exist_ok=True) |
| 118 self.genome_paths = genomes | 117 self.genome_paths = genomes |
| 119 self.trackIdlist = [] | 118 self.trackIdlist = [] |
| 120 self.tracksToAdd = [] | 119 self.tracksToAdd = [] |
| 121 self.config_json = {} | 120 self.config_json = {} |
| 122 self.config_json_file = os.path.realpath(os.path.join(outdir, "config.json")) | 121 self.config_json_file = os.path.join(outdir, "config.json") |
| 123 self.clone_jbrowse(self.jbrowse, self.outdir) | 122 self.clone_jbrowse(destination=self.outdir) |
| 124 | 123 |
| 125 def subprocess_check_call(self, command, output=None): | 124 def subprocess_check_call(self, command, output=None): |
| 126 if output: | 125 if output: |
| 127 if self.debug: | 126 if self.debug: |
| 128 log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output) | 127 log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output) |
| 131 log.debug("cd %s && %s", self.outdir, " ".join(command)) | 130 log.debug("cd %s && %s", self.outdir, " ".join(command)) |
| 132 subprocess.check_call(command, cwd=self.outdir) | 131 subprocess.check_call(command, cwd=self.outdir) |
| 133 | 132 |
| 134 def subprocess_popen(self, command): | 133 def subprocess_popen(self, command): |
| 135 if self.debug: | 134 if self.debug: |
| 136 log.debug("cd %s && %s", self.outdir, command) | 135 log.debug(command) |
| 137 p = subprocess.Popen( | 136 p = subprocess.Popen( |
| 138 command, | 137 command, |
| 138 cwd=self.outdir, | |
| 139 shell=True, | 139 shell=True, |
| 140 stdin=subprocess.PIPE, | 140 stdin=subprocess.PIPE, |
| 141 stdout=subprocess.PIPE, | 141 stdout=subprocess.PIPE, |
| 142 stderr=subprocess.PIPE, | 142 stderr=subprocess.PIPE, |
| 143 ) | 143 ) |
| 144 output, err = p.communicate() | 144 output, err = p.communicate() |
| 145 retcode = p.returncode | 145 retcode = p.returncode |
| 146 if retcode != 0: | 146 if retcode != 0: |
| 147 log.error("cd %s && %s", self.outdir, command) | 147 log.error(command) |
| 148 log.error(output) | 148 log.error(output) |
| 149 log.error(err) | 149 log.error(err) |
| 150 raise RuntimeError("Command failed with exit code %s" % (retcode)) | 150 raise RuntimeError("Command failed with exit code %s" % (retcode)) |
| 151 | 151 |
| 152 def subprocess_check_output(self, command): | 152 def subprocess_check_output(self, command): |
| 153 if self.debug: | 153 if self.debug: |
| 154 log.debug("cd %s && %s", self.outdir, " ".join(command)) | 154 log.debug(" ".join(command)) |
| 155 return subprocess.check_output(command, cwd=self.outdir) | 155 return subprocess.check_output(command, cwd=self.outdir) |
| 156 | |
| 157 def _jbrowse_bin(self, command): | |
| 158 return os.path.realpath(os.path.join(self.jbrowse, "bin", command)) | |
| 159 | 156 |
| 160 def symlink_or_copy(self, src, dest): | 157 def symlink_or_copy(self, src, dest): |
| 161 if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool( | 158 if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool( |
| 162 os.environ["GALAXY_JBROWSE_SYMLINKS"] | 159 os.environ["GALAXY_JBROWSE_SYMLINKS"] |
| 163 ): | 160 ): |
| 173 if self.debug: | 170 if self.debug: |
| 174 log.info("genome_node=%s" % str(genome_node)) | 171 log.info("genome_node=%s" % str(genome_node)) |
| 175 genome_name = genome_node["meta"]["dataset_dname"].strip().split()[0] | 172 genome_name = genome_node["meta"]["dataset_dname"].strip().split()[0] |
| 176 fapath = genome_node["path"] | 173 fapath = genome_node["path"] |
| 177 faname = genome_name + ".fa.gz" | 174 faname = genome_name + ".fa.gz" |
| 178 fadest = os.path.realpath(os.path.join(self.outdir, faname)) | 175 fadest = os.path.join(self.outdir, faname) |
| 179 cmd = "bgzip -i -c %s > %s && samtools faidx %s" % ( | 176 # fadest = os.path.realpath(os.path.join(self.outdir, faname)) |
| 177 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( | |
| 180 fapath, | 178 fapath, |
| 181 fadest, | 179 fadest, |
| 182 fadest | 180 fadest, |
| 181 fadest, | |
| 183 ) | 182 ) |
| 184 log.info("### cmd = %s" % ' '.join(cmd)) | 183 if self.debug: |
| 184 log.info("### cmd = %s" % cmd) | |
| 185 self.subprocess_popen(cmd) | 185 self.subprocess_popen(cmd) |
| 186 adapter = { | 186 adapter = { |
| 187 "type": "BgzipFastaAdapter", | 187 "type": "BgzipFastaAdapter", |
| 188 "fastaLocation": { | 188 "fastaLocation": { |
| 189 "uri": faname, | 189 "uri": faname, |
| 273 url = "%s/api/datasets/%s/display?to_ext=hic " % ( | 273 url = "%s/api/datasets/%s/display?to_ext=hic " % ( |
| 274 self.giURL, | 274 self.giURL, |
| 275 dsId, | 275 dsId, |
| 276 ) | 276 ) |
| 277 hname = trackData["name"] | 277 hname = trackData["name"] |
| 278 dest = os.path.realpath(os.path.join(self.outdir, hname)) | 278 dest = os.path.join(self.outdir, hname) |
| 279 url = hname | 279 url = hname |
| 280 cmd = ["cp", data, dest] | 280 cmd = ["cp", data, dest] |
| 281 self.subprocess_check_call(cmd) | 281 self.subprocess_check_call(cmd) |
| 282 floc = { | 282 floc = { |
| 283 "uri": hname, | 283 "uri": hname, |
| 328 } | 328 } |
| 329 ] | 329 ] |
| 330 } | 330 } |
| 331 tId = trackData["label"] | 331 tId = trackData["label"] |
| 332 fname = "%s.bed" % tId | 332 fname = "%s.bed" % tId |
| 333 dest = os.path.realpath("%s/%s" % (self.outdir, fname)) | 333 dest = "%s/%s" % (self.outdir, fname) |
| 334 # self.symlink_or_copy(data, dest) | 334 # self.symlink_or_copy(data, dest) |
| 335 # Process MAF to bed-like. Need build to munge chromosomes | 335 # Process MAF to bed-like. Need build to munge chromosomes |
| 336 gname = self.genome_name | 336 gname = self.genome_name |
| 337 cmd = [ | 337 cmd = [ |
| 338 "bash", | 338 "bash", |
| 411 | 411 |
| 412 # Replace original gff3 file | 412 # Replace original gff3 file |
| 413 shutil.copy(gff3_rebased.name, gff3) | 413 shutil.copy(gff3_rebased.name, gff3) |
| 414 os.unlink(gff3_rebased.name) | 414 os.unlink(gff3_rebased.name) |
| 415 url = "%s.gff3" % trackData["label"] | 415 url = "%s.gff3" % trackData["label"] |
| 416 dest = os.path.realpath("%s/%s" % (self.outdir, url)) | 416 dest = "%s/%s" % (self.outdir, url) |
| 417 self._sort_gff(gff3, dest) | 417 self._sort_gff(gff3, dest) |
| 418 url = url + ".gz" | 418 url = url + ".gz" |
| 419 tId = trackData["label"] | 419 tId = trackData["label"] |
| 420 trackDict = { | 420 trackDict = { |
| 421 "type": "FeatureTrack", | 421 "type": "FeatureTrack", |
| 465 self.subprocess_check_call(cmd) | 465 self.subprocess_check_call(cmd) |
| 466 os.unlink(gff3) | 466 os.unlink(gff3) |
| 467 | 467 |
| 468 def add_bigwig(self, data, trackData): | 468 def add_bigwig(self, data, trackData): |
| 469 url = "%s.bw" % trackData["name"] | 469 url = "%s.bw" % trackData["name"] |
| 470 dest = os.path.realpath(os.path.join(self.outdir, url)) | 470 dest = os.path.join(self.outdir, url) |
| 471 cmd = ["cp", data, dest] | 471 cmd = ["cp", data, dest] |
| 472 self.subprocess_check_call(cmd) | 472 self.subprocess_check_call(cmd) |
| 473 bwloc = {"uri": url} | 473 bwloc = {"uri": url} |
| 474 tId = trackData["label"] | 474 tId = trackData["label"] |
| 475 trackDict = { | 475 trackDict = { |
| 512 self.subprocess_check_call(cmd) | 512 self.subprocess_check_call(cmd) |
| 513 | 513 |
| 514 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): | 514 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): |
| 515 tId = trackData["label"] | 515 tId = trackData["label"] |
| 516 fname = "%s.bam" % trackData["label"] | 516 fname = "%s.bam" % trackData["label"] |
| 517 dest = os.path.realpath("%s/%s" % (self.outdir, fname)) | 517 dest = "%s/%s" % (self.outdir, fname) |
| 518 url = fname | 518 url = fname |
| 519 self.subprocess_check_call(["cp", data, dest]) | 519 self.subprocess_check_call(["cp", data, dest]) |
| 520 log.info("### copied %s to %s" % (data, dest)) | 520 log.info("### copied %s to %s" % (data, dest)) |
| 521 bloc = {"uri": url} | 521 bloc = {"uri": url} |
| 522 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): | 522 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): |
| 576 url = "%s/api/datasets/%s/display" % ( | 576 url = "%s/api/datasets/%s/display" % ( |
| 577 self.giURL, | 577 self.giURL, |
| 578 trackData["metadata"]["dataset_id"], | 578 trackData["metadata"]["dataset_id"], |
| 579 ) | 579 ) |
| 580 url = "%s.vcf.gz" % tId | 580 url = "%s.vcf.gz" % tId |
| 581 dest = os.path.realpath("%s/%s" % (self.outdir, url)) | 581 dest = "%s/%s" % (self.outdir, url) |
| 582 cmd = "bgzip -c %s > %s" % (data, dest) | 582 cmd = "bgzip -c %s > %s" % (data, dest) |
| 583 self.subprocess_popen(cmd) | 583 self.subprocess_popen(cmd) |
| 584 cmd = ["tabix", "-p", "vcf", dest] | 584 cmd = ["tabix", "-p", "vcf", dest] |
| 585 self.subprocess_check_call(cmd) | 585 self.subprocess_check_call(cmd) |
| 586 trackDict = { | 586 trackDict = { |
| 655 cmd = ["tabix", "-f", "-p", "bed", dest] | 655 cmd = ["tabix", "-f", "-p", "bed", dest] |
| 656 self.subprocess_check_call(cmd) | 656 self.subprocess_check_call(cmd) |
| 657 | 657 |
| 658 def add_gff(self, data, ext, trackData): | 658 def add_gff(self, data, ext, trackData): |
| 659 url = "%s.%s" % (trackData["label"], ext) | 659 url = "%s.%s" % (trackData["label"], ext) |
| 660 dest = os.path.realpath("%s/%s" % (self.outdir, url)) | 660 dest = "%s/%s" % (self.outdir, url) |
| 661 self._sort_gff(data, dest) | 661 self._sort_gff(data, dest) |
| 662 url = url + ".gz" | 662 url = url + ".gz" |
| 663 tId = trackData["label"] | 663 tId = trackData["label"] |
| 664 trackDict = { | 664 trackDict = { |
| 665 "type": "FeatureTrack", | 665 "type": "FeatureTrack", |
| 706 ] | 706 ] |
| 707 self.subprocess_check_call(cmd) | 707 self.subprocess_check_call(cmd) |
| 708 | 708 |
| 709 def add_bed(self, data, ext, trackData): | 709 def add_bed(self, data, ext, trackData): |
| 710 url = "%s.%s" % (trackData["label"], ext) | 710 url = "%s.%s" % (trackData["label"], ext) |
| 711 dest = os.path.realpath("%s/%s.gz" % (self.outdir, url)) | 711 dest = "%s/%s.gz" % (self.outdir, url) |
| 712 self._sort_bed(data, dest) | 712 self._sort_bed(data, dest) |
| 713 tId = trackData["label"] | 713 tId = trackData["label"] |
| 714 url = url + ".gz" | 714 url = url + ".gz" |
| 715 trackDict = { | 715 trackDict = { |
| 716 "type": "FeatureTrack", | 716 "type": "FeatureTrack", |
| 858 elif dataset_ext == "vcf": | 858 elif dataset_ext == "vcf": |
| 859 self.add_vcf(dataset_path, outputTrackConfig) | 859 self.add_vcf(dataset_path, outputTrackConfig) |
| 860 else: | 860 else: |
| 861 log.warn("Do not know how to handle %s", dataset_ext) | 861 log.warn("Do not know how to handle %s", dataset_ext) |
| 862 | 862 |
| 863 def clone_jbrowse(self, jbrowse_dir, destination): | 863 def clone_jbrowse(self, destination): |
| 864 """Clone a JBrowse directory into a destination directory.""" | 864 """Clone a JBrowse directory into a destination directory.""" |
| 865 cmd = ["jbrowse", "create", "-f", self.outdir] | 865 cmd = ["jbrowse", "create", "-f", os.path.realpath(destination)] |
| 866 self.subprocess_check_call(cmd) | 866 self.subprocess_check_call(cmd) |
| 867 for fn in [ | 867 for fn in [ |
| 868 "asset-manifest.json", | 868 "asset-manifest.json", |
| 869 "favicon.ico", | 869 "favicon.ico", |
| 870 "robots.txt", | 870 "robots.txt", |
| 872 "version.txt", | 872 "version.txt", |
| 873 "test_data", | 873 "test_data", |
| 874 ]: | 874 ]: |
| 875 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] | 875 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] |
| 876 self.subprocess_check_call(cmd) | 876 self.subprocess_check_call(cmd) |
| 877 cmd = ['cp', os.path.join(INSTALLED_TO, "servejb2.py"), self.outdir] | 877 cmd = ["cp", os.path.join(INSTALLED_TO, "servejb2.py"), self.outdir] |
| 878 self.subprocess_check_call(cmd) | |
| 879 | |
| 880 def clone_jbrowse2(self, jbrowse_dir, destination): | |
| 881 """Clone a JBrowse directory into a destination directory.""" | |
| 882 cmd = ["cp", "-rv", jbrowse_dir + "/*", self.outdir] | |
| 883 self.subprocess_check_call(cmd) | |
| 884 for fn in [ | |
| 885 "asset-manifest.json", | |
| 886 "favicon.ico", | |
| 887 "robots.txt", | |
| 888 "umd_plugin.js", | |
| 889 "version.txt", | |
| 890 "test_data", | |
| 891 ]: | |
| 892 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] | |
| 893 self.subprocess_check_call(cmd) | |
| 894 cmd = ["cp", os.path.join(INSTALLED_TO, "servejb2.py"), self.outdir] | |
| 878 self.subprocess_check_call(cmd) | 895 self.subprocess_check_call(cmd) |
| 879 | 896 |
| 880 | 897 |
| 881 if __name__ == "__main__": | 898 if __name__ == "__main__": |
| 882 parser = argparse.ArgumentParser(description="", epilog="") | 899 parser = argparse.ArgumentParser(description="", epilog="") |
| 883 parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration") | 900 parser.add_argument("--xml", help="Track Configuration") |
| 884 | 901 parser.add_argument("--jbrowse", help="Output from 'which jbrowse'") |
| 885 parser.add_argument("--jbrowse", help="Folder containing a jbrowse release") | |
| 886 parser.add_argument("--outdir", help="Output directory", default="out") | 902 parser.add_argument("--outdir", help="Output directory", default="out") |
| 887 parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.8.0") | 903 parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1") |
| 888 args = parser.parse_args() | 904 args = parser.parse_args() |
| 889 | 905 |
| 890 tree = ET.parse(args.xml.name) | 906 tree = ET.parse(args.xml) |
| 891 root = tree.getroot() | 907 root = tree.getroot() |
| 892 | 908 |
| 893 # This should be done ASAP | 909 # This should be done ASAP |
| 894 GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text | 910 GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text |
| 895 # Sometimes this comes as `localhost` without a protocol | 911 # Sometimes this comes as `localhost` without a protocol |
| 896 if not GALAXY_INFRASTRUCTURE_URL.startswith("http"): | 912 if not GALAXY_INFRASTRUCTURE_URL.startswith("http"): |
| 897 # so we'll prepend `http://` and hope for the best. Requests *should* | 913 # so we'll prepend `http://` and hope for the best. Requests *should* |
| 898 # be GET and not POST so it should redirect OK | 914 # be GET and not POST so it should redirect OK |
| 899 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL | 915 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL |
| 900 | |
| 901 jc = JbrowseConnector( | 916 jc = JbrowseConnector( |
| 902 jbrowse=args.jbrowse, | |
| 903 outdir=args.outdir, | 917 outdir=args.outdir, |
| 904 genomes=[ | 918 genomes=[ |
| 905 { | 919 { |
| 906 "path": os.path.realpath(x.attrib["path"]), | 920 "path": os.path.realpath(x.attrib["path"]), |
| 907 "meta": metadata_from_node(x.find("metadata")), | 921 "meta": metadata_from_node(x.find("metadata")), |
