comparison jbrowse2/jbrowse2.py @ 10:0db895a99532 draft default tip

Uploaded
author fubar
date Fri, 05 Jan 2024 22:26:16 +0000
parents 6a41f87b5d7f
children
comparison
equal deleted inserted replaced
9:6a41f87b5d7f 10:0db895a99532
106 ) 106 )
107 return metadata 107 return metadata
108 108
109 109
110 class JbrowseConnector(object): 110 class JbrowseConnector(object):
111 def __init__(self, jbrowse, outdir, genomes): 111 def __init__(self, outdir, genomes):
112 self.debug = False 112 self.debug = False
113 self.usejson = True 113 self.usejson = True
114 self.giURL = GALAXY_INFRASTRUCTURE_URL 114 self.giURL = GALAXY_INFRASTRUCTURE_URL
115 self.jbrowse = jbrowse
116 self.outdir = outdir 115 self.outdir = outdir
117 os.makedirs(self.outdir, exist_ok=True) 116 os.makedirs(self.outdir, exist_ok=True)
118 self.genome_paths = genomes 117 self.genome_paths = genomes
119 self.trackIdlist = [] 118 self.trackIdlist = []
120 self.tracksToAdd = [] 119 self.tracksToAdd = []
121 self.config_json = {} 120 self.config_json = {}
122 self.config_json_file = os.path.realpath(os.path.join(outdir, "config.json")) 121 self.config_json_file = os.path.join(outdir, "config.json")
123 self.clone_jbrowse(self.jbrowse, self.outdir) 122 self.clone_jbrowse(destination=self.outdir)
124 123
125 def subprocess_check_call(self, command, output=None): 124 def subprocess_check_call(self, command, output=None):
126 if output: 125 if output:
127 if self.debug: 126 if self.debug:
128 log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output) 127 log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output)
131 log.debug("cd %s && %s", self.outdir, " ".join(command)) 130 log.debug("cd %s && %s", self.outdir, " ".join(command))
132 subprocess.check_call(command, cwd=self.outdir) 131 subprocess.check_call(command, cwd=self.outdir)
133 132
134 def subprocess_popen(self, command): 133 def subprocess_popen(self, command):
135 if self.debug: 134 if self.debug:
136 log.debug("cd %s && %s", self.outdir, command) 135 log.debug(command)
137 p = subprocess.Popen( 136 p = subprocess.Popen(
138 command, 137 command,
138 cwd=self.outdir,
139 shell=True, 139 shell=True,
140 stdin=subprocess.PIPE, 140 stdin=subprocess.PIPE,
141 stdout=subprocess.PIPE, 141 stdout=subprocess.PIPE,
142 stderr=subprocess.PIPE, 142 stderr=subprocess.PIPE,
143 ) 143 )
144 output, err = p.communicate() 144 output, err = p.communicate()
145 retcode = p.returncode 145 retcode = p.returncode
146 if retcode != 0: 146 if retcode != 0:
147 log.error("cd %s && %s", self.outdir, command) 147 log.error(command)
148 log.error(output) 148 log.error(output)
149 log.error(err) 149 log.error(err)
150 raise RuntimeError("Command failed with exit code %s" % (retcode)) 150 raise RuntimeError("Command failed with exit code %s" % (retcode))
151 151
152 def subprocess_check_output(self, command): 152 def subprocess_check_output(self, command):
153 if self.debug: 153 if self.debug:
154 log.debug("cd %s && %s", self.outdir, " ".join(command)) 154 log.debug(" ".join(command))
155 return subprocess.check_output(command, cwd=self.outdir) 155 return subprocess.check_output(command, cwd=self.outdir)
156
157 def _jbrowse_bin(self, command):
158 return os.path.realpath(os.path.join(self.jbrowse, "bin", command))
159 156
160 def symlink_or_copy(self, src, dest): 157 def symlink_or_copy(self, src, dest):
161 if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool( 158 if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool(
162 os.environ["GALAXY_JBROWSE_SYMLINKS"] 159 os.environ["GALAXY_JBROWSE_SYMLINKS"]
163 ): 160 ):
173 if self.debug: 170 if self.debug:
174 log.info("genome_node=%s" % str(genome_node)) 171 log.info("genome_node=%s" % str(genome_node))
175 genome_name = genome_node["meta"]["dataset_dname"].strip().split()[0] 172 genome_name = genome_node["meta"]["dataset_dname"].strip().split()[0]
176 fapath = genome_node["path"] 173 fapath = genome_node["path"]
177 faname = genome_name + ".fa.gz" 174 faname = genome_name + ".fa.gz"
178 fadest = os.path.realpath(os.path.join(self.outdir, faname)) 175 fadest = os.path.join(self.outdir, faname)
179 cmd = "bgzip -i -c %s > %s && samtools faidx %s" % ( 176 # fadest = os.path.realpath(os.path.join(self.outdir, faname))
177 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % (
180 fapath, 178 fapath,
181 fadest, 179 fadest,
182 fadest 180 fadest,
181 fadest,
183 ) 182 )
184 log.info("### cmd = %s" % ' '.join(cmd)) 183 if self.debug:
184 log.info("### cmd = %s" % cmd)
185 self.subprocess_popen(cmd) 185 self.subprocess_popen(cmd)
186 adapter = { 186 adapter = {
187 "type": "BgzipFastaAdapter", 187 "type": "BgzipFastaAdapter",
188 "fastaLocation": { 188 "fastaLocation": {
189 "uri": faname, 189 "uri": faname,
273 url = "%s/api/datasets/%s/display?to_ext=hic " % ( 273 url = "%s/api/datasets/%s/display?to_ext=hic " % (
274 self.giURL, 274 self.giURL,
275 dsId, 275 dsId,
276 ) 276 )
277 hname = trackData["name"] 277 hname = trackData["name"]
278 dest = os.path.realpath(os.path.join(self.outdir, hname)) 278 dest = os.path.join(self.outdir, hname)
279 url = hname 279 url = hname
280 cmd = ["cp", data, dest] 280 cmd = ["cp", data, dest]
281 self.subprocess_check_call(cmd) 281 self.subprocess_check_call(cmd)
282 floc = { 282 floc = {
283 "uri": hname, 283 "uri": hname,
328 } 328 }
329 ] 329 ]
330 } 330 }
331 tId = trackData["label"] 331 tId = trackData["label"]
332 fname = "%s.bed" % tId 332 fname = "%s.bed" % tId
333 dest = os.path.realpath("%s/%s" % (self.outdir, fname)) 333 dest = "%s/%s" % (self.outdir, fname)
334 # self.symlink_or_copy(data, dest) 334 # self.symlink_or_copy(data, dest)
335 # Process MAF to bed-like. Need build to munge chromosomes 335 # Process MAF to bed-like. Need build to munge chromosomes
336 gname = self.genome_name 336 gname = self.genome_name
337 cmd = [ 337 cmd = [
338 "bash", 338 "bash",
411 411
412 # Replace original gff3 file 412 # Replace original gff3 file
413 shutil.copy(gff3_rebased.name, gff3) 413 shutil.copy(gff3_rebased.name, gff3)
414 os.unlink(gff3_rebased.name) 414 os.unlink(gff3_rebased.name)
415 url = "%s.gff3" % trackData["label"] 415 url = "%s.gff3" % trackData["label"]
416 dest = os.path.realpath("%s/%s" % (self.outdir, url)) 416 dest = "%s/%s" % (self.outdir, url)
417 self._sort_gff(gff3, dest) 417 self._sort_gff(gff3, dest)
418 url = url + ".gz" 418 url = url + ".gz"
419 tId = trackData["label"] 419 tId = trackData["label"]
420 trackDict = { 420 trackDict = {
421 "type": "FeatureTrack", 421 "type": "FeatureTrack",
465 self.subprocess_check_call(cmd) 465 self.subprocess_check_call(cmd)
466 os.unlink(gff3) 466 os.unlink(gff3)
467 467
468 def add_bigwig(self, data, trackData): 468 def add_bigwig(self, data, trackData):
469 url = "%s.bw" % trackData["name"] 469 url = "%s.bw" % trackData["name"]
470 dest = os.path.realpath(os.path.join(self.outdir, url)) 470 dest = os.path.join(self.outdir, url)
471 cmd = ["cp", data, dest] 471 cmd = ["cp", data, dest]
472 self.subprocess_check_call(cmd) 472 self.subprocess_check_call(cmd)
473 bwloc = {"uri": url} 473 bwloc = {"uri": url}
474 tId = trackData["label"] 474 tId = trackData["label"]
475 trackDict = { 475 trackDict = {
512 self.subprocess_check_call(cmd) 512 self.subprocess_check_call(cmd)
513 513
514 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): 514 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
515 tId = trackData["label"] 515 tId = trackData["label"]
516 fname = "%s.bam" % trackData["label"] 516 fname = "%s.bam" % trackData["label"]
517 dest = os.path.realpath("%s/%s" % (self.outdir, fname)) 517 dest = "%s/%s" % (self.outdir, fname)
518 url = fname 518 url = fname
519 self.subprocess_check_call(["cp", data, dest]) 519 self.subprocess_check_call(["cp", data, dest])
520 log.info("### copied %s to %s" % (data, dest)) 520 log.info("### copied %s to %s" % (data, dest))
521 bloc = {"uri": url} 521 bloc = {"uri": url}
522 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): 522 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)):
576 url = "%s/api/datasets/%s/display" % ( 576 url = "%s/api/datasets/%s/display" % (
577 self.giURL, 577 self.giURL,
578 trackData["metadata"]["dataset_id"], 578 trackData["metadata"]["dataset_id"],
579 ) 579 )
580 url = "%s.vcf.gz" % tId 580 url = "%s.vcf.gz" % tId
581 dest = os.path.realpath("%s/%s" % (self.outdir, url)) 581 dest = "%s/%s" % (self.outdir, url)
582 cmd = "bgzip -c %s > %s" % (data, dest) 582 cmd = "bgzip -c %s > %s" % (data, dest)
583 self.subprocess_popen(cmd) 583 self.subprocess_popen(cmd)
584 cmd = ["tabix", "-p", "vcf", dest] 584 cmd = ["tabix", "-p", "vcf", dest]
585 self.subprocess_check_call(cmd) 585 self.subprocess_check_call(cmd)
586 trackDict = { 586 trackDict = {
655 cmd = ["tabix", "-f", "-p", "bed", dest] 655 cmd = ["tabix", "-f", "-p", "bed", dest]
656 self.subprocess_check_call(cmd) 656 self.subprocess_check_call(cmd)
657 657
658 def add_gff(self, data, ext, trackData): 658 def add_gff(self, data, ext, trackData):
659 url = "%s.%s" % (trackData["label"], ext) 659 url = "%s.%s" % (trackData["label"], ext)
660 dest = os.path.realpath("%s/%s" % (self.outdir, url)) 660 dest = "%s/%s" % (self.outdir, url)
661 self._sort_gff(data, dest) 661 self._sort_gff(data, dest)
662 url = url + ".gz" 662 url = url + ".gz"
663 tId = trackData["label"] 663 tId = trackData["label"]
664 trackDict = { 664 trackDict = {
665 "type": "FeatureTrack", 665 "type": "FeatureTrack",
706 ] 706 ]
707 self.subprocess_check_call(cmd) 707 self.subprocess_check_call(cmd)
708 708
709 def add_bed(self, data, ext, trackData): 709 def add_bed(self, data, ext, trackData):
710 url = "%s.%s" % (trackData["label"], ext) 710 url = "%s.%s" % (trackData["label"], ext)
711 dest = os.path.realpath("%s/%s.gz" % (self.outdir, url)) 711 dest = "%s/%s.gz" % (self.outdir, url)
712 self._sort_bed(data, dest) 712 self._sort_bed(data, dest)
713 tId = trackData["label"] 713 tId = trackData["label"]
714 url = url + ".gz" 714 url = url + ".gz"
715 trackDict = { 715 trackDict = {
716 "type": "FeatureTrack", 716 "type": "FeatureTrack",
858 elif dataset_ext == "vcf": 858 elif dataset_ext == "vcf":
859 self.add_vcf(dataset_path, outputTrackConfig) 859 self.add_vcf(dataset_path, outputTrackConfig)
860 else: 860 else:
861 log.warn("Do not know how to handle %s", dataset_ext) 861 log.warn("Do not know how to handle %s", dataset_ext)
862 862
863 def clone_jbrowse(self, jbrowse_dir, destination): 863 def clone_jbrowse(self, destination):
864 """Clone a JBrowse directory into a destination directory.""" 864 """Clone a JBrowse directory into a destination directory."""
865 cmd = ["jbrowse", "create", "-f", self.outdir] 865 cmd = ["jbrowse", "create", "-f", os.path.realpath(destination)]
866 self.subprocess_check_call(cmd) 866 self.subprocess_check_call(cmd)
867 for fn in [ 867 for fn in [
868 "asset-manifest.json", 868 "asset-manifest.json",
869 "favicon.ico", 869 "favicon.ico",
870 "robots.txt", 870 "robots.txt",
872 "version.txt", 872 "version.txt",
873 "test_data", 873 "test_data",
874 ]: 874 ]:
875 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] 875 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)]
876 self.subprocess_check_call(cmd) 876 self.subprocess_check_call(cmd)
877 cmd = ['cp', os.path.join(INSTALLED_TO, "servejb2.py"), self.outdir] 877 cmd = ["cp", os.path.join(INSTALLED_TO, "servejb2.py"), self.outdir]
878 self.subprocess_check_call(cmd)
879
880 def clone_jbrowse2(self, jbrowse_dir, destination):
881 """Clone a JBrowse directory into a destination directory."""
882 cmd = ["cp", "-rv", jbrowse_dir + "/*", self.outdir]
883 self.subprocess_check_call(cmd)
884 for fn in [
885 "asset-manifest.json",
886 "favicon.ico",
887 "robots.txt",
888 "umd_plugin.js",
889 "version.txt",
890 "test_data",
891 ]:
892 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)]
893 self.subprocess_check_call(cmd)
894 cmd = ["cp", os.path.join(INSTALLED_TO, "servejb2.py"), self.outdir]
878 self.subprocess_check_call(cmd) 895 self.subprocess_check_call(cmd)
879 896
880 897
881 if __name__ == "__main__": 898 if __name__ == "__main__":
882 parser = argparse.ArgumentParser(description="", epilog="") 899 parser = argparse.ArgumentParser(description="", epilog="")
883 parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration") 900 parser.add_argument("--xml", help="Track Configuration")
884 901 parser.add_argument("--jbrowse", help="Output from 'which jbrowse'")
885 parser.add_argument("--jbrowse", help="Folder containing a jbrowse release")
886 parser.add_argument("--outdir", help="Output directory", default="out") 902 parser.add_argument("--outdir", help="Output directory", default="out")
887 parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.8.0") 903 parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1")
888 args = parser.parse_args() 904 args = parser.parse_args()
889 905
890 tree = ET.parse(args.xml.name) 906 tree = ET.parse(args.xml)
891 root = tree.getroot() 907 root = tree.getroot()
892 908
893 # This should be done ASAP 909 # This should be done ASAP
894 GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text 910 GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text
895 # Sometimes this comes as `localhost` without a protocol 911 # Sometimes this comes as `localhost` without a protocol
896 if not GALAXY_INFRASTRUCTURE_URL.startswith("http"): 912 if not GALAXY_INFRASTRUCTURE_URL.startswith("http"):
897 # so we'll prepend `http://` and hope for the best. Requests *should* 913 # so we'll prepend `http://` and hope for the best. Requests *should*
898 # be GET and not POST so it should redirect OK 914 # be GET and not POST so it should redirect OK
899 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL 915 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL
900
901 jc = JbrowseConnector( 916 jc = JbrowseConnector(
902 jbrowse=args.jbrowse,
903 outdir=args.outdir, 917 outdir=args.outdir,
904 genomes=[ 918 genomes=[
905 { 919 {
906 "path": os.path.realpath(x.attrib["path"]), 920 "path": os.path.realpath(x.attrib["path"]),
907 "meta": metadata_from_node(x.find("metadata")), 921 "meta": metadata_from_node(x.find("metadata")),