Mercurial > repos > fubar > jbrowse2dev
comparison jbrowse2/jbrowse2.py @ 10:0db895a99532 draft default tip
Uploaded
author | fubar |
---|---|
date | Fri, 05 Jan 2024 22:26:16 +0000 |
parents | 6a41f87b5d7f |
children |
comparison
equal
deleted
inserted
replaced
9:6a41f87b5d7f | 10:0db895a99532 |
---|---|
106 ) | 106 ) |
107 return metadata | 107 return metadata |
108 | 108 |
109 | 109 |
110 class JbrowseConnector(object): | 110 class JbrowseConnector(object): |
111 def __init__(self, jbrowse, outdir, genomes): | 111 def __init__(self, outdir, genomes): |
112 self.debug = False | 112 self.debug = False |
113 self.usejson = True | 113 self.usejson = True |
114 self.giURL = GALAXY_INFRASTRUCTURE_URL | 114 self.giURL = GALAXY_INFRASTRUCTURE_URL |
115 self.jbrowse = jbrowse | |
116 self.outdir = outdir | 115 self.outdir = outdir |
117 os.makedirs(self.outdir, exist_ok=True) | 116 os.makedirs(self.outdir, exist_ok=True) |
118 self.genome_paths = genomes | 117 self.genome_paths = genomes |
119 self.trackIdlist = [] | 118 self.trackIdlist = [] |
120 self.tracksToAdd = [] | 119 self.tracksToAdd = [] |
121 self.config_json = {} | 120 self.config_json = {} |
122 self.config_json_file = os.path.realpath(os.path.join(outdir, "config.json")) | 121 self.config_json_file = os.path.join(outdir, "config.json") |
123 self.clone_jbrowse(self.jbrowse, self.outdir) | 122 self.clone_jbrowse(destination=self.outdir) |
124 | 123 |
125 def subprocess_check_call(self, command, output=None): | 124 def subprocess_check_call(self, command, output=None): |
126 if output: | 125 if output: |
127 if self.debug: | 126 if self.debug: |
128 log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output) | 127 log.debug("cd %s && %s > %s", self.outdir, " ".join(command), output) |
131 log.debug("cd %s && %s", self.outdir, " ".join(command)) | 130 log.debug("cd %s && %s", self.outdir, " ".join(command)) |
132 subprocess.check_call(command, cwd=self.outdir) | 131 subprocess.check_call(command, cwd=self.outdir) |
133 | 132 |
134 def subprocess_popen(self, command): | 133 def subprocess_popen(self, command): |
135 if self.debug: | 134 if self.debug: |
136 log.debug("cd %s && %s", self.outdir, command) | 135 log.debug(command) |
137 p = subprocess.Popen( | 136 p = subprocess.Popen( |
138 command, | 137 command, |
138 cwd=self.outdir, | |
139 shell=True, | 139 shell=True, |
140 stdin=subprocess.PIPE, | 140 stdin=subprocess.PIPE, |
141 stdout=subprocess.PIPE, | 141 stdout=subprocess.PIPE, |
142 stderr=subprocess.PIPE, | 142 stderr=subprocess.PIPE, |
143 ) | 143 ) |
144 output, err = p.communicate() | 144 output, err = p.communicate() |
145 retcode = p.returncode | 145 retcode = p.returncode |
146 if retcode != 0: | 146 if retcode != 0: |
147 log.error("cd %s && %s", self.outdir, command) | 147 log.error(command) |
148 log.error(output) | 148 log.error(output) |
149 log.error(err) | 149 log.error(err) |
150 raise RuntimeError("Command failed with exit code %s" % (retcode)) | 150 raise RuntimeError("Command failed with exit code %s" % (retcode)) |
151 | 151 |
152 def subprocess_check_output(self, command): | 152 def subprocess_check_output(self, command): |
153 if self.debug: | 153 if self.debug: |
154 log.debug("cd %s && %s", self.outdir, " ".join(command)) | 154 log.debug(" ".join(command)) |
155 return subprocess.check_output(command, cwd=self.outdir) | 155 return subprocess.check_output(command, cwd=self.outdir) |
156 | |
157 def _jbrowse_bin(self, command): | |
158 return os.path.realpath(os.path.join(self.jbrowse, "bin", command)) | |
159 | 156 |
160 def symlink_or_copy(self, src, dest): | 157 def symlink_or_copy(self, src, dest): |
161 if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool( | 158 if "GALAXY_JBROWSE_SYMLINKS" in os.environ and bool( |
162 os.environ["GALAXY_JBROWSE_SYMLINKS"] | 159 os.environ["GALAXY_JBROWSE_SYMLINKS"] |
163 ): | 160 ): |
173 if self.debug: | 170 if self.debug: |
174 log.info("genome_node=%s" % str(genome_node)) | 171 log.info("genome_node=%s" % str(genome_node)) |
175 genome_name = genome_node["meta"]["dataset_dname"].strip().split()[0] | 172 genome_name = genome_node["meta"]["dataset_dname"].strip().split()[0] |
176 fapath = genome_node["path"] | 173 fapath = genome_node["path"] |
177 faname = genome_name + ".fa.gz" | 174 faname = genome_name + ".fa.gz" |
178 fadest = os.path.realpath(os.path.join(self.outdir, faname)) | 175 fadest = os.path.join(self.outdir, faname) |
179 cmd = "bgzip -i -c %s > %s && samtools faidx %s" % ( | 176 # fadest = os.path.realpath(os.path.join(self.outdir, faname)) |
177 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( | |
180 fapath, | 178 fapath, |
181 fadest, | 179 fadest, |
182 fadest | 180 fadest, |
181 fadest, | |
183 ) | 182 ) |
184 log.info("### cmd = %s" % ' '.join(cmd)) | 183 if self.debug: |
184 log.info("### cmd = %s" % cmd) | |
185 self.subprocess_popen(cmd) | 185 self.subprocess_popen(cmd) |
186 adapter = { | 186 adapter = { |
187 "type": "BgzipFastaAdapter", | 187 "type": "BgzipFastaAdapter", |
188 "fastaLocation": { | 188 "fastaLocation": { |
189 "uri": faname, | 189 "uri": faname, |
273 url = "%s/api/datasets/%s/display?to_ext=hic " % ( | 273 url = "%s/api/datasets/%s/display?to_ext=hic " % ( |
274 self.giURL, | 274 self.giURL, |
275 dsId, | 275 dsId, |
276 ) | 276 ) |
277 hname = trackData["name"] | 277 hname = trackData["name"] |
278 dest = os.path.realpath(os.path.join(self.outdir, hname)) | 278 dest = os.path.join(self.outdir, hname) |
279 url = hname | 279 url = hname |
280 cmd = ["cp", data, dest] | 280 cmd = ["cp", data, dest] |
281 self.subprocess_check_call(cmd) | 281 self.subprocess_check_call(cmd) |
282 floc = { | 282 floc = { |
283 "uri": hname, | 283 "uri": hname, |
328 } | 328 } |
329 ] | 329 ] |
330 } | 330 } |
331 tId = trackData["label"] | 331 tId = trackData["label"] |
332 fname = "%s.bed" % tId | 332 fname = "%s.bed" % tId |
333 dest = os.path.realpath("%s/%s" % (self.outdir, fname)) | 333 dest = "%s/%s" % (self.outdir, fname) |
334 # self.symlink_or_copy(data, dest) | 334 # self.symlink_or_copy(data, dest) |
335 # Process MAF to bed-like. Need build to munge chromosomes | 335 # Process MAF to bed-like. Need build to munge chromosomes |
336 gname = self.genome_name | 336 gname = self.genome_name |
337 cmd = [ | 337 cmd = [ |
338 "bash", | 338 "bash", |
411 | 411 |
412 # Replace original gff3 file | 412 # Replace original gff3 file |
413 shutil.copy(gff3_rebased.name, gff3) | 413 shutil.copy(gff3_rebased.name, gff3) |
414 os.unlink(gff3_rebased.name) | 414 os.unlink(gff3_rebased.name) |
415 url = "%s.gff3" % trackData["label"] | 415 url = "%s.gff3" % trackData["label"] |
416 dest = os.path.realpath("%s/%s" % (self.outdir, url)) | 416 dest = "%s/%s" % (self.outdir, url) |
417 self._sort_gff(gff3, dest) | 417 self._sort_gff(gff3, dest) |
418 url = url + ".gz" | 418 url = url + ".gz" |
419 tId = trackData["label"] | 419 tId = trackData["label"] |
420 trackDict = { | 420 trackDict = { |
421 "type": "FeatureTrack", | 421 "type": "FeatureTrack", |
465 self.subprocess_check_call(cmd) | 465 self.subprocess_check_call(cmd) |
466 os.unlink(gff3) | 466 os.unlink(gff3) |
467 | 467 |
468 def add_bigwig(self, data, trackData): | 468 def add_bigwig(self, data, trackData): |
469 url = "%s.bw" % trackData["name"] | 469 url = "%s.bw" % trackData["name"] |
470 dest = os.path.realpath(os.path.join(self.outdir, url)) | 470 dest = os.path.join(self.outdir, url) |
471 cmd = ["cp", data, dest] | 471 cmd = ["cp", data, dest] |
472 self.subprocess_check_call(cmd) | 472 self.subprocess_check_call(cmd) |
473 bwloc = {"uri": url} | 473 bwloc = {"uri": url} |
474 tId = trackData["label"] | 474 tId = trackData["label"] |
475 trackDict = { | 475 trackDict = { |
512 self.subprocess_check_call(cmd) | 512 self.subprocess_check_call(cmd) |
513 | 513 |
514 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): | 514 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): |
515 tId = trackData["label"] | 515 tId = trackData["label"] |
516 fname = "%s.bam" % trackData["label"] | 516 fname = "%s.bam" % trackData["label"] |
517 dest = os.path.realpath("%s/%s" % (self.outdir, fname)) | 517 dest = "%s/%s" % (self.outdir, fname) |
518 url = fname | 518 url = fname |
519 self.subprocess_check_call(["cp", data, dest]) | 519 self.subprocess_check_call(["cp", data, dest]) |
520 log.info("### copied %s to %s" % (data, dest)) | 520 log.info("### copied %s to %s" % (data, dest)) |
521 bloc = {"uri": url} | 521 bloc = {"uri": url} |
522 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): | 522 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): |
576 url = "%s/api/datasets/%s/display" % ( | 576 url = "%s/api/datasets/%s/display" % ( |
577 self.giURL, | 577 self.giURL, |
578 trackData["metadata"]["dataset_id"], | 578 trackData["metadata"]["dataset_id"], |
579 ) | 579 ) |
580 url = "%s.vcf.gz" % tId | 580 url = "%s.vcf.gz" % tId |
581 dest = os.path.realpath("%s/%s" % (self.outdir, url)) | 581 dest = "%s/%s" % (self.outdir, url) |
582 cmd = "bgzip -c %s > %s" % (data, dest) | 582 cmd = "bgzip -c %s > %s" % (data, dest) |
583 self.subprocess_popen(cmd) | 583 self.subprocess_popen(cmd) |
584 cmd = ["tabix", "-p", "vcf", dest] | 584 cmd = ["tabix", "-p", "vcf", dest] |
585 self.subprocess_check_call(cmd) | 585 self.subprocess_check_call(cmd) |
586 trackDict = { | 586 trackDict = { |
655 cmd = ["tabix", "-f", "-p", "bed", dest] | 655 cmd = ["tabix", "-f", "-p", "bed", dest] |
656 self.subprocess_check_call(cmd) | 656 self.subprocess_check_call(cmd) |
657 | 657 |
658 def add_gff(self, data, ext, trackData): | 658 def add_gff(self, data, ext, trackData): |
659 url = "%s.%s" % (trackData["label"], ext) | 659 url = "%s.%s" % (trackData["label"], ext) |
660 dest = os.path.realpath("%s/%s" % (self.outdir, url)) | 660 dest = "%s/%s" % (self.outdir, url) |
661 self._sort_gff(data, dest) | 661 self._sort_gff(data, dest) |
662 url = url + ".gz" | 662 url = url + ".gz" |
663 tId = trackData["label"] | 663 tId = trackData["label"] |
664 trackDict = { | 664 trackDict = { |
665 "type": "FeatureTrack", | 665 "type": "FeatureTrack", |
706 ] | 706 ] |
707 self.subprocess_check_call(cmd) | 707 self.subprocess_check_call(cmd) |
708 | 708 |
709 def add_bed(self, data, ext, trackData): | 709 def add_bed(self, data, ext, trackData): |
710 url = "%s.%s" % (trackData["label"], ext) | 710 url = "%s.%s" % (trackData["label"], ext) |
711 dest = os.path.realpath("%s/%s.gz" % (self.outdir, url)) | 711 dest = "%s/%s.gz" % (self.outdir, url) |
712 self._sort_bed(data, dest) | 712 self._sort_bed(data, dest) |
713 tId = trackData["label"] | 713 tId = trackData["label"] |
714 url = url + ".gz" | 714 url = url + ".gz" |
715 trackDict = { | 715 trackDict = { |
716 "type": "FeatureTrack", | 716 "type": "FeatureTrack", |
858 elif dataset_ext == "vcf": | 858 elif dataset_ext == "vcf": |
859 self.add_vcf(dataset_path, outputTrackConfig) | 859 self.add_vcf(dataset_path, outputTrackConfig) |
860 else: | 860 else: |
861 log.warn("Do not know how to handle %s", dataset_ext) | 861 log.warn("Do not know how to handle %s", dataset_ext) |
862 | 862 |
863 def clone_jbrowse(self, jbrowse_dir, destination): | 863 def clone_jbrowse(self, destination): |
864 """Clone a JBrowse directory into a destination directory.""" | 864 """Clone a JBrowse directory into a destination directory.""" |
865 cmd = ["jbrowse", "create", "-f", self.outdir] | 865 cmd = ["jbrowse", "create", "-f", os.path.realpath(destination)] |
866 self.subprocess_check_call(cmd) | 866 self.subprocess_check_call(cmd) |
867 for fn in [ | 867 for fn in [ |
868 "asset-manifest.json", | 868 "asset-manifest.json", |
869 "favicon.ico", | 869 "favicon.ico", |
870 "robots.txt", | 870 "robots.txt", |
872 "version.txt", | 872 "version.txt", |
873 "test_data", | 873 "test_data", |
874 ]: | 874 ]: |
875 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] | 875 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] |
876 self.subprocess_check_call(cmd) | 876 self.subprocess_check_call(cmd) |
877 cmd = ['cp', os.path.join(INSTALLED_TO, "servejb2.py"), self.outdir] | 877 cmd = ["cp", os.path.join(INSTALLED_TO, "servejb2.py"), self.outdir] |
878 self.subprocess_check_call(cmd) | |
879 | |
880 def clone_jbrowse2(self, jbrowse_dir, destination): | |
881 """Clone a JBrowse directory into a destination directory.""" | |
882 cmd = ["cp", "-rv", jbrowse_dir + "/*", self.outdir] | |
883 self.subprocess_check_call(cmd) | |
884 for fn in [ | |
885 "asset-manifest.json", | |
886 "favicon.ico", | |
887 "robots.txt", | |
888 "umd_plugin.js", | |
889 "version.txt", | |
890 "test_data", | |
891 ]: | |
892 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] | |
893 self.subprocess_check_call(cmd) | |
894 cmd = ["cp", os.path.join(INSTALLED_TO, "servejb2.py"), self.outdir] | |
878 self.subprocess_check_call(cmd) | 895 self.subprocess_check_call(cmd) |
879 | 896 |
880 | 897 |
881 if __name__ == "__main__": | 898 if __name__ == "__main__": |
882 parser = argparse.ArgumentParser(description="", epilog="") | 899 parser = argparse.ArgumentParser(description="", epilog="") |
883 parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration") | 900 parser.add_argument("--xml", help="Track Configuration") |
884 | 901 parser.add_argument("--jbrowse", help="Output from 'which jbrowse'") |
885 parser.add_argument("--jbrowse", help="Folder containing a jbrowse release") | |
886 parser.add_argument("--outdir", help="Output directory", default="out") | 902 parser.add_argument("--outdir", help="Output directory", default="out") |
887 parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.8.0") | 903 parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1") |
888 args = parser.parse_args() | 904 args = parser.parse_args() |
889 | 905 |
890 tree = ET.parse(args.xml.name) | 906 tree = ET.parse(args.xml) |
891 root = tree.getroot() | 907 root = tree.getroot() |
892 | 908 |
893 # This should be done ASAP | 909 # This should be done ASAP |
894 GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text | 910 GALAXY_INFRASTRUCTURE_URL = root.find("metadata/galaxyUrl").text |
895 # Sometimes this comes as `localhost` without a protocol | 911 # Sometimes this comes as `localhost` without a protocol |
896 if not GALAXY_INFRASTRUCTURE_URL.startswith("http"): | 912 if not GALAXY_INFRASTRUCTURE_URL.startswith("http"): |
897 # so we'll prepend `http://` and hope for the best. Requests *should* | 913 # so we'll prepend `http://` and hope for the best. Requests *should* |
898 # be GET and not POST so it should redirect OK | 914 # be GET and not POST so it should redirect OK |
899 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL | 915 GALAXY_INFRASTRUCTURE_URL = "http://" + GALAXY_INFRASTRUCTURE_URL |
900 | |
901 jc = JbrowseConnector( | 916 jc = JbrowseConnector( |
902 jbrowse=args.jbrowse, | |
903 outdir=args.outdir, | 917 outdir=args.outdir, |
904 genomes=[ | 918 genomes=[ |
905 { | 919 { |
906 "path": os.path.realpath(x.attrib["path"]), | 920 "path": os.path.realpath(x.attrib["path"]), |
907 "meta": metadata_from_node(x.find("metadata")), | 921 "meta": metadata_from_node(x.find("metadata")), |