Mercurial > repos > fubar > jbrowse2dev
comparison jbrowse2/jbrowse2.py @ 7:234cf4490901 draft
Uploaded
author | fubar |
---|---|
date | Fri, 05 Jan 2024 04:31:35 +0000 |
parents | 88b9b105c09b |
children | 1e6128ccc82b |
comparison
equal
deleted
inserted
replaced
6:88b9b105c09b | 7:234cf4490901 |
---|---|
106 ) | 106 ) |
107 return metadata | 107 return metadata |
108 | 108 |
109 | 109 |
110 class JbrowseConnector(object): | 110 class JbrowseConnector(object): |
111 def __init__(self, jbrowse, outdir, genomes, standalone=None): | 111 def __init__(self, jbrowse, outdir, genomes): |
112 self.debug = False | 112 self.debug = False |
113 self.usejson = True | 113 self.usejson = True |
114 self.giURL = GALAXY_INFRASTRUCTURE_URL | 114 self.giURL = GALAXY_INFRASTRUCTURE_URL |
115 self.jbrowse = jbrowse | 115 self.jbrowse = jbrowse |
116 self.outdir = outdir | 116 self.outdir = outdir |
117 os.makedirs(self.outdir, exist_ok=True) | 117 os.makedirs(self.outdir, exist_ok=True) |
118 self.genome_paths = genomes | 118 self.genome_paths = genomes |
119 self.standalone = standalone | |
120 self.trackIdlist = [] | 119 self.trackIdlist = [] |
121 self.tracksToAdd = [] | 120 self.tracksToAdd = [] |
122 self.config_json = {} | 121 self.config_json = {} |
123 self.config_json_file = os.path.realpath(os.path.join(outdir, "config.json")) | 122 self.config_json_file = os.path.realpath(os.path.join(outdir, "config.json")) |
124 self.clone_jbrowse(self.jbrowse, self.outdir) | 123 self.clone_jbrowse(self.jbrowse, self.outdir) |
172 assemblies = [] | 171 assemblies = [] |
173 for i, genome_node in enumerate(self.genome_paths): | 172 for i, genome_node in enumerate(self.genome_paths): |
174 if self.debug: | 173 if self.debug: |
175 log.info("genome_node=%s" % str(genome_node)) | 174 log.info("genome_node=%s" % str(genome_node)) |
176 genome_name = genome_node["meta"]["dataset_dname"] | 175 genome_name = genome_node["meta"]["dataset_dname"] |
177 dsId = genome_node["meta"]["dataset_id"] | |
178 fapath = genome_node["path"] | 176 fapath = genome_node["path"] |
179 if self.standalone == "complete": | 177 faname = genome_name + ".fa.gz" |
180 faname = genome_name + ".fa.gz" | 178 fadest = os.path.realpath(os.path.join(self.outdir, faname)) |
181 fadest = os.path.realpath(os.path.join(self.outdir, faname)) | 179 cmd = "bgzip -i -c %s > %s && samtools faidx %s" % ( |
182 cmd = "bgzip -i -c %s > %s && samtools faidx %s" % ( | 180 fapath, |
183 fapath, | 181 fadest, |
184 fadest, | 182 fadest, |
185 fadest, | 183 ) |
186 ) | 184 self.subprocess_popen(cmd) |
187 self.subprocess_popen(cmd) | 185 adapter = { |
188 adapter = { | 186 "type": "BgzipFastaAdapter", |
189 "type": "BgzipFastaAdapter", | 187 "fastaLocation": { |
190 "fastaLocation": { | |
191 "uri": faname, | |
192 }, | |
193 "faiLocation": { | |
194 "uri": faname + ".fai", | |
195 }, | |
196 "gziLocation": { | |
197 "uri": faname + ".gzi", | |
198 }, | |
199 } | |
200 else: | |
201 faurl = "%s/api/datasets/%s/display" % (self.giURL, dsId) | |
202 faname = genome_name + ".fa.fai" | |
203 fastalocation = { | |
204 "uri": faurl, | |
205 } | |
206 failocation = { | |
207 "uri": faname, | 188 "uri": faname, |
208 } | 189 }, |
209 adapter = { | 190 "faiLocation": { |
210 "type": "IndexedFastaAdapter", | 191 "uri": faname + ".fai", |
211 "fastaLocation": fastalocation, | 192 }, |
212 "faiLocation": failocation, | 193 "gziLocation": { |
213 } | 194 "uri": faname + ".gzi", |
214 | 195 }, |
215 cmd = ["samtools", "faidx", fapath, "--fai-idx", faname] | 196 } |
216 self.subprocess_check_call(cmd) | |
217 trackDict = { | 197 trackDict = { |
218 "name": genome_name, | 198 "name": genome_name, |
219 "sequence": { | 199 "sequence": { |
220 "type": "ReferenceSequenceTrack", | 200 "type": "ReferenceSequenceTrack", |
221 "trackId": genome_name, | 201 "trackId": genome_name, |
226 assemblies.append(trackDict) | 206 assemblies.append(trackDict) |
227 self.genome_name = genome_name | 207 self.genome_name = genome_name |
228 if self.usejson: | 208 if self.usejson: |
229 self.config_json["assemblies"] = assemblies | 209 self.config_json["assemblies"] = assemblies |
230 else: | 210 else: |
231 if self.standalone == "complete": | 211 cmd = [ |
232 cmd = [ | 212 "jbrowse", |
233 "jbrowse", | 213 "add-assembly", |
234 "add-assembly", | 214 faname, |
235 faname, | 215 "-t", |
236 "-t", | 216 "bgzipFasta", |
237 "bgzipFasta", | 217 "-n", |
238 "-n", | 218 genome_name, |
239 genome_name, | 219 "--load", |
240 "--load", | 220 "inPlace", |
241 "inPlace", | 221 "--faiLocation", |
242 "--faiLocation", | 222 faname + ".fai", |
243 faname + ".fai", | 223 "--gziLocation", |
244 "--gziLocation", | 224 faname + ".gzi", |
245 faname + ".gzi", | 225 "--target", |
246 "--target", | 226 self.outdir, |
247 self.outdir, | 227 ] |
248 ] | |
249 else: | |
250 cmd = [ | |
251 "jbrowse", | |
252 "add-assembly", | |
253 faname, | |
254 "-t", | |
255 "indexedFasta", | |
256 "-n", | |
257 genome_name, | |
258 "--load", | |
259 "inPlace", | |
260 "--faiLocation", | |
261 faname + ".fai", | |
262 "--target", | |
263 self.outdir, | |
264 ] | |
265 self.subprocess_check_call(cmd) | 228 self.subprocess_check_call(cmd) |
266 | 229 |
267 def add_default_view(self): | 230 def add_default_view(self): |
268 cmd = [ | 231 cmd = [ |
269 "jbrowse", | 232 "jbrowse", |
277 "--target", | 240 "--target", |
278 self.config_json_file, | 241 self.config_json_file, |
279 "-v", | 242 "-v", |
280 " LinearGenomeView", | 243 " LinearGenomeView", |
281 ] | 244 ] |
282 if True or self.debug: | 245 if self.debug: |
283 log.info("### calling set-default-session with cmd=%s" % " ".join(cmd)) | 246 log.info("### calling set-default-session with cmd=%s" % " ".join(cmd)) |
284 self.subprocess_check_call(cmd) | 247 self.subprocess_check_call(cmd) |
285 | 248 |
286 def write_config(self): | 249 def write_config(self): |
287 with open(self.config_json_file, "w") as fp: | 250 with open(self.config_json_file, "w") as fp: |
309 url = "%s/api/datasets/%s/display?to_ext=hic " % ( | 272 url = "%s/api/datasets/%s/display?to_ext=hic " % ( |
310 self.giURL, | 273 self.giURL, |
311 dsId, | 274 dsId, |
312 ) | 275 ) |
313 hname = trackData["name"] | 276 hname = trackData["name"] |
314 if self.standalone == "complete": | 277 dest = os.path.realpath(os.path.join(self.outdir, hname)) |
315 dest = os.path.realpath(os.path.join(self.outdir, hname)) | 278 url = hname |
316 url = hname | 279 cmd = ["cp", data, dest] |
317 cmd = ["cp", data, dest] | 280 self.subprocess_check_call(cmd) |
318 self.subprocess_check_call(cmd) | 281 floc = { |
319 floc = { | 282 "uri": hname, |
320 "uri": hname, | 283 } |
321 } | |
322 else: | |
323 url = "%s/api/datasets/%s/display?to_ext=hic" % (self.giURL, dsId) | |
324 floc = { | |
325 "uri": url, | |
326 } | |
327 trackDict = { | 284 trackDict = { |
328 "type": "HicTrack", | 285 "type": "HicTrack", |
329 "trackId": tId, | 286 "trackId": tId, |
330 "name": hname, | 287 "name": hname, |
331 "assemblyNames": [self.genome_name], | 288 "assemblyNames": [self.genome_name], |
507 self.subprocess_check_call(cmd) | 464 self.subprocess_check_call(cmd) |
508 os.unlink(gff3) | 465 os.unlink(gff3) |
509 | 466 |
510 def add_bigwig(self, data, trackData): | 467 def add_bigwig(self, data, trackData): |
511 url = "%s.bw" % trackData["name"] | 468 url = "%s.bw" % trackData["name"] |
512 if self.standalone == "complete": | 469 dest = os.path.realpath(os.path.join(self.outdir, url)) |
513 dest = os.path.realpath(os.path.join(self.outdir, url)) | 470 cmd = ["cp", data, dest] |
514 cmd = ["cp", data, dest] | 471 self.subprocess_check_call(cmd) |
515 self.subprocess_check_call(cmd) | 472 bwloc = {"uri": url} |
516 bwloc = {"uri": url} | |
517 else: | |
518 dsId = trackData["metadata"]["dataset_id"] | |
519 url = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId) | |
520 bwloc = {"uri": url} | |
521 tId = trackData["label"] | 473 tId = trackData["label"] |
522 trackDict = { | 474 trackDict = { |
523 "type": "QuantitativeTrack", | 475 "type": "QuantitativeTrack", |
524 "trackId": tId, | 476 "trackId": tId, |
525 "name": url, | 477 "name": url, |
560 | 512 |
561 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): | 513 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): |
562 tId = trackData["label"] | 514 tId = trackData["label"] |
563 fname = "%s.bam" % trackData["label"] | 515 fname = "%s.bam" % trackData["label"] |
564 dest = os.path.realpath("%s/%s" % (self.outdir, fname)) | 516 dest = os.path.realpath("%s/%s" % (self.outdir, fname)) |
565 if self.standalone == "complete": | 517 url = fname |
566 url = fname | 518 self.subprocess_check_call(["cp", data, dest]) |
567 self.subprocess_check_call(["cp", data, dest]) | 519 log.info("### copied %s to %s" % (data, dest)) |
568 log.info("### copied %s to %s" % (data, dest)) | 520 bloc = {"uri": url} |
569 bloc = {"uri": url} | |
570 else: | |
571 dsId = trackData["metadata"]["dataset_id"] | |
572 url = "%s/api/datasets/%s/display?to_ext=bam" % (self.giURL, dsId) | |
573 bloc = {"uri": url} | |
574 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): | 521 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): |
575 # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest | 522 # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest |
576 self.subprocess_check_call( | 523 self.subprocess_check_call( |
577 ["cp", os.path.realpath(bam_index), dest + ".bai"] | 524 ["cp", os.path.realpath(bam_index), dest + ".bai"] |
578 ) | 525 ) |
924 "version.txt", | 871 "version.txt", |
925 "test_data", | 872 "test_data", |
926 ]: | 873 ]: |
927 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] | 874 cmd = ["rm", "-rf", os.path.join(self.outdir, fn)] |
928 self.subprocess_check_call(cmd) | 875 self.subprocess_check_call(cmd) |
876 cmd = ['cp', os.path.join(INSTALLED_TO, "servejb2.py"), self.outdir] | |
877 self.subprocess_check_call(cmd) | |
929 | 878 |
930 | 879 |
931 if __name__ == "__main__": | 880 if __name__ == "__main__": |
932 parser = argparse.ArgumentParser(description="", epilog="") | 881 parser = argparse.ArgumentParser(description="", epilog="") |
933 parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration") | 882 parser.add_argument("xml", type=argparse.FileType("r"), help="Track Configuration") |
934 | 883 |
935 parser.add_argument("--jbrowse", help="Folder containing a jbrowse release") | 884 parser.add_argument("--jbrowse", help="Folder containing a jbrowse release") |
936 parser.add_argument("--outdir", help="Output directory", default="out") | 885 parser.add_argument("--outdir", help="Output directory", default="out") |
937 parser.add_argument( | |
938 "--standalone", | |
939 choices=["complete", "minimal", "data"], | |
940 help="Standalone mode includes a copy of JBrowse", | |
941 ) | |
942 parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.8.0") | 886 parser.add_argument("--version", "-V", action="version", version="%(prog)s 0.8.0") |
943 args = parser.parse_args() | 887 args = parser.parse_args() |
944 | 888 |
945 tree = ET.parse(args.xml.name) | 889 tree = ET.parse(args.xml.name) |
946 root = tree.getroot() | 890 root = tree.getroot() |
961 "path": os.path.realpath(x.attrib["path"]), | 905 "path": os.path.realpath(x.attrib["path"]), |
962 "meta": metadata_from_node(x.find("metadata")), | 906 "meta": metadata_from_node(x.find("metadata")), |
963 } | 907 } |
964 for x in root.findall("metadata/genomes/genome") | 908 for x in root.findall("metadata/genomes/genome") |
965 ], | 909 ], |
966 standalone=args.standalone, | |
967 ) | 910 ) |
968 jc.process_genomes() | 911 jc.process_genomes() |
969 | 912 |
970 for track in root.findall("tracks/track"): | 913 for track in root.findall("tracks/track"): |
971 track_conf = {} | 914 track_conf = {} |