Mercurial > repos > fubar > jbrowse2dev
comparison jbrowse2/jbrowse2.py @ 5:42ca8804cd93 draft
urls in minimal, bloat in complete
author | fubar |
---|---|
date | Thu, 04 Jan 2024 02:18:18 +0000 |
parents | 22e3d068fdc9 |
children | 88b9b105c09b |
comparison
equal
deleted
inserted
replaced
4:52842c3f2dda | 5:42ca8804cd93 |
---|---|
185 # We only expect one input genome per run. This for loop is just | 185 # We only expect one input genome per run. This for loop is just |
186 # easier to write than the alternative / catches any possible | 186 # easier to write than the alternative / catches any possible |
187 # issues. | 187 # issues. |
188 genome_name = genome_node["meta"]["dataset_dname"] | 188 genome_name = genome_node["meta"]["dataset_dname"] |
189 dsId = genome_node["meta"]["dataset_id"] | 189 dsId = genome_node["meta"]["dataset_id"] |
190 fapath = genome_node["path"] | |
190 faname = genome_name + ".fasta" | 191 faname = genome_name + ".fasta" |
191 faurl = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId) | |
192 fapath = genome_node["path"] | |
193 faind = os.path.realpath(os.path.join(self.outdir, faname + ".fai")) | 192 faind = os.path.realpath(os.path.join(self.outdir, faname + ".fai")) |
193 if self.standalone == "complete": | |
194 faurl = faname | |
195 fadest = os.path.realpath(os.path.join(self.outdir, faname)) | |
196 cmd = ["cp", fapath, fadest] | |
197 self.subprocess_check_call(cmd) | |
198 else: | |
199 faurl = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId) | |
194 cmd = ["samtools", "faidx", fapath, "--fai-idx", faind] | 200 cmd = ["samtools", "faidx", fapath, "--fai-idx", faind] |
195 self.subprocess_check_call(cmd) | 201 self.subprocess_check_call(cmd) |
196 trackDict = { | 202 trackDict = { |
197 "name": genome_name, | 203 "name": genome_name, |
198 "sequence": { | 204 "sequence": { |
249 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi', | 255 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi', |
250 Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438 | 256 Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438 |
251 """ | 257 """ |
252 log.info("#### trackData=%s" % trackData) | 258 log.info("#### trackData=%s" % trackData) |
253 tId = trackData["label"] | 259 tId = trackData["label"] |
260 dsId = trackData["metadata"]["dataset_id"] | |
254 url = "%s/api/datasets/%s/display?to_ext=hic " % ( | 261 url = "%s/api/datasets/%s/display?to_ext=hic " % ( |
255 self.giURL, | 262 self.giURL, |
256 trackData["metadata"]["dataset_id"], | 263 dsId, |
257 ) | 264 ) |
265 hname = trackData["name"] | |
266 if self.standalone == "complete": | |
267 dest = os.path.realpath(os.path.join(self.outdir, hname)) | |
268 url = hname | |
269 cmd = ["cp", data, dest] | |
270 self.subprocess_check_call(cmd) | |
271 else: | |
272 url = "%s/api/datasets/%s/display?to_ext=hic" % (self.giURL, dsId) | |
258 trackDict = { | 273 trackDict = { |
259 "type": "HicTrack", | 274 "type": "HicTrack", |
260 "trackId": tId, | 275 "trackId": tId, |
261 "name": trackData["name"], | 276 "name": hname, |
262 "assemblyNames": [self.genome_name], | 277 "assemblyNames": [self.genome_name], |
263 "adapter": { | 278 "adapter": { |
264 "type": "HicAdapter", | 279 "type": "HicAdapter", |
265 "hicLocation": {"uri": url, "locationType": "UriLocation"}, | 280 "hicLocation": {"uri": url, "locationType": "UriLocation"}, |
266 }, | 281 }, |
391 self.tracksToAdd.append(trackDict) | 406 self.tracksToAdd.append(trackDict) |
392 self.trackIdlist.append(tId) | 407 self.trackIdlist.append(tId) |
393 os.unlink(gff3) | 408 os.unlink(gff3) |
394 | 409 |
395 def add_bigwig(self, data, trackData): | 410 def add_bigwig(self, data, trackData): |
396 url = "%s/api/datasets/%s/display" % ( | 411 fname = trackData["name"] |
397 self.giURL, | 412 if self.standalone == "complete": |
398 trackData["metadata"]["dataset_id"], | 413 dest = os.path.realpath(os.path.join(self.outdir, fname)) |
399 ) | 414 url = fname |
415 cmd = ["cp", data, dest] | |
416 self.subprocess_check_call(cmd) | |
417 else: | |
418 dsId = trackData["metadata"]["dataset_id"] | |
419 url = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId) | |
400 tId = trackData["label"] | 420 tId = trackData["label"] |
401 trackDict = { | 421 trackDict = { |
402 "type": "QuantitativeTrack", | 422 "type": "QuantitativeTrack", |
403 "trackId": tId, | 423 "trackId": tId, |
404 "name": trackData["name"], | 424 "name": fname, |
405 "assemblyNames": [ | 425 "assemblyNames": [ |
406 self.genome_name, | 426 self.genome_name, |
407 ], | 427 ], |
408 "adapter": { | 428 "adapter": { |
409 "type": "BigWigAdapter", | 429 "type": "BigWigAdapter", |
419 self.tracksToAdd.append(trackDict) | 439 self.tracksToAdd.append(trackDict) |
420 self.trackIdlist.append(tId) | 440 self.trackIdlist.append(tId) |
421 | 441 |
422 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): | 442 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): |
423 tId = trackData["label"] | 443 tId = trackData["label"] |
424 url = "%s.bam" % trackData["label"] | 444 fname = "%s.bam" % trackData["label"] |
425 dest = os.path.realpath("%s/%s" % (self.outdir, url)) | 445 dest = os.path.realpath("%s/%s" % (self.outdir, fname)) |
426 self.symlink_or_copy(os.path.realpath(data), dest) | 446 if self.standalone == "minimal": |
447 dsId = trackData["metadata"]["dataset_id"] | |
448 url = "%s/api/datasets/%s/display?to_ext=bam" % (self.giURL, dsId) | |
449 else: | |
450 url = fname | |
451 self.symlink_or_copy(data, dest) | |
427 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): | 452 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): |
428 # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest | 453 # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest |
429 self.subprocess_check_call( | 454 self.subprocess_check_call( |
430 ["cp", os.path.realpath(bam_index), dest + ".bai"] | 455 ["cp", os.path.realpath(bam_index), dest + ".bai"] |
431 ) | 456 ) |
445 "assemblyNames": [self.genome_name], | 470 "assemblyNames": [self.genome_name], |
446 "adapter": { | 471 "adapter": { |
447 "type": "BamAdapter", | 472 "type": "BamAdapter", |
448 "bamLocation": {"locationType": "UriLocation", "uri": url}, | 473 "bamLocation": {"locationType": "UriLocation", "uri": url}, |
449 "index": { | 474 "index": { |
450 "location": {"locationType": "UriLocation", "uri": url + ".bai"} | 475 "location": {"locationType": "UriLocation", "uri": fname + ".bai"} |
451 }, | 476 }, |
452 "sequenceAdapter": { | 477 "sequenceAdapter": { |
453 "type": "IndexedFastaAdapter", | 478 "type": "IndexedFastaAdapter", |
454 "fastaLocation": { | 479 "fastaLocation": { |
455 "locationType": "UriLocation", | 480 "locationType": "UriLocation", |
473 tId = trackData["label"] | 498 tId = trackData["label"] |
474 url = "%s/api/datasets/%s/display" % ( | 499 url = "%s/api/datasets/%s/display" % ( |
475 self.giURL, | 500 self.giURL, |
476 trackData["metadata"]["dataset_id"], | 501 trackData["metadata"]["dataset_id"], |
477 ) | 502 ) |
478 | |
479 url = "%s.vcf.gz" % tId | 503 url = "%s.vcf.gz" % tId |
480 dest = os.path.realpath("%s/%s" % (self.outdir, url)) | 504 dest = os.path.realpath("%s/%s" % (self.outdir, url)) |
481 cmd = "bgzip -c %s > %s" % (data, dest) | 505 cmd = "bgzip -c %s > %s" % (data, dest) |
482 self.subprocess_popen(cmd) | 506 self.subprocess_popen(cmd) |
483 cmd = ["tabix", "-p", "vcf", dest] | 507 cmd = ["tabix", "-p", "vcf", dest] |
523 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"]) | 547 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"]) |
524 | 548 |
525 def _sort_bed(self, data, dest): | 549 def _sort_bed(self, data, dest): |
526 # Only index if not already done | 550 # Only index if not already done |
527 if not os.path.exists(dest): | 551 if not os.path.exists(dest): |
528 cmd = ["sort", "-k1,1", "-k2,2n", data] | 552 cmd = "sort -k1,1 -k2,2n %s | bgzip -c > %s" % (data, dest) |
529 with open(dest, "w") as handle: | 553 self.subprocess_popen(cmd) |
530 self.subprocess_check_call(cmd, output=handle) | 554 cmd = ["tabix", "-f", "-p", "bed", dest] |
531 | 555 self.subprocess_check_call(cmd) |
532 self.subprocess_check_call(["bgzip", "-f", dest]) | |
533 self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest + ".gz"]) | |
534 | 556 |
535 def add_gff(self, data, ext, trackData): | 557 def add_gff(self, data, ext, trackData): |
536 url = "%s.%s" % (trackData["label"], ext) | 558 url = "%s.%s" % (trackData["label"], ext) |
537 dest = os.path.realpath("%s/%s" % (self.outdir, url)) | 559 dest = os.path.realpath("%s/%s" % (self.outdir, url)) |
538 self._sort_gff(data, dest) | 560 self._sort_gff(data, dest) |
561 self.tracksToAdd.append(trackDict) | 583 self.tracksToAdd.append(trackDict) |
562 self.trackIdlist.append(tId) | 584 self.trackIdlist.append(tId) |
563 | 585 |
564 def add_bed(self, data, ext, trackData): | 586 def add_bed(self, data, ext, trackData): |
565 url = "%s.%s" % (trackData["label"], ext) | 587 url = "%s.%s" % (trackData["label"], ext) |
566 dest = os.path.realpath("%s/%s" % (self.outdir, url)) | 588 dest = os.path.realpath("%s/%s.gz" % (self.outdir, url)) |
567 self._sort_bed(data, dest) | 589 self._sort_bed(data, dest) |
568 tId = trackData["label"] | 590 tId = trackData["label"] |
569 url = url + ".gz" | 591 url = url + ".gz" |
570 trackDict = { | 592 trackDict = { |
571 "type": "FeatureTrack", | 593 "type": "FeatureTrack", |