comparison jbrowse2/jbrowse2.py @ 5:42ca8804cd93 draft

urls in minimal, bloat in complete
author fubar
date Thu, 04 Jan 2024 02:18:18 +0000
parents 22e3d068fdc9
children 88b9b105c09b
comparison
equal deleted inserted replaced
4:52842c3f2dda 5:42ca8804cd93
185 # We only expect one input genome per run. This for loop is just 185 # We only expect one input genome per run. This for loop is just
186 # easier to write than the alternative / catches any possible 186 # easier to write than the alternative / catches any possible
187 # issues. 187 # issues.
188 genome_name = genome_node["meta"]["dataset_dname"] 188 genome_name = genome_node["meta"]["dataset_dname"]
189 dsId = genome_node["meta"]["dataset_id"] 189 dsId = genome_node["meta"]["dataset_id"]
190 fapath = genome_node["path"]
190 faname = genome_name + ".fasta" 191 faname = genome_name + ".fasta"
191 faurl = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId)
192 fapath = genome_node["path"]
193 faind = os.path.realpath(os.path.join(self.outdir, faname + ".fai")) 192 faind = os.path.realpath(os.path.join(self.outdir, faname + ".fai"))
193 if self.standalone == "complete":
194 faurl = faname
195 fadest = os.path.realpath(os.path.join(self.outdir, faname))
196 cmd = ["cp", fapath, fadest]
197 self.subprocess_check_call(cmd)
198 else:
199 faurl = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId)
194 cmd = ["samtools", "faidx", fapath, "--fai-idx", faind] 200 cmd = ["samtools", "faidx", fapath, "--fai-idx", faind]
195 self.subprocess_check_call(cmd) 201 self.subprocess_check_call(cmd)
196 trackDict = { 202 trackDict = {
197 "name": genome_name, 203 "name": genome_name,
198 "sequence": { 204 "sequence": {
249 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi', 255 uri: 'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/fasta/GRCh38.fa.gz.gzi',
250 Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438 256 Cool will not be likely to be a good fit - see discussion at https://github.com/GMOD/jbrowse-components/issues/2438
251 """ 257 """
252 log.info("#### trackData=%s" % trackData) 258 log.info("#### trackData=%s" % trackData)
253 tId = trackData["label"] 259 tId = trackData["label"]
260 dsId = trackData["metadata"]["dataset_id"]
254 url = "%s/api/datasets/%s/display?to_ext=hic " % ( 261 url = "%s/api/datasets/%s/display?to_ext=hic " % (
255 self.giURL, 262 self.giURL,
256 trackData["metadata"]["dataset_id"], 263 dsId,
257 ) 264 )
265 hname = trackData["name"]
266 if self.standalone == "complete":
267 dest = os.path.realpath(os.path.join(self.outdir, hname))
268 url = hname
269 cmd = ["cp", data, dest]
270 self.subprocess_check_call(cmd)
271 else:
272 url = "%s/api/datasets/%s/display?to_ext=hic" % (self.giURL, dsId)
258 trackDict = { 273 trackDict = {
259 "type": "HicTrack", 274 "type": "HicTrack",
260 "trackId": tId, 275 "trackId": tId,
261 "name": trackData["name"], 276 "name": hname,
262 "assemblyNames": [self.genome_name], 277 "assemblyNames": [self.genome_name],
263 "adapter": { 278 "adapter": {
264 "type": "HicAdapter", 279 "type": "HicAdapter",
265 "hicLocation": {"uri": url, "locationType": "UriLocation"}, 280 "hicLocation": {"uri": url, "locationType": "UriLocation"},
266 }, 281 },
391 self.tracksToAdd.append(trackDict) 406 self.tracksToAdd.append(trackDict)
392 self.trackIdlist.append(tId) 407 self.trackIdlist.append(tId)
393 os.unlink(gff3) 408 os.unlink(gff3)
394 409
395 def add_bigwig(self, data, trackData): 410 def add_bigwig(self, data, trackData):
396 url = "%s/api/datasets/%s/display" % ( 411 fname = trackData["name"]
397 self.giURL, 412 if self.standalone == "complete":
398 trackData["metadata"]["dataset_id"], 413 dest = os.path.realpath(os.path.join(self.outdir, fname))
399 ) 414 url = fname
415 cmd = ["cp", data, dest]
416 self.subprocess_check_call(cmd)
417 else:
418 dsId = trackData["metadata"]["dataset_id"]
419 url = "%s/api/datasets/%s/display?to_ext=fasta" % (self.giURL, dsId)
400 tId = trackData["label"] 420 tId = trackData["label"]
401 trackDict = { 421 trackDict = {
402 "type": "QuantitativeTrack", 422 "type": "QuantitativeTrack",
403 "trackId": tId, 423 "trackId": tId,
404 "name": trackData["name"], 424 "name": fname,
405 "assemblyNames": [ 425 "assemblyNames": [
406 self.genome_name, 426 self.genome_name,
407 ], 427 ],
408 "adapter": { 428 "adapter": {
409 "type": "BigWigAdapter", 429 "type": "BigWigAdapter",
419 self.tracksToAdd.append(trackDict) 439 self.tracksToAdd.append(trackDict)
420 self.trackIdlist.append(tId) 440 self.trackIdlist.append(tId)
421 441
422 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): 442 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
423 tId = trackData["label"] 443 tId = trackData["label"]
424 url = "%s.bam" % trackData["label"] 444 fname = "%s.bam" % trackData["label"]
425 dest = os.path.realpath("%s/%s" % (self.outdir, url)) 445 dest = os.path.realpath("%s/%s" % (self.outdir, fname))
426 self.symlink_or_copy(os.path.realpath(data), dest) 446 if self.standalone == "minimal":
447 dsId = trackData["metadata"]["dataset_id"]
448 url = "%s/api/datasets/%s/display?to_ext=bam" % (self.giURL, dsId)
449 else:
450 url = fname
451 self.symlink_or_copy(data, dest)
427 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): 452 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)):
428 # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest 453 # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest
429 self.subprocess_check_call( 454 self.subprocess_check_call(
430 ["cp", os.path.realpath(bam_index), dest + ".bai"] 455 ["cp", os.path.realpath(bam_index), dest + ".bai"]
431 ) 456 )
445 "assemblyNames": [self.genome_name], 470 "assemblyNames": [self.genome_name],
446 "adapter": { 471 "adapter": {
447 "type": "BamAdapter", 472 "type": "BamAdapter",
448 "bamLocation": {"locationType": "UriLocation", "uri": url}, 473 "bamLocation": {"locationType": "UriLocation", "uri": url},
449 "index": { 474 "index": {
450 "location": {"locationType": "UriLocation", "uri": url + ".bai"} 475 "location": {"locationType": "UriLocation", "uri": fname + ".bai"}
451 }, 476 },
452 "sequenceAdapter": { 477 "sequenceAdapter": {
453 "type": "IndexedFastaAdapter", 478 "type": "IndexedFastaAdapter",
454 "fastaLocation": { 479 "fastaLocation": {
455 "locationType": "UriLocation", 480 "locationType": "UriLocation",
473 tId = trackData["label"] 498 tId = trackData["label"]
474 url = "%s/api/datasets/%s/display" % ( 499 url = "%s/api/datasets/%s/display" % (
475 self.giURL, 500 self.giURL,
476 trackData["metadata"]["dataset_id"], 501 trackData["metadata"]["dataset_id"],
477 ) 502 )
478
479 url = "%s.vcf.gz" % tId 503 url = "%s.vcf.gz" % tId
480 dest = os.path.realpath("%s/%s" % (self.outdir, url)) 504 dest = os.path.realpath("%s/%s" % (self.outdir, url))
481 cmd = "bgzip -c %s > %s" % (data, dest) 505 cmd = "bgzip -c %s > %s" % (data, dest)
482 self.subprocess_popen(cmd) 506 self.subprocess_popen(cmd)
483 cmd = ["tabix", "-p", "vcf", dest] 507 cmd = ["tabix", "-p", "vcf", dest]
523 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"]) 547 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"])
524 548
525 def _sort_bed(self, data, dest): 549 def _sort_bed(self, data, dest):
526 # Only index if not already done 550 # Only index if not already done
527 if not os.path.exists(dest): 551 if not os.path.exists(dest):
528 cmd = ["sort", "-k1,1", "-k2,2n", data] 552 cmd = "sort -k1,1 -k2,2n %s | bgzip -c > %s" % (data, dest)
529 with open(dest, "w") as handle: 553 self.subprocess_popen(cmd)
530 self.subprocess_check_call(cmd, output=handle) 554 cmd = ["tabix", "-f", "-p", "bed", dest]
531 555 self.subprocess_check_call(cmd)
532 self.subprocess_check_call(["bgzip", "-f", dest])
533 self.subprocess_check_call(["tabix", "-f", "-p", "bed", dest + ".gz"])
534 556
535 def add_gff(self, data, ext, trackData): 557 def add_gff(self, data, ext, trackData):
536 url = "%s.%s" % (trackData["label"], ext) 558 url = "%s.%s" % (trackData["label"], ext)
537 dest = os.path.realpath("%s/%s" % (self.outdir, url)) 559 dest = os.path.realpath("%s/%s" % (self.outdir, url))
538 self._sort_gff(data, dest) 560 self._sort_gff(data, dest)
561 self.tracksToAdd.append(trackDict) 583 self.tracksToAdd.append(trackDict)
562 self.trackIdlist.append(tId) 584 self.trackIdlist.append(tId)
563 585
564 def add_bed(self, data, ext, trackData): 586 def add_bed(self, data, ext, trackData):
565 url = "%s.%s" % (trackData["label"], ext) 587 url = "%s.%s" % (trackData["label"], ext)
566 dest = os.path.realpath("%s/%s" % (self.outdir, url)) 588 dest = os.path.realpath("%s/%s.gz" % (self.outdir, url))
567 self._sort_bed(data, dest) 589 self._sort_bed(data, dest)
568 tId = trackData["label"] 590 tId = trackData["label"]
569 url = url + ".gz" 591 url = url + ".gz"
570 trackDict = { 592 trackDict = {
571 "type": "FeatureTrack", 593 "type": "FeatureTrack",