comparison jbrowse2.py @ 98:b1260bca5fdc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 44d8fc559ecf5463a8f753561976fa26686c96f6
author bgruening
date Wed, 05 Jun 2024 10:00:07 +0000
parents 74074746ccd8
children 990291e918c7
comparison
equal deleted inserted replaced
97:74074746ccd8 98:b1260bca5fdc
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import argparse 3 import argparse
4 import binascii 4 import binascii
5 import datetime 5 import datetime
6 # import hashlib
6 import json 7 import json
7 import logging 8 import logging
8 import os 9 import os
9 import re 10 import re
10 import shutil 11 import shutil
18 19
19 logging.basicConfig(level=logging.DEBUG) 20 logging.basicConfig(level=logging.DEBUG)
20 log = logging.getLogger("jbrowse") 21 log = logging.getLogger("jbrowse")
21 22
22 JB2VER = "v2.11.0" 23 JB2VER = "v2.11.0"
23 # version pinned if cloning - but not cloning now 24 # version pinned if cloning - but not used until now
24 logCommands = True 25 logCommands = True
25 # useful for seeing what's being written but NOT for production setups 26 # useful for seeing what's being written but not for production setups
26 TODAY = datetime.datetime.now().strftime("%Y-%m-%d") 27 TODAY = datetime.datetime.now().strftime("%Y-%m-%d")
27 SELF_LOCATION = os.path.dirname(os.path.realpath(__file__)) 28 SELF_LOCATION = os.path.dirname(os.path.realpath(__file__))
28 GALAXY_INFRASTRUCTURE_URL = None 29 GALAXY_INFRASTRUCTURE_URL = None
29 mapped_chars = { 30 mapped_chars = {
30 ">": "__gt__", 31 ">": "__gt__",
37 "}": "__cc__", 38 "}": "__cc__",
38 "@": "__at__", 39 "@": "__at__",
39 "#": "__pd__", 40 "#": "__pd__",
40 "": "__cn__", 41 "": "__cn__",
41 } 42 }
43
44
45 INDEX_TEMPLATE = """<!doctype html>
46 <html lang="en" style="height:100%">
47 <head>
48 <meta charset="utf-8"/>
49 <link rel="shortcut icon" href="./favicon.ico"/>
50 <meta name="viewport" content="width=device-width,initial-scale=1"/>
51 <meta name="theme-color" content="#000000"/>
52 <meta name="description" content="A fast and flexible genome browser"/>
53 <link rel="manifest" href="./manifest.json"/>
54 <title>JBrowse</title>
55 </script>
56 </head>
57 <body style="overscroll-behavior:none; height:100%; margin: 0;">
58 <iframe
59 id="jbframe"
60 title="JBrowse2"
61 frameborder="0"
62 width="100%"
63 height="100%"
64 src='index_noview.html?config=config.json__SESSION_SPEC__'>
65 </iframe>
66 </body>
67 </html>
68 """
42 69
43 70
44 class ColorScaling(object): 71 class ColorScaling(object):
45 72
46 COLOR_FUNCTION_TEMPLATE = """ 73 COLOR_FUNCTION_TEMPLATE = """
396 def get_cwd(self, cwd): 423 def get_cwd(self, cwd):
397 if cwd: 424 if cwd:
398 return self.outdir 425 return self.outdir
399 else: 426 else:
400 return subprocess.check_output(["pwd"]).decode("utf-8").strip() 427 return subprocess.check_output(["pwd"]).decode("utf-8").strip()
401 # return None
402 428
403 def subprocess_check_call(self, command, output=None, cwd=True): 429 def subprocess_check_call(self, command, output=None, cwd=True):
404 if output: 430 if output:
405 if logCommands: 431 if logCommands:
406 log.debug( 432 log.debug(
427 retcode = p.returncode 453 retcode = p.returncode
428 if retcode != 0: 454 if retcode != 0:
429 log.error(command) 455 log.error(command)
430 log.error(output) 456 log.error(output)
431 log.error(err) 457 log.error(err)
432 raise RuntimeError("Command failed with exit code %s" % (retcode)) 458 raise RuntimeError(f"Command ( {command} ) failed with exit code {retcode}")
433 459
434 def subprocess_check_output(self, command): 460 def subprocess_check_output(self, command):
435 if logCommands: 461 if logCommands:
436 log.debug(" ".join(command)) 462 log.debug(" ".join(command))
437 return subprocess.check_output(command, cwd=self.outdir) 463 return subprocess.check_output(command, cwd=self.outdir)
471 with urllib.request.urlopen(url, context=scontext) as f: 497 with urllib.request.urlopen(url, context=scontext) as f:
472 fl = f.readlines() 498 fl = f.readlines()
473 nrow = len(fl) 499 nrow = len(fl)
474 except Exception: 500 except Exception:
475 nrow = 0 501 nrow = 0
476 logging.debug("### getNrow %s returning %d" % (url, nrow)) 502 logging.debug("getNrow %s returning %d" % (url, nrow))
477 return nrow 503 return nrow
478 504
479 def process_genomes(self, genomes): 505 def process_genomes(self, genomes):
480 assembly = [] 506 assembly = []
481 assmeta = [] 507 assmeta = []
512 self.assmeta[primaryGenome] = assmeta 538 self.assmeta[primaryGenome] = assmeta
513 self.tracksToAdd[primaryGenome] = [] 539 self.tracksToAdd[primaryGenome] = []
514 return primaryGenome 540 return primaryGenome
515 541
516 def make_assembly(self, fapath, gname, useuri): 542 def make_assembly(self, fapath, gname, useuri):
517 """added code to grab the first contig name and length for broken default session from Anthony and Helena's code
518 that poor Bjoern is trying to figure out.
519 """
520 if useuri: 543 if useuri:
521 faname = fapath 544 faname = fapath
522 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) 545 scontext = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
523 scontext.check_hostname = False 546 scontext.check_hostname = False
524 scontext.verify_mode = ssl.VerifyMode.CERT_NONE 547 scontext.verify_mode = ssl.VerifyMode.CERT_NONE
673 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js", 696 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js",
674 } 697 }
675 ] 698 ]
676 } 699 }
677 categ = trackData["category"] 700 categ = trackData["category"]
678 fname = tId 701 fname = f"{tId}"
679 dest = "%s/%s" % (self.outdir, fname) 702 dest = os.path.join(self.outdir, fname)
680 gname = trackData["assemblyNames"] 703 gname = trackData["assemblyNames"]
681 704
682 cmd = [ 705 cmd = [
683 "bash", 706 "bash",
684 os.path.join(INSTALLED_TO, "convertMAF.sh"), 707 os.path.join(INSTALLED_TO, "convertMAF.sh"),
929 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) 952 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict)
930 self.trackIdlist.append(tId) 953 self.trackIdlist.append(tId)
931 954
932 def add_vcf(self, data, trackData): 955 def add_vcf(self, data, trackData):
933 tId = trackData["label"] 956 tId = trackData["label"]
934 # url = "%s/api/datasets/%s/display" % (
935 # self.giURL,
936 # trackData["metadata"]["dataset_id"],
937 # )
938 categ = trackData["category"] 957 categ = trackData["category"]
939 useuri = trackData["useuri"].lower() == "yes" 958 useuri = trackData["useuri"].lower() == "yes"
940 if useuri: 959 if useuri:
941 url = data 960 url = data
942 else: 961 else:
1193 track_human_label = track_human_label.replace(" ", "_") 1212 track_human_label = track_human_label.replace(" ", "_")
1194 outputTrackConfig = { 1213 outputTrackConfig = {
1195 "category": category, 1214 "category": category,
1196 "style": {}, 1215 "style": {},
1197 } 1216 }
1217
1218 # hashData = [
1219 # str(dataset_path),
1220 # track_human_label,
1221 # track["category"],
1222 # ]
1223 # hashData = "|".join(hashData).encode("utf-8")
1224 # hash_string = hashlib.md5(hashData).hexdigest()
1225
1198 outputTrackConfig["assemblyNames"] = track["assemblyNames"] 1226 outputTrackConfig["assemblyNames"] = track["assemblyNames"]
1199 outputTrackConfig["key"] = track_human_label 1227 outputTrackConfig["key"] = track_human_label
1200 outputTrackConfig["useuri"] = useuri 1228 outputTrackConfig["useuri"] = useuri
1201 outputTrackConfig["path"] = dataset_path 1229 outputTrackConfig["path"] = dataset_path
1202 outputTrackConfig["ext"] = dataset_ext 1230 outputTrackConfig["ext"] = dataset_ext
1203
1204 outputTrackConfig["trackset"] = track.get("trackset", {}) 1231 outputTrackConfig["trackset"] = track.get("trackset", {})
1205 outputTrackConfig["label"] = track["label"] 1232 outputTrackConfig["label"] = track["label"]
1233 # outputTrackConfig["label"] = "%s_%i_%s_%s" % (
1234 # dataset_ext,
1235 # trackIndex,
1236 # track_human_label,
1237 # hash_string,
1238 # )
1239
1206 outputTrackConfig["metadata"] = extra_metadata 1240 outputTrackConfig["metadata"] = extra_metadata
1207 outputTrackConfig["name"] = track_human_label 1241 outputTrackConfig["name"] = track_human_label
1208 if track["label"] in self.trackIdlist: 1242 if track["label"] in self.trackIdlist:
1209 logging.error( 1243 logging.error(
1210 "### not adding %s already in %s" 1244 "### not adding %s already in %s"
1235 "hic", 1269 "hic",
1236 dataset_path, 1270 dataset_path,
1237 hic_path, 1271 hic_path,
1238 ] 1272 ]
1239 ) 1273 )
1240 logging.debug(
1241 "### ext=cool: wasCool=%s, hic_path=%s"
1242 % (outputTrackConfig["wasCool"], hic_path)
1243 )
1244 self.add_hic( 1274 self.add_hic(
1245 hic_path, 1275 hic_path,
1246 outputTrackConfig, 1276 outputTrackConfig,
1247 ) 1277 )
1248 elif dataset_ext in ("bed",): 1278 elif dataset_ext in ("bed",):
1261 dataset_path, 1291 dataset_path,
1262 outputTrackConfig, 1292 outputTrackConfig,
1263 ) 1293 )
1264 elif dataset_ext == "bam": 1294 elif dataset_ext == "bam":
1265 real_indexes = track["conf"]["options"]["bam"]["bam_index"] 1295 real_indexes = track["conf"]["options"]["bam"]["bam_index"]
1266 logging.debug("**** add bam got %s for indexes" % real_indexes)
1267 self.add_bam( 1296 self.add_bam(
1268 dataset_path, 1297 dataset_path,
1269 outputTrackConfig, 1298 outputTrackConfig,
1270 bam_indexes=real_indexes, 1299 bam_indexes=real_indexes,
1271 ) 1300 )
1272 elif dataset_ext == "cram": 1301 elif dataset_ext == "cram":
1273 real_indexes = track["conf"]["options"]["cram"]["cram_index"] 1302 real_indexes = track["conf"]["options"]["cram"]["cram_index"]
1274 logging.debug("**** add cram got %s for indexes" % real_indexes)
1275 self.add_cram( 1303 self.add_cram(
1276 dataset_path, 1304 dataset_path,
1277 outputTrackConfig, 1305 outputTrackConfig,
1278 cram_indexes=real_indexes, 1306 cram_indexes=real_indexes,
1279 ) 1307 )
1290 dataset_path, 1318 dataset_path,
1291 outputTrackConfig, 1319 outputTrackConfig,
1292 track["conf"]["options"]["paf"], 1320 track["conf"]["options"]["paf"],
1293 ) 1321 )
1294 else: 1322 else:
1295 logging.warn("Do not know how to handle %s", dataset_ext) 1323 logging.warning("Do not know how to handle %s", dataset_ext)
1296 # Return non-human label for use in other fields 1324 # Return non-human label for use in other fields
1297 yield outputTrackConfig["label"] 1325 yield outputTrackConfig["label"]
1298 1326
1299 def add_default_session(self, default_data): 1327 def add_default_session(self, default_data):
1300 """ 1328 """
1301 default session settings are hard and fragile. 1329 default session settings are hard and fragile.
1302 .add_default_view() and other configuration code adapted from 1330 .add_default_view() and other configuration code adapted from
1303 https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py 1331 https://github.com/abretaud/tools-iuc/blob/jbrowse2/tools/jbrowse2/jbrowse2.py
1304 """ 1332 """
1305 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708 1333 # TODO using the default session for now, but check out session specs in the future https://github.com/GMOD/jbrowse-components/issues/2708
1306 bpPerPx = 50 # this is tricky since browser window width is unknown - this seems a compromise that sort of works....
1307 track_types = {} 1334 track_types = {}
1308 with open(self.config_json_file, "r") as config_file: 1335 with open(self.config_json_file, "r") as config_file:
1309 config_json = json.load(config_file) 1336 config_json = json.load(config_file)
1310 if self.config_json: 1337 if self.config_json:
1311 config_json.update(self.config_json) 1338 config_json.update(self.config_json)
1340 "type": track_types[tId], 1367 "type": track_types[tId],
1341 "configuration": tId, 1368 "configuration": tId,
1342 "displays": [style_data], 1369 "displays": [style_data],
1343 } 1370 }
1344 ) 1371 )
1372 view_json = {
1373 "type": "LinearGenomeView",
1374 "offsetPx": 0,
1375 "minimized": False,
1376 "tracks": tracks_data,
1377 }
1345 first = [x for x in self.ass_first_contigs if x[0] == gnome] 1378 first = [x for x in self.ass_first_contigs if x[0] == gnome]
1346 drdict = { 1379 if len(first) > 0:
1380 [gnome, refName, end] = first[0]
1381 start = 0
1382 end = int(end)
1383 drdict = {
1384 "refName": refName,
1385 "start": start,
1386 "end": end,
1347 "reversed": False, 1387 "reversed": False,
1348 "assemblyName": gnome, 1388 "assemblyName": gnome,
1349 } 1389 }
1350 if len(first) > 0:
1351 [gnome, refName, end] = first[0]
1352 drdict["refName"] = refName
1353 drdict["start"] = 0
1354 end = int(end)
1355 drdict["end"] = end
1356 else: 1390 else:
1357 ddl = default_data.get("defaultLocation", None) 1391 ddl = default_data.get("defaultLocation", None)
1358 if ddl: 1392 if ddl:
1359 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl) 1393 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl)
1360 # allow commas like 100,000 but ignore as integer 1394 # allow commas like 100,000 but ignore as integer
1368 else: 1402 else:
1369 logging.info( 1403 logging.info(
1370 "@@@ regexp could not match contig:start..end in the supplied location %s - please fix" 1404 "@@@ regexp could not match contig:start..end in the supplied location %s - please fix"
1371 % ddl 1405 % ddl
1372 ) 1406 )
1373 view_json = {
1374 "type": "LinearGenomeView",
1375 "offsetPx": 0,
1376 "bpPerPx" : bpPerPx,
1377 "minimized": False,
1378 "tracks": tracks_data
1379 }
1380 if drdict.get("refName", None): 1407 if drdict.get("refName", None):
1381 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome 1408 # TODO displayedRegions is not just zooming to the region, it hides the rest of the chromosome
1382 view_json["displayedRegions"] = [ 1409 view_json["displayedRegions"] = [
1383 drdict, 1410 drdict,
1384 ] 1411 ]
1390 session_views.append(view_json) 1417 session_views.append(view_json)
1391 session_name = default_data.get("session_name", "New session") 1418 session_name = default_data.get("session_name", "New session")
1392 for key, value in mapped_chars.items(): 1419 for key, value in mapped_chars.items():
1393 session_name = session_name.replace(value, key) 1420 session_name = session_name.replace(value, key)
1394 session_json["name"] = session_name 1421 session_json["name"] = session_name
1422
1395 if "views" not in session_json: 1423 if "views" not in session_json:
1396 session_json["views"] = session_views 1424 session_json["views"] = session_views
1397 else: 1425 else:
1398 session_json["views"] += session_views 1426 session_json["views"] += session_views
1427
1399 pp = json.dumps(session_views, indent=2) 1428 pp = json.dumps(session_views, indent=2)
1400 config_json["defaultSession"] = session_json 1429 config_json["defaultSession"] = session_json
1401 self.config_json.update(config_json) 1430 self.config_json.update(config_json)
1402 logging.debug("defaultSession=%s" % (pp)) 1431 logging.debug("defaultSession=%s" % (pp))
1403 with open(self.config_json_file, "w") as config_file: 1432 with open(self.config_json_file, "w") as config_file:
1428 Xrefs to understand the choices: 1457 Xrefs to understand the choices:
1429 https://github.com/GMOD/jbrowse-components/issues/2708 1458 https://github.com/GMOD/jbrowse-components/issues/2708
1430 https://github.com/GMOD/jbrowse-components/discussions/3568 1459 https://github.com/GMOD/jbrowse-components/discussions/3568
1431 https://github.com/GMOD/jbrowse-components/pull/4148 1460 https://github.com/GMOD/jbrowse-components/pull/4148
1432 """ 1461 """
1433
1434
1435 INDEX_TEMPLATE = """<!doctype html>
1436 <html lang="en" style="height:100%">
1437 <head>
1438 <meta charset="utf-8"/>
1439 <link rel="shortcut icon" href="./favicon.ico"/>
1440 <meta name="viewport" content="width=device-width,initial-scale=1"/>
1441 <meta name="theme-color" content="#000000"/>
1442 <meta name="description" content="A fast and flexible genome browser"/>
1443 <link rel="manifest" href="./manifest.json"/>
1444 <title>JBrowse</title>
1445 </script>
1446 </head>
1447 <body style="overscroll-behavior:none; height:100%; margin: 0;">
1448 <iframe
1449 id="jbframe"
1450 title="JBrowse2"
1451 frameborder="0"
1452 width="100%"
1453 height="100%"
1454 src='index_noview.html?config=config.json__SESSION_SPEC__'>
1455 </iframe>
1456 </body>
1457 </html>
1458 """
1459
1460 new_index = "Nothing written" 1462 new_index = "Nothing written"
1461 session_spec = {"views": []} 1463 session_spec = {"views": []}
1462 logging.debug("def ass_first=%s\ndata=%s" % (self.ass_first_contigs, data)) 1464 logging.debug("def ass_first=%s\ndata=%s" % (self.ass_first_contigs, data))
1463 for first_contig in self.ass_first_contigs: 1465 for first_contig in self.ass_first_contigs:
1464 logging.debug("first contig=%s" % self.ass_first_contigs) 1466 logging.debug("first contig=%s" % self.ass_first_contigs)
1520 self.config_json.update(config_json) 1522 self.config_json.update(config_json)
1521 with open(config_path, "w") as config_file: 1523 with open(config_path, "w") as config_file:
1522 json.dump(self.config_json, config_file, indent=2) 1524 json.dump(self.config_json, config_file, indent=2)
1523 1525
1524 def clone_jbrowse(self, realclone=False): 1526 def clone_jbrowse(self, realclone=False):
1525 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now 1527 """
1526 Leave as True between version updates on temporary tools - requires manual conda trigger :( 1528 Clone a JBrowse directory into a destination directory.
1529
1530 `realclone=True` will use the `jbrowse create` command.
1531 To allow running on internet-less compute and for reproducibility
1532 use frozen code with `realclone=False`.
1533
1527 """ 1534 """
1528 dest = self.outdir 1535 dest = self.outdir
1529 if realclone: 1536 if realclone:
1530 self.subprocess_check_call( 1537 self.subprocess_check_call(
1531 ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"] 1538 ["jbrowse", "create", dest, "-f", "--tag", f"{JB2VER}"]