comparison jbrowse2.py @ 104:9e3f69d9fed1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit a1537aea75fc902d0e38c0b7c698830a939648b1-dirty
author fubar
date Sun, 23 Jun 2024 00:13:57 +0000
parents 099370690407
children a074cd6b5905
comparison
equal deleted inserted replaced
103:dc3fe98e7b37 104:9e3f69d9fed1
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import argparse 3 import argparse
4 import binascii 4 import binascii
5 import copy
5 import datetime 6 import datetime
6 # import hashlib 7 # import hashlib
7 import json 8 import json
8 import logging 9 import logging
9 import os 10 import os
677 "category": [ 678 "category": [
678 categ, 679 categ,
679 ], 680 ],
680 "adapter": {"type": "HicAdapter", "hicLocation": {"uri": uri}}, 681 "adapter": {"type": "HicAdapter", "hicLocation": {"uri": uri}},
681 } 682 }
682 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) 683 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
683 self.trackIdlist.append(tId) 684 self.trackIdlist.append(copy.copy(tId))
684 685
685 def add_maf(self, data, trackData): 686 def add_maf(self, data, trackData):
686 """ 687 """
687 from https://github.com/cmdcolin/maf2bed 688 from https://github.com/cmdcolin/maf2bed
688 Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and chromosome name 689 Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and chromosome name
751 {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId}, 752 {"type": "LinearArcDisplay", "displayId": "%s-LinearArcDisplay" % tId},
752 ], 753 ],
753 } 754 }
754 style_json = self._prepare_track_style(trackDict) 755 style_json = self._prepare_track_style(trackDict)
755 trackDict["style"] = style_json 756 trackDict["style"] = style_json
756 self.tracksToAdd[gname].append(trackDict) 757 self.tracksToAdd[gname].append(copy.copy(trackDict))
757 self.trackIdlist.append(tId) 758 self.trackIdlist.append(copy.copy(tId))
758 if self.config_json.get("plugins", None): 759 if self.config_json.get("plugins", None):
759 self.config_json["plugins"].append(mafPlugin[0]) 760 self.config_json["plugins"].append(mafPlugin[0])
760 else: 761 else:
761 self.config_json.update(mafPlugin) 762 self.config_json.update(mafPlugin)
762 763
825 } 826 }
826 ], 827 ],
827 } 828 }
828 style_json = self._prepare_track_style(trackDict) 829 style_json = self._prepare_track_style(trackDict)
829 trackDict["style"] = style_json 830 trackDict["style"] = style_json
830 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) 831 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
831 self.trackIdlist.append(tId) 832 self.trackIdlist.append(copy.copy(tId))
832 833
833 def add_bam(self, data, trackData, bam_indexes=None, **kwargs): 834 def add_bam(self, data, trackData, bam_indexes=None, **kwargs):
834 tId = trackData["label"] 835 tId = trackData["label"]
835 realFName = trackData["path"] 836 realFName = trackData["path"]
836 useuri = trackData["useuri"].lower() == "yes" 837 useuri = trackData["useuri"].lower() == "yes"
882 }, 883 },
883 ], 884 ],
884 } 885 }
885 style_json = self._prepare_track_style(trackDict) 886 style_json = self._prepare_track_style(trackDict)
886 trackDict["style"] = style_json 887 trackDict["style"] = style_json
887 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) 888 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
888 self.trackIdlist.append(tId) 889 self.trackIdlist.append(copy.copy(tId))
889 890
890 def add_cram(self, data, trackData, cram_indexes=None, **kwargs): 891 def add_cram(self, data, trackData, cram_indexes=None, **kwargs):
891 tId = trackData["label"] 892 tId = trackData["label"]
892 realFName = trackData["path"] 893 realFName = trackData["path"]
893 categ = trackData["category"] 894 categ = trackData["category"]
948 }, 949 },
949 ], 950 ],
950 } 951 }
951 style_json = self._prepare_track_style(trackDict) 952 style_json = self._prepare_track_style(trackDict)
952 trackDict["style"] = style_json 953 trackDict["style"] = style_json
953 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) 954 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
954 self.trackIdlist.append(tId) 955 self.trackIdlist.append(copy.copy(tId))
955 956
956 def add_vcf(self, data, trackData): 957 def add_vcf(self, data, trackData):
957 tId = trackData["label"] 958 tId = trackData["label"]
958 categ = trackData["category"] 959 categ = trackData["category"]
959 useuri = trackData["useuri"].lower() == "yes" 960 useuri = trackData["useuri"].lower() == "yes"
998 }, 999 },
999 ], 1000 ],
1000 } 1001 }
1001 style_json = self._prepare_track_style(trackDict) 1002 style_json = self._prepare_track_style(trackDict)
1002 trackDict["style"] = style_json 1003 trackDict["style"] = style_json
1003 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) 1004 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
1004 self.trackIdlist.append(tId) 1005 self.trackIdlist.append(copy.copy(tId))
1005 1006
1006 def _sort_gff(self, data, dest): 1007 def _sort_gff(self, data, dest):
1007 # Only index if not already done 1008 # Only index if not already done
1008 if not os.path.exists(dest): 1009 if not os.path.exists(dest):
1009 cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s'" % ( 1010 cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s'" % (
1061 }, 1062 },
1062 ], 1063 ],
1063 } 1064 }
1064 style_json = self._prepare_track_style(trackDict) 1065 style_json = self._prepare_track_style(trackDict)
1065 trackDict["style"] = style_json 1066 trackDict["style"] = style_json
1066 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) 1067 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
1067 self.trackIdlist.append(tId) 1068 self.trackIdlist.append(copy.copy(tId))
1068 1069
1069 def add_bed(self, data, ext, trackData): 1070 def add_bed(self, data, ext, trackData):
1070 tId = trackData["label"] 1071 tId = trackData["label"]
1071 categ = trackData["category"] 1072 categ = trackData["category"]
1072 useuri = trackData["useuri"].lower() == "yes" 1073 useuri = trackData["useuri"].lower() == "yes"
1110 }, 1111 },
1111 ], 1112 ],
1112 } 1113 }
1113 style_json = self._prepare_track_style(trackDict) 1114 style_json = self._prepare_track_style(trackDict)
1114 trackDict["style"] = style_json 1115 trackDict["style"] = style_json
1115 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) 1116 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
1116 self.trackIdlist.append(tId) 1117 self.trackIdlist.append(copy.copy(tId))
1117 1118
1118 def add_paf(self, data, trackData, pafOpts, **kwargs): 1119 def add_paf(self, data, trackData, pafOpts, **kwargs):
1119 tname = trackData["name"] 1120 tname = trackData["name"]
1120 tId = trackData["label"] 1121 tId = trackData["label"]
1121 url = tId 1122 url = tId
1145 passnames.append(gname) 1146 passnames.append(gname)
1146 useuri = pafOpts["useuri"] == "true" 1147 useuri = pafOpts["useuri"] == "true"
1147 if gname not in self.genome_names: 1148 if gname not in self.genome_names:
1148 # ignore if already there - eg for duplicates among pafs. 1149 # ignore if already there - eg for duplicates among pafs.
1149 asstrack, first_contig = self.make_assembly(gpath, gname, useuri) 1150 asstrack, first_contig = self.make_assembly(gpath, gname, useuri)
1150 self.genome_names.append(gname) 1151 self.genome_names.append(copy.copy(gname))
1151 self.tracksToAdd[gname] = [] 1152 self.tracksToAdd[gname] = []
1152 self.assemblies.append(asstrack) 1153 self.assemblies.append(copy.copy(asstrack))
1153 self.ass_first_contigs.append(first_contig) 1154 self.ass_first_contigs.append(copy.copy(first_contig))
1154 trackDict = { 1155 trackDict = {
1155 "type": "SyntenyTrack", 1156 "type": "SyntenyTrack",
1156 "trackId": tId, 1157 "trackId": tId,
1157 "assemblyNames": passnames, 1158 "assemblyNames": passnames,
1158 "category": [ 1159 "category": [
1192 style_json = { 1193 style_json = {
1193 "type": "LinearBasicDisplay", 1194 "type": "LinearBasicDisplay",
1194 "displayId": "%s-LinearBasicDisplay" % tId, 1195 "displayId": "%s-LinearBasicDisplay" % tId,
1195 } 1196 }
1196 trackDict["style"] = style_json 1197 trackDict["style"] = style_json
1197 self.tracksToAdd[trackData["assemblyNames"]].append(trackDict) 1198 self.tracksToAdd[trackData["assemblyNames"]].append(copy.copy(trackDict))
1198 self.trackIdlist.append(tId) 1199 self.trackIdlist.append(copy.copy(tId))
1199 1200
1200 def process_annotations(self, track): 1201 def process_annotations(self, track):
1201 category = track["category"].replace("__pd__date__pd__", TODAY) 1202 category = track["category"].replace("__pd__date__pd__", TODAY)
1202 tt1 = ",/ :;\\" 1203 tt1 = ",/ :;\\"
1203 tt2 = "______" 1204 tt2 = "______"
1309 track["conf"]["options"]["paf"], 1310 track["conf"]["options"]["paf"],
1310 ) 1311 )
1311 else: 1312 else:
1312 logging.warning("Do not know how to handle %s", dataset_ext) 1313 logging.warning("Do not know how to handle %s", dataset_ext)
1313 # Return non-human label for use in other fields 1314 # Return non-human label for use in other fields
1315 logging.debug("### processanno ext=%s trackstoadd = %s" % (dataset_ext, self.tracksToAdd))
1314 yield outputTrackConfig["label"] 1316 yield outputTrackConfig["label"]
1315 1317
1316 def add_default_session(self, default_data): 1318 def add_default_session(self, default_data):
1317 """ 1319 """
1318 default session settings are hard and fragile. 1320 default session settings are hard and fragile.
1336 tracks_data = [] 1338 tracks_data = []
1337 for track_conf in self.tracksToAdd[gnome]: 1339 for track_conf in self.tracksToAdd[gnome]:
1338 tId = track_conf["trackId"] 1340 tId = track_conf["trackId"]
1339 if tId in default_data[gnome]["visibility"]["default_on"]: 1341 if tId in default_data[gnome]["visibility"]["default_on"]:
1340 track_types[tId] = track_conf["type"] 1342 track_types[tId] = track_conf["type"]
1341 style_data = default_data[gnome]["style"].get(tId, None) 1343 style_data = default_data[gnome]["style"].get(tId, {})
1342 if not style_data: 1344 if not style_data:
1343 logging.debug( 1345 logging.debug(
1344 "### No style data for %s in available default data %s" 1346 "No style data for %s in available default data %s"
1345 % (tId, default_data) 1347 % (tId, default_data)
1346 ) 1348 )
1347 style_data = {"type": "LinearBasicDisplay"} 1349 else:
1348 if "displays" in track_conf: 1350 logging.debug(
1349 disp = track_conf["displays"][0]["type"] 1351 "style data for %s = %s"
1350 style_data["type"] = disp 1352 % (tId, style_data)
1353 )
1354
1355 if style_data.get('type',None):
1356 style_data["type"] = "LinearBasicDisplay"
1357 if "displays" in track_conf:
1358 disp = track_conf["displays"][0]["type"]
1359 style_data["type"] = disp
1351 if track_conf.get("style_labels", None): 1360 if track_conf.get("style_labels", None):
1352 # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work 1361 # TODO fix this: it should probably go in a renderer block (SvgFeatureRenderer) but still does not work
1353 # TODO move this to per track displays? 1362 # TODO move this to per track displays?
1354 style_data["labels"] = track_conf["style_labels"] 1363 style_data["labels"] = track_conf["style_labels"]
1355 tracks_data.append( 1364 tracks_data.append(
1549 1558
1550 1559
def parse_style_conf(item):
    """Convert the text of a style option element into a native Python value.

    Args:
        item: an element-like object exposing a ``text`` attribute
            (a string, or ``None`` when the element has no text).

    Returns:
        ``True``/``False`` when the text is "yes"/"true" or "no"/"false"
        (case-insensitive), an ``int`` when the text consists only of decimal
        digits, and the text itself (including ``None`` or "") otherwise.
    """
    text = item.text
    # Elements without text have text=None; pass that through instead of
    # raising AttributeError on None.lower().
    if not text:
        return text
    lowered = text.lower()
    if lowered in ("false", "true", "yes", "no"):
        # The method must be *called*: comparing the bound method
        # ``text.lower`` against strings is always False.
        return lowered in ("yes", "true")
    # isdecimal() only accepts characters that int() can parse, unlike
    # isdigit(), which also matches e.g. superscript digits.
    if text.isdecimal():
        return int(text)
    return text
1556 1566
1557 1567
1558 if __name__ == "__main__": 1568 if __name__ == "__main__":
1559 parser = argparse.ArgumentParser(description="", epilog="") 1569 parser = argparse.ArgumentParser(description="", epilog="")
1560 parser.add_argument("--xml", help="Track Configuration") 1570 parser.add_argument("--xml", help="Track Configuration")
1630 track_conf["useuri"], 1640 track_conf["useuri"],
1631 os.path.realpath(x.attrib["path"]), 1641 os.path.realpath(x.attrib["path"]),
1632 ) 1642 )
1633 ) 1643 )
1634 else: 1644 else:
1635 if trackfiles: 1645 metadata = metadata_from_node(x.find("metadata"))
1636 metadata = metadata_from_node(x.find("metadata")) 1646 track_conf["dataset_id"] = metadata.get(
1637 track_conf["dataset_id"] = metadata.get( 1647 "dataset_id", "None"
1638 "dataset_id", "None" 1648 )
1649 if x.attrib["useuri"].lower() == "yes":
1650 tfa = (
1651 x.attrib["path"],
1652 x.attrib["ext"],
1653 x.attrib["useuri"],
1654 track_conf["label"],
1655 metadata,
1639 ) 1656 )
1640 if x.attrib["useuri"].lower() == "yes": 1657 else:
1641 tfa = ( 1658 tfa = (
1642 x.attrib["path"], 1659 os.path.realpath(x.attrib["path"]),
1643 x.attrib["ext"], 1660 x.attrib["ext"],
1644 x.attrib["useuri"], 1661 x.attrib["useuri"],
1645 track_conf["label"], 1662 track_conf["label"],
1646 metadata, 1663 metadata,
1647 ) 1664 )
1648 else: 1665 track_conf["trackfiles"].append(tfa)
1649 tfa = (
1650 os.path.realpath(x.attrib["path"]),
1651 x.attrib["ext"],
1652 x.attrib["useuri"],
1653 track_conf["label"],
1654 metadata,
1655 )
1656 track_conf["trackfiles"].append(tfa)
1657 1666
1658 if is_multi_bigwig: 1667 if is_multi_bigwig:
1659 metadata = metadata_from_node(x.find("metadata")) 1668 metadata = metadata_from_node(x.find("metadata"))
1660 1669
1661 track_conf["trackfiles"].append( 1670 track_conf["trackfiles"].append(
1668 ) 1677 )
1669 1678
1670 track_conf["category"] = track.attrib["cat"] 1679 track_conf["category"] = track.attrib["cat"]
1671 track_conf["format"] = track.attrib["format"] 1680 track_conf["format"] = track.attrib["format"]
1672 track_conf["conf"] = etree_to_dict(track.find("options")) 1681 track_conf["conf"] = etree_to_dict(track.find("options"))
1673 logging
1674 keys = jc.process_annotations(track_conf) 1682 keys = jc.process_annotations(track_conf)
1675 if keys: 1683 if keys:
1676 for key in keys: 1684 for key in keys:
1677 vis = track.attrib.get("visibility", "default_off") 1685 vis = track.attrib.get("visibility", "default_off")
1678 if not vis: 1686 if not vis:
1682 stile = {} 1690 stile = {}
1683 for trak in trakdat: 1691 for trak in trakdat:
1684 if trak["trackId"] == key: 1692 if trak["trackId"] == key:
1685 stile = trak.get("style", {}) 1693 stile = trak.get("style", {})
1686 if track.find("options/style"): 1694 if track.find("options/style"):
1687 supdate = { 1695 for item in track.find("options/style"):
1688 item.tag: parse_style_conf(item) 1696 if item.text:
1689 for item in track.find("options/style") 1697 stile[item.tag] = parse_style_conf(item)
1690 } 1698 logging.debug("stile=%s" % stile)
1691 stile.update(supdate)
1692 default_session_data[primaryGenome]["style"][key] = stile 1699 default_session_data[primaryGenome]["style"][key] = stile
1693 if track.find("options/style_labels"): 1700 if track.find("options/style_labels"):
1694 default_session_data[primaryGenome]["style_labels"][key] = { 1701 default_session_data[primaryGenome]["style_labels"][key] = {
1695 item.tag: parse_style_conf(item) 1702 item.tag: parse_style_conf(item)
1696 for item in track.find("options/style_labels") 1703 for item in track.find("options/style_labels")
1712 "tertiary_color": root.find("metadata/general/tertiary_color").text, 1719 "tertiary_color": root.find("metadata/general/tertiary_color").text,
1713 "quaternary_color": root.find("metadata/general/quaternary_color").text, 1720 "quaternary_color": root.find("metadata/general/quaternary_color").text,
1714 "font_size": root.find("metadata/general/font_size").text, 1721 "font_size": root.find("metadata/general/font_size").text,
1715 } 1722 }
1716 jc.add_general_configuration(general_data) 1723 jc.add_general_configuration(general_data)
1724 jc.add_default_session(default_session_data)
1717 trackconf = jc.config_json.get("tracks", []) 1725 trackconf = jc.config_json.get("tracks", [])
1718 for gnome in jc.genome_names: 1726 for gnome in jc.genome_names:
1719 gtracks = jc.tracksToAdd[gnome] 1727 gtracks = jc.tracksToAdd[gnome]
1720 if len(gtracks) > 0: 1728 if len(gtracks) > 0:
1721 logging.debug( 1729 logging.debug(
1737 jc.trackIdlist, 1745 jc.trackIdlist,
1738 json.dumps(trackconf, indent=2), 1746 json.dumps(trackconf, indent=2),
1739 ) 1747 )
1740 ) 1748 )
1741 jc.write_config() 1749 jc.write_config()
1742 jc.add_default_session(default_session_data)
1743 # note that this can be left in the config.json but has NO EFFECT if add_defsess_to_index is called. 1750 # note that this can be left in the config.json but has NO EFFECT if add_defsess_to_index is called.
1744 # jc.add_defsess_to_index(default_session_data) 1751 # jc.add_defsess_to_index(default_session_data)
1745 # jc.text_index() not sure what broke here. 1752 # jc.text_index() not sure what broke here.