comparison: env/lib/python3.9/site-packages/galaxy/util/dbkeys.py @ 0:4f3585e2f14b (draft, default, tip)
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children | |
| previous revision | this revision |
|---|---|
| -1:000000000000 | 0:4f3585e2f14b |
| 1 """ | |
| 2 Functionality for dealing with dbkeys. | |
| 3 """ | |
| 4 import logging | |
| 5 import os.path | |
| 6 import re | |
| 7 from json import loads | |
| 8 | |
| 9 from galaxy.util import ( | |
| 10 galaxy_directory, | |
| 11 unicodify, | |
| 12 ) | |
| 13 from galaxy.util.object_wrapper import sanitize_lists_to_string | |
| 14 | |
| 15 log = logging.getLogger(__name__) | |
| 16 | |
| 17 | |
| 18 def read_dbnames(filename): | |
| 19 """ Read build names from file """ | |
| 20 db_names = [] | |
| 21 try: | |
| 22 ucsc_builds = {} | |
| 23 man_builds = [] # assume these are integers | |
| 24 name_to_db_base = {} | |
| 25 if filename is None: | |
| 26 # Should only be happening with the galaxy.tools.parameters.basic:GenomeBuildParameter docstring unit test | |
| 27 filename = os.path.join(galaxy_directory(), 'tool-data', 'shared', 'ucsc', 'builds.txt.sample') | |
| 28 for line in open(filename): | |
| 29 try: | |
| 30 if line[0:1] == "#": | |
| 31 continue | |
| 32 fields = line.replace("\r", "").replace("\n", "").split("\t") | |
| 33 # Special case of unspecified build is at top of list | |
| 34 if fields[0] == "?": | |
| 35 db_names.insert(0, (fields[0], fields[1])) | |
| 36 continue | |
| 37 try: # manual build (i.e. microbes) | |
| 38 int(fields[0]) | |
| 39 man_builds.append((fields[1], fields[0])) | |
| 40 except Exception: # UCSC build | |
| 41 db_base = fields[0].rstrip('0123456789') | |
| 42 if db_base not in ucsc_builds: | |
| 43 ucsc_builds[db_base] = [] | |
| 44 name_to_db_base[fields[1]] = db_base | |
| 45 # we want to sort within a species numerically by revision number | |
| 46 build_rev = re.compile(r'\d+$') | |
| 47 try: | |
| 48 build_rev = int(build_rev.findall(fields[0])[0]) | |
| 49 except Exception: | |
| 50 build_rev = 0 | |
| 51 ucsc_builds[db_base].append((build_rev, fields[0], fields[1])) | |
| 52 except Exception: | |
| 53 continue | |
| 54 sort_names = sorted(name_to_db_base.keys()) | |
| 55 for name in sort_names: | |
| 56 db_base = name_to_db_base[name] | |
| 57 ucsc_builds[db_base].sort() | |
| 58 ucsc_builds[db_base].reverse() | |
| 59 ucsc_builds[db_base] = [(build, name) for _, build, name in ucsc_builds[db_base]] | |
| 60 db_names = list(db_names + ucsc_builds[db_base]) | |
| 61 if len(db_names) > 1 and len(man_builds) > 0: | |
| 62 db_names.append((GenomeBuilds.default_value, '----- Additional Species Are Below -----')) | |
| 63 man_builds.sort() | |
| 64 man_builds = [(build, name) for name, build in man_builds] | |
| 65 db_names = list(db_names + man_builds) | |
| 66 except Exception as e: | |
| 67 log.error("ERROR: Unable to read builds file: %s", unicodify(e)) | |
| 68 return db_names | |
| 69 | |
| 70 | |
| 71 class GenomeBuilds: | |
| 72 default_value = "?" | |
| 73 default_name = "unspecified (?)" | |
| 74 | |
| 75 def __init__(self, app, data_table_name="__dbkeys__", load_old_style=True): | |
| 76 self._app = app | |
| 77 self._data_table_name = data_table_name | |
| 78 self._static_chrom_info_path = app.config.len_file_path | |
| 79 # A dbkey can be listed multiple times, but with different names, so we can't use dictionaries for lookups | |
| 80 if load_old_style: | |
| 81 self._static_dbkeys = list(read_dbnames(app.config.builds_file_path)) | |
| 82 else: | |
| 83 self._static_dbkeys = [] | |
| 84 | |
| 85 def get_genome_build_names(self, trans=None): | |
| 86 # FIXME: how to deal with key duplicates? | |
| 87 rval = [(self.default_value, self.default_name)] | |
| 88 # load user custom genome builds | |
| 89 if trans is not None: | |
| 90 if trans.history: | |
| 91 # This is a little bit Odd. We are adding every .len file in the current history to dbkey list, | |
| 92 # but this is previous behavior from trans.db_names, so we'll continue to do it. | |
| 93 # It does allow one-off, history specific dbkeys to be created by a user. But we are not filtering, | |
| 94 # so a len file will be listed twice (as the build name and again as dataset name), | |
| 95 # if custom dbkey creation/conversion occurred within the current history. | |
| 96 datasets = trans.sa_session.query(self._app.model.HistoryDatasetAssociation) \ | |
| 97 .filter_by(deleted=False, history_id=trans.history.id, extension="len") | |
| 98 for dataset in datasets: | |
| 99 rval.append((dataset.dbkey, f"{dataset.name} ({dataset.dbkey}) [History]")) | |
| 100 user = trans.user | |
| 101 if user and hasattr(user, 'preferences') and 'dbkeys' in user.preferences: | |
| 102 user_keys = loads(user.preferences['dbkeys']) | |
| 103 for key, chrom_dict in user_keys.items(): | |
| 104 rval.append((key, "{} ({}) [Custom]".format(chrom_dict['name'], key))) | |
| 105 # Load old builds.txt static keys | |
| 106 rval.extend(self._static_dbkeys) | |
| 107 # load dbkeys from dbkey data table | |
| 108 dbkey_table = self._app.tool_data_tables.get(self._data_table_name, None) | |
| 109 if dbkey_table is not None: | |
| 110 for field_dict in dbkey_table.get_named_fields_list(): | |
| 111 rval.append((field_dict['value'], field_dict['name'])) | |
| 112 return rval | |
| 113 | |
| 114 def get_chrom_info(self, dbkey, trans=None, custom_build_hack_get_len_from_fasta_conversion=True): | |
| 115 # FIXME: flag to turn off custom_build_hack_get_len_from_fasta_conversion should not be required | |
| 116 chrom_info = None | |
| 117 db_dataset = None | |
| 118 # Collect chromInfo from custom builds | |
| 119 if trans: | |
| 120 db_dataset = trans.db_dataset_for(dbkey) | |
| 121 if db_dataset: | |
| 122 chrom_info = db_dataset.file_name | |
| 123 else: | |
| 124 # Do Custom Build handling | |
| 125 if trans.user and ('dbkeys' in trans.user.preferences) and (dbkey in loads(trans.user.preferences['dbkeys'])): | |
| 126 custom_build_dict = loads(trans.user.preferences['dbkeys'])[dbkey] | |
| 127 # HACK: the attempt to get chrom_info below will trigger the | |
| 128 # fasta-to-len converter if the dataset is not available or, | |
| 129 # which will in turn create a recursive loop when | |
| 130 # running the fasta-to-len tool. So, use a hack in the second | |
| 131 # condition below to avoid getting chrom_info when running the | |
| 132 # fasta-to-len converter. | |
| 133 if 'fasta' in custom_build_dict and custom_build_hack_get_len_from_fasta_conversion: | |
| 134 # Build is defined by fasta; get len file, which is obtained from converting fasta. | |
| 135 build_fasta_dataset = trans.sa_session.query(trans.app.model.HistoryDatasetAssociation).get(custom_build_dict['fasta']) | |
| 136 chrom_info = build_fasta_dataset.get_converted_dataset(trans, 'len').file_name | |
| 137 elif 'len' in custom_build_dict: | |
| 138 # Build is defined by len file, so use it. | |
| 139 chrom_info = trans.sa_session.query(trans.app.model.HistoryDatasetAssociation).get(custom_build_dict['len']).file_name | |
| 140 # Check Data table | |
| 141 if not chrom_info: | |
| 142 dbkey_table = self._app.tool_data_tables.get(self._data_table_name, None) | |
| 143 if dbkey_table is not None: | |
| 144 chrom_info = dbkey_table.get_entry('value', dbkey, 'len_path', default=None) | |
| 145 # use configured server len path | |
| 146 if not chrom_info: | |
| 147 # Default to built-in build. | |
| 148 # Since we are using an unverified dbkey, we will sanitize the dbkey before use | |
| 149 chrom_info = os.path.join(self._static_chrom_info_path, "%s.len" % sanitize_lists_to_string(dbkey)) | |
| 150 chrom_info = os.path.abspath(chrom_info) | |
| 151 return (chrom_info, db_dataset) |
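For reference, `read_dbnames` expects a tab-separated UCSC-style builds file: comment lines start with `#`, an optional `?` entry sits at the top for the unspecified build, and every other line is a `dbkey<TAB>display name` pair (an integer dbkey is treated as a manually curated build). A minimal sketch of driving it with a throwaway file; the sample entries below are illustrative rather than copied from Galaxy's shipped `builds.txt.sample`, and the sketch assumes the module above is importable as `galaxy.util.dbkeys` (its location under `site-packages` in this environment):

```python
import tempfile

from galaxy.util.dbkeys import read_dbnames

# Hypothetical builds.txt content; the real sample ships as
# tool-data/shared/ucsc/builds.txt.sample in a Galaxy checkout.
sample = (
    "#dbkey\tdescription\n"
    "?\tunspecified (?)\n"
    "hg38\tHuman Dec. 2013 (GRCh38/hg38) (hg38)\n"
    "hg19\tHuman Feb. 2009 (GRCh37/hg19) (hg19)\n"
    "mm10\tMouse Dec. 2011 (GRCm38/mm10) (mm10)\n"
)

with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as fh:
    fh.write(sample)
    builds_path = fh.name

# The "?" entry stays first; UCSC builds are grouped by species prefix
# ("hg", "mm") and sorted within each group by descending revision number.
for dbkey, display_name in read_dbnames(builds_path):
    print(dbkey, display_name)
```

`GenomeBuilds` itself only reads a handful of attributes from the Galaxy `app` object in the simplest code path, so it can be exercised with a stub. Everything about the stub below is an assumption for illustration (the `/tmp/len` path, the empty data-table lookup); it is not how Galaxy wires the class up in production, where `app` is the full application object:

```python
from types import SimpleNamespace

from galaxy.util.dbkeys import GenomeBuilds


class _NoDataTables:
    """Stand-in for app.tool_data_tables with no __dbkeys__ table."""

    def get(self, name, default=None):
        return default


# Hypothetical minimal app: only the attributes GenomeBuilds touches in
# this code path are provided.
app = SimpleNamespace(
    config=SimpleNamespace(
        len_file_path="/tmp/len",  # assumed directory of <dbkey>.len files
        builds_file_path=None,     # unused because load_old_style=False below
    ),
    tool_data_tables=_NoDataTables(),
    model=None,                    # only touched when a trans is supplied
)

builds = GenomeBuilds(app, load_old_style=False)
print(builds.get_genome_build_names())  # [('?', 'unspecified (?)')]
# With no trans, no data table entry and no custom build, get_chrom_info
# falls back to the configured len path for the (sanitized) dbkey.
print(builds.get_chrom_info("hg38"))    # ('/tmp/len/hg38.len', None)
```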
