Mercurial > repos > iuc > data_manager_snpeff
changeset 13:c3cdca7f40a2 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_snpeff commit 225ce35e1c3514cdba7a3637eba9aee964fa85f7
| author | iuc |
|---|---|
| date | Fri, 13 Mar 2026 13:07:48 +0000 |
| parents | 617d95d798fa |
| children | |
| files | data_manager/data_manager_snpEff_databases.py data_manager/data_manager_snpEff_databases.xml data_manager/data_manager_snpEff_download.py data_manager_conf.xml |
| diffstat | 4 files changed, 59 insertions(+), 36 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/data_manager_snpEff_databases.py Tue Mar 10 10:12:26 2026 +0000 +++ b/data_manager/data_manager_snpEff_databases.py Fri Mar 13 13:07:48 2026 +0000 @@ -2,10 +2,24 @@ import json import optparse import os +import re import subprocess import sys +def getSnpeffVersion(): + snpeff_version = 'SnpEff ?.?' + args = ['snpEff', '-version'] + try: + version_output = subprocess.check_output(args, shell=False).decode() + except subprocess.CalledProcessError as e: + sys.exit(e.returncode) + m = re.match(r'^(SnpEff)\s*(\d+\.\d+).*$', version_output) + if m: + snpeff_version = m.groups()[0] + m.groups()[1] + return snpeff_version + + def fetch_databases(data_manager_dict, target_directory): if not os.path.exists(target_directory): os.makedirs(target_directory) @@ -18,6 +32,8 @@ data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) data_manager_dict['data_tables']['snpeffv_databases'] = data_manager_dict['data_tables'].get('snpeffv_databases', []) data_table_entries = [] + snpeff_version = getSnpeffVersion() + with open(databases_path, 'r') as fh: for line in fh: fields = line.split('\t') @@ -29,7 +45,7 @@ if genome_version == '30c2c903' or fields[1].strip() == 'TestCase' or fields[1].strip().startswith('Test_'): continue description = fields[1].strip() + ' : ' + genome_version - data_table_entries.append(dict(value=genome_version, name=description)) + data_table_entries.append(dict(key=snpeff_version + '_' + genome_version, version=snpeff_version, value=genome_version, name=description)) data_manager_dict['data_tables']['snpeffv_databases'] = data_table_entries return data_manager_dict
```diff
--- a/data_manager/data_manager_snpEff_databases.xml Tue Mar 10 10:12:26 2026 +0000
+++ b/data_manager/data_manager_snpEff_databases.xml Fri Mar 13 13:07:48 2026 +0000
@@ -16,7 +16,7 @@
  <output name="out_file">
  <assert_contents>
  <!-- Check that a genome was added -->
- <has_text text="GRCh38.86" />
+ <has_text text="ebola_zaire" />
  </assert_contents>
  </output>
  </test>
```
--- a/data_manager/data_manager_snpEff_download.py Tue Mar 10 10:12:26 2026 +0000 +++ b/data_manager/data_manager_snpEff_download.py Fri Mar 13 13:07:48 2026 +0000 @@ -47,18 +47,14 @@ def getSnpeffVersion(): snpeff_version = 'SnpEff ?.?' - stderr_path = 'snpeff.err' - args = ['snpEff', '-h'] - with open(stderr_path, 'w') as stderr_fh: - return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno()) - if return_code != 255: - sys.exit(return_code) - with open(stderr_path) as fh: - for line in fh: - m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line) - if m: - snpeff_version = m.groups()[0] + m.groups()[1] - break + args = ['snpEff', '-version'] + try: + version_output = subprocess.check_output(args, shell=False).decode() + except subprocess.CalledProcessError as e: + sys.exit(e.returncode) + m = re.match(r'^(SnpEff)\s*(\d+\.\d+).*$', version_output) + if m: + snpeff_version = m.groups()[0] + m.groups()[1] return snpeff_version @@ -101,34 +97,40 @@ genome_path = os.path.join(data_dir, genome_version) snpeff_version = getSnpeffVersion() key = snpeff_version + '_' + genome_version + db_version = None + genomedb_name = regulationdb_name = "" + if os.path.isdir(genome_path): for dirpath, _, files in os.walk(genome_path): for fname in files: if fname.startswith('snpEffectPredictor'): # if snpEffectPredictor.bin download succeeded - name = genome_version + (' : ' + organism if organism else '') - data_table_entry = dict( - key=key, - version=getSnpeffDbVersion(os.path.join(dirpath, fname)) or snpeff_version, - value=genome_version, - name=name, - path=data_dir - ) - _add_data_table_entry(data_manager_dict, 'snpeffv_genomedb', data_table_entry) + genomedb_name = genome_version + (' : ' + organism if organism else '') + db_version = getSnpeffDbVersion(os.path.join(dirpath, fname)) or snpeff_version else: m = re.match(regulation_pattern, fname) if m: - name = m.groups()[0] - data_table_entry = dict(key=key, version=snpeff_version, 
genome=genome_version, value=name, name=name) - _add_data_table_entry(data_manager_dict, 'snpeffv_regulationdb', data_table_entry) - return data_manager_dict - + regulationdb_name = m.groups()[0] -def _add_data_table_entry(data_manager_dict, data_table, data_table_entry): - data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) - data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, []) - data_manager_dict['data_tables'][data_table].append(data_table_entry) - return data_manager_dict + if db_version: + data_table_entry = dict( + key=key, + version=db_version, + value=genome_version, + name=genomedb_name, + path=f"snpEff/{db_version}/data" + ) + data_manager_dict['data_tables']['snpeffv_genomedb'].append(data_table_entry) + + if regulationdb_name: + data_table_entry = dict( + key=key, + version=db_version or snpeff_version, + genome=genome_version, + value=regulationdb_name, + name=regulationdb_name + ) + data_manager_dict['data_tables']['snpeffv_regulationdb'].append(data_table_entry) def main(): @@ -143,7 +145,12 @@ params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) - data_manager_dict = {} + data_manager_dict = { + 'data_tables': { + 'snpeffv_genomedb': [], + 'snpeffv_regulationdb': [] + } + } # Create SnpEff Reference Data for genome_version, organism in zip(options.genome_version.split(','), getOrganismNames(options.genome_version, options.organism).split(',')):
```diff
--- a/data_manager_conf.xml Tue Mar 10 10:12:26 2026 +0000
+++ b/data_manager_conf.xml Fri Mar 13 13:07:48 2026 +0000
@@ -19,9 +19,9 @@
  <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool -->
  <column name="path" output_ref="out_file" >
  <move type="directory" relativize_symlinks="True">
- <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">snpEff/v4_3/data</target>
+ <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${path}</target>
  </move>
- <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/snpEff/v4_3/data</value_translation>
+ <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${path}</value_translation>
  <value_translation type="function">abspath</value_translation>
  </column>
  </output>
```
