Mercurial > repos > iuc > data_manager_omamer
diff data_manager/omamer.py @ 0:e1502e4f8725 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_omamer commit 8ff9ada22d22cb94ddfff51bcdd3ab7d30104f1a
author | iuc |
---|---|
date | Wed, 21 Feb 2024 19:26:53 +0000 |
parents | |
children | 57db282ca204 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/omamer.py Wed Feb 21 19:26:53 2024 +0000 @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +import argparse +import json +import os +import sys +from pathlib import Path + +import requests + +# URL for downloading OMAmer datasets +OMAMER_DATASETS_URL = "https://omabrowser.org/All/{dataset}" + +# List of OMAmer data sets with versions +OMAMER_DATASETS = { + "Primates": "Primates-v2.0.0.h5", + "Viridiplantae": "Viridiplantae-v2.0.0.h5", + "Metazoa": "Metazoa-v2.0.0.h5", + "LUCA": "LUCA-v0.2.5.h5", +} + + +DEFAULT_OUTPUT_DIR = "database_omamer" + + +def download_file(url, dest): + try: + with requests.get(url, stream=True) as r: + r.raise_for_status() + with open(dest, 'wb') as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + print(f"Downloaded: {url} to {dest}") + except requests.exceptions.RequestException as e: + print(f"Error downloading {url}: {e}") + sys.exit(1) + + +def main(args): + # Set output directory to default + output_dir = DEFAULT_OUTPUT_DIR + + # Create output directory if none exists + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + # Check if the selected dataset exists + if args.name not in OMAMER_DATASETS: + print(f"Error: Selected dataset '{args.name}' not found.") + sys.exit(1) + + # Download the selected OMAmer dataset + dataset = OMAMER_DATASETS[args.name] + url = OMAMER_DATASETS_URL.format(dataset=dataset) + base_name = os.path.splitext(dataset)[0] + destination_path = os.path.join(output_dir, base_name) + download_file(url, destination_path) + + data_manager_entry = { + "value": os.path.splitext(os.path.basename(base_name))[0], + "name": os.path.splitext(os.path.basename(base_name))[0], + "version": "2.0.2", + "path": str(Path(output_dir)), + } + + # Creates a JSON dictionary representing the Data Manager configuration + data_manager_json = {"data_tables": {"omamer_data": [data_manager_entry]}} + + # Writes this JSON dictionary to the specified output file + with open(args.json, "w") as fh: + json.dump(data_manager_json, fh, indent=2, sort_keys=True) + + +if __name__ == "__main__": + # Set up argparse to specify expected command line arguments + parser = argparse.ArgumentParser(description='Download data for OMAmer') + parser.add_argument('--name', default='Primates', choices=OMAMER_DATASETS.keys(), help='Select dataset to download') + parser.add_argument('--json', help='Path to JSON file') + + args = parser.parse_args() + + main(args)