Mercurial > repos > iuc > data_manager_omamer
comparison data_manager/omamer.py @ 0:e1502e4f8725 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_omamer commit 8ff9ada22d22cb94ddfff51bcdd3ab7d30104f1a
author | iuc |
---|---|
date | Wed, 21 Feb 2024 19:26:53 +0000 |
parents | |
children | 57db282ca204 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e1502e4f8725 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import json | |
5 import os | |
6 import sys | |
7 from pathlib import Path | |
8 | |
9 import requests | |
10 | |
# URL template for an OMAmer dataset; "{dataset}" is filled with a file name
# taken from OMAMER_DATASETS
OMAMER_DATASETS_URL = "https://omabrowser.org/All/{dataset}"

# Selectable dataset names mapped to the versioned ".h5" file to download
OMAMER_DATASETS = dict(
    Primates="Primates-v2.0.0.h5",
    Viridiplantae="Viridiplantae-v2.0.0.h5",
    Metazoa="Metazoa-v2.0.0.h5",
    LUCA="LUCA-v0.2.5.h5",
)


# Directory (relative to the working directory) that receives the download
DEFAULT_OUTPUT_DIR = "database_omamer"
24 | |
25 | |
def download_file(url, dest, timeout=(30, 300)):
    """Stream *url* into the local file *dest*, exiting the process on failure.

    Args:
        url: Address to fetch with an HTTP GET request.
        dest: Path of the file to (over)write with the response body.
        timeout: Value passed to ``requests.get`` as ``timeout``; either a
            single number of seconds or a ``(connect, read)`` tuple.

    On any ``requests`` error (connection problem, HTTP error status,
    timeout) the partially written *dest* is removed, a message is printed
    to stderr and the process exits with status 1.
    """
    started_writing = False
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            started_writing = True
            with open(dest, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except requests.exceptions.RequestException as e:
        # Only clean up a file this call created/truncated itself, so that a
        # failure before opening *dest* never deletes a pre-existing file.
        if started_writing:
            try:
                os.remove(dest)
            except OSError:
                pass
        print(f"Error downloading {url}: {e}", file=sys.stderr)
        sys.exit(1)
    print(f"Downloaded: {url} to {dest}")
37 | |
38 | |
def main(args):
    """Download the selected OMAmer dataset and write the data manager JSON.

    Args:
        args: Parsed command-line namespace providing ``name`` (a key of
            ``OMAMER_DATASETS``) and ``json`` (path of the JSON file to
            write).

    The dataset is stored inside ``DEFAULT_OUTPUT_DIR`` and a single
    ``omamer_data`` data table entry describing it is written to
    ``args.json``. Exits with status 1 if ``args.name`` is not a known
    dataset.
    """
    # Validate before touching the filesystem so an unknown name leaves no
    # empty output directory behind.
    if args.name not in OMAMER_DATASETS:
        print(f"Error: Selected dataset '{args.name}' not found.", file=sys.stderr)
        sys.exit(1)

    # exist_ok avoids the race between an existence check and the creation.
    output_dir = DEFAULT_OUTPUT_DIR
    os.makedirs(output_dir, exist_ok=True)

    # Download the selected OMAmer dataset; the local copy is stored under
    # the file name with its final extension (".h5") removed.
    dataset = OMAMER_DATASETS[args.name]
    url = OMAMER_DATASETS_URL.format(dataset=dataset)
    base_name = os.path.splitext(dataset)[0]
    destination_path = os.path.join(output_dir, base_name)
    download_file(url, destination_path)

    # NOTE(review): base_name has already lost ".h5", so this second
    # splitext also strips the last dotted version component
    # ("Primates-v2.0.0" -> "Primates-v2.0"). The resulting identifier is
    # kept unchanged here because existing data table entries may rely on
    # it -- confirm whether the full version string was intended.
    entry_id = os.path.splitext(os.path.basename(base_name))[0]
    data_manager_entry = {
        "value": entry_id,
        "name": entry_id,
        "version": "2.0.2",
        "path": str(Path(output_dir)),
    }

    # Creates a JSON dictionary representing the Data Manager configuration
    data_manager_json = {"data_tables": {"omamer_data": [data_manager_entry]}}

    # Writes this JSON dictionary to the specified output file
    with open(args.json, "w") as fh:
        json.dump(data_manager_json, fh, indent=2, sort_keys=True)
72 | |
73 | |
if __name__ == "__main__":
    # Set up argparse to specify expected command line arguments
    parser = argparse.ArgumentParser(description='Download data for OMAmer')
    parser.add_argument('--name', default='Primates', choices=list(OMAMER_DATASETS), help='Select dataset to download')
    # Required: main() always opens this path for writing, so a missing value
    # should be rejected up front rather than failing after the download.
    parser.add_argument('--json', required=True, help='Path to JSON file')

    args = parser.parse_args()

    main(args)