comparison data_manager/data_manager_fetch_motus_db.py @ 0:0370fe3cf518 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/ commit f42263c2875a78500f141027803a38606ecb0f14
author bgruening
date Thu, 10 Oct 2024 11:00:40 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0370fe3cf518
1 #!/usr/bin/env python
2
3 import argparse
4 import json
5 import os
6 import shutil
7 import subprocess
8 import sys
9 import tarfile
10 from datetime import datetime
11
12 import wget
13
# Supported mOTUs database releases, keyed by version string; each value is
# the URL of the corresponding tar.gz archive on Zenodo.
version_mapping = {
    "3.1.0": "https://zenodo.org/records/7778108/files/db_mOTU_v3.1.0.tar.gz",
    "3.0.1": "https://zenodo.org/records/5140350/files/db_mOTU_v3.0.1.tar.gz",
    "3.0.0": "https://zenodo.org/records/5012106/files/db_mOTU_v3.0.0.tar.gz",
}
19
20
def download_untar_store(url, tmp_path, dest_path):
    """
    Download a tar.gz file containing one folder,
    extract that folder and copy its content to dest_path.

    :param url: URL of the tar.gz archive to download
    :param tmp_path: scratch directory used for the download and the
        extraction; created if missing and always removed before returning,
        also when a step fails
    :param dest_path: directory the extracted folder is copied to; it must
        not exist yet (requirement of shutil.copytree)
    :raises SystemExit: if the archive does not contain exactly one
        top-level entry
    """

    extract_path = os.path.join(tmp_path, "extract")

    os.makedirs(tmp_path, exist_ok=True)

    try:
        # download data
        downloaded = wget.download(url, out=tmp_path)
        # NOTE(review): wget.download() appears to return a path that already
        # includes the output directory; only prefix tmp_path when the
        # returned value is not usable as-is (matters for relative tmp_path).
        if os.path.isfile(downloaded):
            tarfile_path = downloaded
        else:
            tarfile_path = os.path.join(tmp_path, downloaded)

        # the context manager closes the archive even if extraction fails
        # NOTE(review): archives come from the fixed Zenodo URLs of this
        # script; for untrusted archives use an extraction filter.
        with tarfile.open(tarfile_path) as tar:
            tar.extractall(extract_path)

        entries = os.listdir(extract_path)
        if len(entries) != 1:
            # really abort: returning normally would let the caller register
            # a database path that was never created
            sys.exit(
                f"Expected exactly one folder in the archive, "
                f"found {len(entries)}, aborting !"
            )

        folder_path = os.path.join(extract_path, entries[0])

        print(f"Copy data to {dest_path}")
        shutil.copytree(folder_path, dest_path)
        print("Done !")
    finally:
        # never leave the (potentially large) download behind
        shutil.rmtree(tmp_path)
48
49
def _run_or_exit(command_args, error_message):
    """Run command_args; on a non-zero exit code print error_message to
    stderr and exit the script with that code."""
    return_code = subprocess.call(command_args, shell=False)
    if return_code:
        print(error_message, file=sys.stderr)
        sys.exit(return_code)


def main():
    """
    Entry point of the data manager.

    Reads the JSON file given with --out to find the extra_files_path,
    stores the requested mOTUs DB version in a timestamped folder below it
    (or, with --test, copies the bundled dummy DB) and overwrites the JSON
    file with the new "motus_db_versioned" data table entry.
    """
    # local import: the module only imports `datetime` from datetime
    from datetime import timezone

    # Parse Command Line
    parser = argparse.ArgumentParser(description="Create data manager JSON.")
    parser.add_argument(
        "--out", dest="output", action="store", required=True, help="JSON filename"
    )
    # restricting to the known versions yields a usage error instead of a
    # KeyError traceback for a missing or unsupported version
    parser.add_argument(
        "--version",
        dest="version",
        action="store",
        required=True,
        choices=list(version_mapping),
        help="Version of the DB",
    )
    parser.add_argument(
        "--test",
        action="store_true",
        help="option to test the script with an lighted database",
    )

    args = parser.parse_args()

    # the output file of a DM is a json containing args that can be used by the DM
    # most tools mainly use these args to find the extra_files_path for the DM, which can be used
    # to store the DB data
    with open(args.output) as fh:
        params = json.load(fh)

    workdir = params["output_data"][0]["extra_files_path"]
    os.makedirs(workdir, exist_ok=True)

    # timezone-aware replacement of the deprecated datetime.utcnow();
    # the formatted value is unchanged
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%SZ")
    db_value = f"db_from_{timestamp}"
    db_path = os.path.join(workdir, db_value)
    tmp_path = os.path.join(workdir, "tmp")
    url = version_mapping[args.version]

    # create DB
    if args.test:
        # the test only checks that the download URL is reachable and then
        # registers the dummy DB shipped next to this script

        # check if link is there
        _run_or_exit(["wget", "--spider", url], "Error downloading motus database.")

        # copy the test DB
        # TODO: use a functional test DB once available:
        # https://github.com/motu-tool/mOTUs/issues/121
        test_db_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "motus_test_DB_non_functional"
        )
        _run_or_exit(
            ["cp", "-r", test_db_path, db_path], "Error copying motus database."
        )

    else:

        # download data
        download_untar_store(url, tmp_path, db_path)

    # Update Data Manager JSON and write to file
    data_manager_entry = {
        "data_tables": {
            "motus_db_versioned": {
                "value": db_value,
                "version": args.version,
                "name": f"mOTUs DB version {args.version} downloaded at {datetime.now()}",
                "path": db_path,
            }
        }
    }

    with open(args.output, "w") as fh:
        json.dump(data_manager_entry, fh, sort_keys=True)
122
123
# Run the data manager only when executed as a script, not when imported.
if __name__ == "__main__":
    main()