comparison data_manager_fetch_mapseq_db.py @ 0:dbf2735e8480 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/ commit 66e797aaa79b92c282a8127260cdfd5702207e35
author iuc
date Wed, 13 Sep 2023 19:54:19 +0000
parents
children 4cd97cc67061
#!/usr/bin/env python

import argparse
import json
import os
import shutil
import tarfile
from datetime import datetime

import wget  # third-party "wget" package from PyPI

DB_paths = {
    "mgnify_lsu": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipeline-5.0/ref-dbs/silva_lsu-20200130.tar.gz",
    "mgnify_ssu": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipeline-5.0/ref-dbs/silva_ssu-20200130.tar.gz",
    "mgnify_its_unite": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipeline-5.0/ref-dbs/UNITE-20200214.tar.gz",
    "mgnify_its_itsonedb": "ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/pipeline-5.0/ref-dbs/ITSoneDB-20200214.tar.gz",
    "test_lsu": "https://zenodo.org/record/8205348/files/test_lsu.tar.gz",
}

DB_names = {
    "mgnify_lsu": "MGnify LSU (v5.0.7) - silva_lsu-20200130",
    "mgnify_ssu": "MGnify SSU (v5.0.7) - silva_ssu-20200130",
    "mgnify_its_unite": "MGnify ITS UNITE (v5.0.7) - UNITE-20200214",
    "mgnify_its_itsonedb": "MGnify ITS ITSoneDB (v5.0.7) - ITSoneDB-20200214",
    "test_lsu": "Trimmed LSU Test DB",
}


def download_untar_store(url, tmp_path, dest_path):
    """
    Download a tar.gz file that contains a single folder,
    extract that folder and move its content into dest_path.
    """

    extract_path = os.path.join(tmp_path, "extract")

    os.makedirs(tmp_path, exist_ok=True)

    # download the archive into tmp_path
    filename = wget.download(url, out=tmp_path)
    tarfile_path = os.path.join(tmp_path, filename)
    with tarfile.open(tarfile_path) as tar:
        tar.extractall(extract_path)

    # the archive is expected to contain exactly one top-level folder
    if len(os.listdir(extract_path)) > 1:
        raise ValueError("More than one folder in the archive, aborting!")

    for folder in os.listdir(extract_path):
        folder_path = os.path.join(extract_path, folder)

        print(f"Copy data to {dest_path}")
        shutil.copytree(folder_path, dest_path)
        print("Done!")

    shutil.rmtree(tmp_path)

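# Usage sketch (illustrative only; the paths below are hypothetical):
#   download_untar_store(DB_paths["test_lsu"], "/tmp/mapseq_tmp", "/data/mapseq/test_lsu")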

def main():
    # Parse command line
    parser = argparse.ArgumentParser(description="Create data manager JSON.")
    parser.add_argument("--out", dest="output", action="store", help="JSON filename")
    parser.add_argument("--version", dest="version", action="store", help="Version of the DB")
    parser.add_argument("--database-type", dest="db_type", action="store", help="DB type")
    parser.add_argument(
        "--test",
        action="store_true",
        help="test the script with a lightweight database",
    )

    args = parser.parse_args()

    # The output file of a data manager (DM) is a JSON file containing args that the DM can use.
    # Most tools mainly use these args to find the DM's extra_files_path, which can be used
    # to store the DB data.
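    # Sketch of the expected input (assuming the usual Galaxy data manager
    # contract; the exact keys can vary with the Galaxy version):
    #   {"output_data": [{"extra_files_path": "/path/to/extra_files"}], ...}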
    with open(args.output) as fh:
        params = json.load(fh)

    print(params)

    workdir = params["output_data"][0]["extra_files_path"]
    os.mkdir(workdir)

    time = datetime.utcnow().strftime("%Y-%m-%d")
    db_value = f"{args.db_type}_from_{time}"

    # output paths
    db_path = os.path.join(workdir, db_value)
    tmp_path = os.path.join(workdir, "tmp")

    # pick the DB to create
    if args.test:
        url = DB_paths["test_lsu"]
    else:
        url = DB_paths[args.db_type]

    # download data
    download_untar_store(url, tmp_path, db_path)

    db_name = DB_names[args.db_type]
    # update data manager JSON and write it to file
    data_manager_entry = {
        "data_tables": {
            "mapseq_db": {
                "value": db_value,
                "name": f"{db_name} downloaded at {time}",
                "version": args.version,
                "path": db_path,
            }
        }
    }

    with open(args.output, "w") as fh:
        json.dump(data_manager_entry, fh, sort_keys=True)


if __name__ == "__main__":
    main()
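
# Invocation sketch (values are illustrative; in practice Galaxy runs this
# script and supplies the --out JSON itself):
#   python data_manager_fetch_mapseq_db.py --out output.json --version 5.0.7 --database-type mgnify_ssu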