Mercurial > repos > bgruening > data_manager_motus
comparison data_manager/data_manager_fetch_motus_db.py @ 0:0370fe3cf518 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/ commit f42263c2875a78500f141027803a38606ecb0f14
| author | bgruening | 
|---|---|
| date | Thu, 10 Oct 2024 11:00:40 +0000 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:0370fe3cf518 | 
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import argparse | |
| 4 import json | |
| 5 import os | |
| 6 import shutil | |
| 7 import subprocess | |
| 8 import sys | |
| 9 import tarfile | |
| 10 from datetime import datetime | |
| 11 | |
| 12 import wget | |
| 13 | |
# Download URL of the database archive for every supported mOTUs DB version.
# All archives are hosted on Zenodo and follow the same URL pattern, so the
# mapping is derived from (version, Zenodo record id) pairs.
version_mapping = {
    version: f"https://zenodo.org/records/{record}/files/db_mOTU_v{version}.tar.gz"
    for version, record in (
        ("3.1.0", 7778108),
        ("3.0.1", 5140350),
        ("3.0.0", 5012106),
    )
}
| 19 | |
| 20 | |
def download_untar_store(url, tmp_path, dest_path):
    """
    Download a tar.gz file containing one folder,
    extract that folder and copy its content to dest_path.

    :param url: URL of the tar.gz archive to download
    :param tmp_path: scratch directory used for the download and the
        extraction; created if missing and always removed before returning,
        even when an error occurs
    :param dest_path: target directory handed to ``shutil.copytree``
    :raises RuntimeError: if the archive does not contain exactly one
        top-level entry
    """

    extract_path = os.path.join(tmp_path, "extract")

    os.makedirs(tmp_path, exist_ok=True)

    try:
        # download data
        filename = wget.download(url, out=tmp_path)

        # Use the returned path as-is when it already points at the
        # downloaded file; only prefix tmp_path when it is a bare file name.
        # Joining unconditionally duplicates the directory for a relative
        # tmp_path.
        tarfile_path = filename
        if not os.path.exists(tarfile_path):
            tarfile_path = os.path.join(tmp_path, filename)

        # the context manager closes the archive even if extraction fails
        with tarfile.open(tarfile_path) as tar:
            if hasattr(tarfile, "data_filter"):
                # refuse members that would be written outside extract_path
                tar.extractall(extract_path, filter="data")
            else:
                # interpreter without extraction filter support
                tar.extractall(extract_path)

        entries = os.listdir(extract_path)
        if len(entries) != 1:
            # Fail loudly: returning normally here would let the caller
            # register a database path that was never created.
            raise RuntimeError(
                "Expected exactly one folder in the downloaded archive, "
                f"found {len(entries)}, aborting !"
            )

        folder_path = os.path.join(extract_path, entries[0])

        print(f"Copy data to {dest_path}")
        shutil.copytree(folder_path, dest_path)
        print("Done !")
    finally:
        # never leave the downloaded archive / extracted files behind
        shutil.rmtree(tmp_path, ignore_errors=True)
| 48 | |
| 49 | |
def main():
    """
    Fetch a mOTUs database and write the data manager JSON describing it.

    Command line arguments:
      --out      JSON file; it is first read to obtain the
                 ``extra_files_path`` of the first ``output_data`` entry and
                 finally overwritten with the new data table entry
      --version  database version, one of the keys of ``version_mapping``
      --test     only check that the download URL is reachable and copy the
                 ``motus_test_DB_non_functional`` folder located next to this
                 script instead of downloading the database

    Exits with the return code of the failing external command when the URL
    check or the copy of the test database fails.
    """
    # local import: the module level only imports ``datetime`` itself
    from datetime import timezone

    # Parse Command Line
    parser = argparse.ArgumentParser(description="Create data manager JSON.")
    parser.add_argument(
        "--out", dest="output", action="store", required=True, help="JSON filename"
    )
    parser.add_argument(
        "--version",
        dest="version",
        action="store",
        required=True,
        # reject unknown versions with a usage error instead of a KeyError
        choices=list(version_mapping),
        help="Version of the DB",
    )
    parser.add_argument(
        "--test",
        action="store_true",
        help="option to test the script with a lightweight database",
    )

    args = parser.parse_args()

    # the output file of a DM is a json containing args that can be used by the DM
    # most tools mainly use these args to find the extra_files_path for the DM, which can be used
    # to store the DB data
    with open(args.output) as fh:
        params = json.load(fh)

    workdir = params["output_data"][0]["extra_files_path"]
    # tolerate an already existing directory and missing parent directories
    os.makedirs(workdir, exist_ok=True)

    # timezone-aware replacement of the deprecated datetime.utcnow();
    # the formatted value is unchanged
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%SZ")
    db_value = "db_from_{0}".format(timestamp)
    db_path = os.path.join(workdir, db_value)
    tmp_path = os.path.join(workdir, "tmp")
    url = version_mapping[args.version]

    # create DB
    if args.test:
        # the test only checks that the mOTUs database download link is
        # reachable; the database itself is not downloaded

        # check if link is there
        return_code = subprocess.run(["wget", "--spider", url], shell=False).returncode
        if return_code:
            print("Error downloading motus database.", file=sys.stderr)
            sys.exit(return_code)

        # copy the test DB
        # TODO once available: https://github.com/motu-tool/mOTUs/issues/121
        test_db_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "motus_test_DB_non_functional"
        )
        return_code = subprocess.run(
            ["cp", "-r", test_db_path, db_path], shell=False
        ).returncode
        if return_code:
            print("Error copying motus database.", file=sys.stderr)
            sys.exit(return_code)

    else:

        # download data
        download_untar_store(url, tmp_path, db_path)

    # Update Data Manager JSON and write to file
    data_manager_entry = {
        "data_tables": {
            "motus_db_versioned": {
                "value": db_value,
                "version": args.version,
                "name": f"mOTUs DB version {args.version} downloaded at {datetime.now()}",
                "path": db_path,
            }
        }
    }

    with open(args.output, "w") as fh:
        json.dump(data_manager_entry, fh, sort_keys=True)
| 122 | |
| 123 | |
# Run the data manager only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
