view data_manager/data_manager_fetch_motus_db.py @ 0:0370fe3cf518 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/data_managers/ commit f42263c2875a78500f141027803a38606ecb0f14
author bgruening
date Thu, 10 Oct 2024 11:00:40 +0000
parents
children
line wrap: on
line source

#!/usr/bin/env python

import argparse
import json
import os
import shutil
import subprocess
import sys
import tarfile
from datetime import datetime

import wget

# Supported mOTUs database versions, mapped to the URL of the corresponding
# tar.gz archive on Zenodo. The keys are the values accepted for --version;
# main() looks the download URL up in this table.
version_mapping = {
    "3.1.0": "https://zenodo.org/records/7778108/files/db_mOTU_v3.1.0.tar.gz",
    "3.0.1": "https://zenodo.org/records/5140350/files/db_mOTU_v3.0.1.tar.gz",
    "3.0.0": "https://zenodo.org/records/5012106/files/db_mOTU_v3.0.0.tar.gz",
}


def download_untar_store(url, tmp_path, dest_path):
    """
    Download a tar.gz file containing one folder,
    extract that folder and copy its content to dest_path.

    :param url: URL of the tar archive to download
    :param tmp_path: scratch directory used for the download and the
        extraction; it is created if missing and always removed afterwards,
        also when an error occurs
    :param dest_path: directory that will receive the content of the single
        top-level folder of the archive (must not exist yet, as required by
        shutil.copytree)
    :raises RuntimeError: if the archive does not contain exactly one
        top-level entry
    """

    extract_path = os.path.join(tmp_path, "extract")

    os.makedirs(tmp_path, exist_ok=True)

    try:
        # download data
        downloaded = wget.download(url, out=tmp_path)
        # NOTE(review): wget.download appears to return a path that already
        # includes the ``out`` directory. Joining it with tmp_path again only
        # works when tmp_path is absolute, so only prefix tmp_path when the
        # returned path cannot be found as is.
        if os.path.exists(downloaded):
            tarfile_path = downloaded
        else:
            tarfile_path = os.path.join(tmp_path, downloaded)

        # the context manager closes the archive even if the extraction fails
        with tarfile.open(tarfile_path) as tar:
            tar.extractall(extract_path)

        entries = os.listdir(extract_path)
        if len(entries) != 1:
            # really abort: returning normally would let the caller register
            # a database path that was never created
            raise RuntimeError(
                "Expected exactly one folder in the downloaded archive, "
                f"found {len(entries)}, aborting !"
            )

        folder_path = os.path.join(extract_path, entries[0])

        print(f"Copy data to {dest_path}")
        shutil.copytree(folder_path, dest_path)
        print("Done !")
    finally:
        # never leave the (potentially large) download behind
        shutil.rmtree(tmp_path)


def main():
    """
    Command line entry point of the data manager.

    Reads the JSON parameter file given with --out, creates the
    ``extra_files_path`` directory named in it, stores the requested mOTUs
    database below that directory and finally overwrites the JSON file with
    the new ``motus_db_versioned`` data table entry.

    With --test the database is not downloaded: the script only checks that
    the download URL is reachable and copies the bundled
    ``motus_test_DB_non_functional`` folder instead.

    Exits with a non-zero status if the command line is invalid, or if the
    URL check or the copy of the test database fails.
    """
    # imported locally: the module itself only imports ``datetime`` from the
    # datetime module
    from datetime import timezone

    # Parse Command Line
    parser = argparse.ArgumentParser(description="Create data manager JSON.")
    parser.add_argument(
        "--out", dest="output", action="store", required=True, help="JSON filename"
    )
    parser.add_argument(
        "--version",
        dest="version",
        action="store",
        required=True,
        # reject unknown versions up front instead of failing with a KeyError
        # after the working directory has already been created
        choices=sorted(version_mapping),
        help="Version of the DB",
    )
    parser.add_argument(
        "--test",
        action="store_true",
        help="option to test the script with an lighted database",
    )

    args = parser.parse_args()
    url = version_mapping[args.version]

    # the output file of a DM is a json containing args that can be used by the DM
    # most tools mainly use these args to find the extra_files_path for the DM, which can be used
    # to store the DB data
    with open(args.output) as fh:
        params = json.load(fh)

    workdir = params["output_data"][0]["extra_files_path"]
    # tolerate an already existing directory and create missing parents
    os.makedirs(workdir, exist_ok=True)

    # UTC timestamp; same format as before, without the deprecated utcnow()
    time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%SZ")
    db_value = "db_from_{0}".format(time)
    db_path = os.path.join(workdir, db_value)
    tmp_path = os.path.join(workdir, "tmp")

    # create DB
    if args.test:
        # the test only checks that the download URL is reachable and then
        # uses a small bundled placeholder DB instead of the real download

        # check if link is there
        return_code = subprocess.run(
            ["wget", "--spider", url], shell=False, check=False
        ).returncode
        if return_code:
            print("Error downloading motus database.", file=sys.stderr)
            sys.exit(return_code)

        # copy the test DB
        # TODO: switch to a real test DB once one is available:
        # https://github.com/motu-tool/mOTUs/issues/121
        test_db_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "motus_test_DB_non_functional"
        )
        return_code = subprocess.run(
            ["cp", "-r", test_db_path, db_path], shell=False, check=False
        ).returncode
        if return_code:
            print("Error copying motus database.", file=sys.stderr)
            sys.exit(return_code)

    else:

        # download data
        download_untar_store(url, tmp_path, db_path)

    # Update Data Manager JSON and write to file
    data_manager_entry = {
        "data_tables": {
            "motus_db_versioned": {
                "value": db_value,
                "version": args.version,
                "name": f"mOTUs DB version {args.version} downloaded at {datetime.now()}",
                "path": db_path,
            }
        }
    }

    # the parameter file is replaced by the data table entry
    with open(args.output, "w") as fh:
        json.dump(data_manager_entry, fh, sort_keys=True)


if __name__ == "__main__":
    main()