# HG changeset patch # User iuc # Date 1649184007 0 # Node ID 6e24e79d3d69cbeb1ac491a2a4f79344be275e0d # Parent df30a2f1db55e75a6e0fe6a33a116804ce6ad46e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_pangolearn commit fd2566abd51c88847437d38a5abea8703b8ee034" diff -r df30a2f1db55 -r 6e24e79d3d69 data_manager/pangolearn_dm.py --- a/data_manager/pangolearn_dm.py Mon Aug 30 13:44:01 2021 +0000 +++ b/data_manager/pangolearn_dm.py Tue Apr 05 18:40:07 2022 +0000 @@ -12,53 +12,43 @@ import requests -def extract_date(tag_str): - parts = tag_str.split("_") - assert len(parts) < 3, "expected maximum of two parts, got " + str(parts) - # there are tags like: 2021-07-07-2 - parts[0] = "-".join(parts[0].split("-")[:3]) - tag_date = datetime.datetime.strptime(parts[0], "%Y-%m-%d") - if len(parts) == 2: - version = int(parts[1]) - assert ( - version < 24 * 60 - ) # because the code stores versions as minutes of the day, it can't handle versions > 1440 - tag_date += datetime.timedelta(minutes=version) - return tag_date - - def get_model_list( existing_release_tags, - url="https://api.github.com/repos/cov-lineages/pangoLEARN/releases", + url="https://api.github.com/repos/cov-lineages/pangoLEARN/releases" ): - response = requests.get(url) - if response.status_code == 200: - release_list = json.loads(response.text) - release_info = [ - dict( - tag_name=e["tag_name"], - name=e["name"], - date=extract_date(e["tag_name"]), - tarball_url=e["tarball_url"], - ) - for e in release_list - if e["tag_name"] not in existing_release_tags - ] - return release_info - else: - response.raise_for_status() + page_num = 0 + while True: + page_num += 1 + response = requests.get(url + f'?page={page_num}') + if response.status_code == 200: + release_list_chunk = json.loads(response.text) + if not release_list_chunk: + # past the last page of results + return + for e in release_list_chunk: + if e["tag_name"] in existing_release_tags: + continue + if e["prerelease"]: + continue + yield dict( + tag_name=e["tag_name"], + name=e["name"], + date=parse_date(e["tag_name"]), + tarball_url=e["tarball_url"], + ) + else: + response.raise_for_status() def filter_by_date(existing_release_tags, start_date=None, end_date=None): - release_list = get_model_list(existing_release_tags) - return [ - element - for element in release_list - if not ( - (end_date is not None and element["date"] > end_date) - or (start_date is not None and element["date"] < start_date) - ) - ] + ret = [] + for release in get_model_list(existing_release_tags): + if start_date and release["date"] < start_date: + break + if not end_date or release["date"] <= end_date: + ret.append(release) + + return ret def download_and_unpack(url, output_directory): @@ -84,7 +74,9 @@ def parse_date(d): - return datetime.datetime.strptime(d, "%Y-%m-%d") + # Tries to parse the first 10 chars of d as a date, which currently + # succeeds for all pangolearn model releases. + return datetime.datetime.strptime(d[:10], "%Y-%m-%d") if __name__ == "__main__": @@ -101,9 +93,9 @@ args = parser.parse_args() if args.testmode: - releases = filter_by_date(start_date=args.start_date, end_date=args.end_date) + releases = filter_by_date([], start_date=args.start_date, end_date=args.end_date) for release in releases: - print(release["tag_name"], release["tarball_url"].split("/")[-1]) + print(release["tag_name"], release["tarball_url"].split("/")[-1], release["date"]) sys.exit(0) with open(args.galaxy_datamanager_filename) as fh: @@ -129,7 +121,7 @@ else: existing_release_tags = set() if args.latest: - releases = [get_model_list(existing_release_tags)[0]] + releases = [next(get_model_list(existing_release_tags))] else: releases = filter_by_date( existing_release_tags, start_date=args.start_date, end_date=args.end_date @@ -140,22 +132,21 @@ if release["tag_name"] not in existing_release_tags ] for release in releases_to_download: - tag = download_and_unpack(release["tarball_url"], output_directory) - release_date = parse_date(tag) + fname = download_and_unpack(release["tarball_url"], output_directory) if args.pangolearn_format_version is not None: version = args.pangolearn_format_version else: # 2021-05-27 was the first release of pangoLEARN for pangolin 3, which changed DB format - if release_date >= datetime.datetime(2021, 5, 27): + if release["date"] >= datetime.datetime(2021, 5, 27): version = '3.0' else: version = '1.0' data_manager_dict["data_tables"][args.datatable_name].append( dict( - value=tag, + value=release["tag_name"], description=release["name"], format_version=version, - path=output_directory + "/" + tag, + path=output_directory + "/" + fname, ) ) data_manager_dict["data_tables"][args.datatable_name].sort( diff -r df30a2f1db55 -r 6e24e79d3d69 data_manager/pangolearn_dm.xml --- a/data_manager/pangolearn_dm.xml Mon Aug 30 13:44:01 2021 +0000 +++ b/data_manager/pangolearn_dm.xml Tue Apr 05 18:40:07 2022 +0000 @@ -1,4 +1,4 @@ - + python requests @@ -15,7 +15,7 @@ --end_date '$release.end_date' #end if #end if - 'pangolearn' + 'pangolearn' '${output_file}' ]]> @@ -52,7 +52,7 @@ - + @@ -64,8 +64,8 @@ - - + +