annotate data_manager/model_fetcher.py @ 0:11e42265a9b0 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
author iuc
date Thu, 20 Feb 2025 17:57:11 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
1 #!/usr/bin/env python3
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
2
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
3 import argparse
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
4 import json
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
5 import sys
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
6 import tarfile
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
7 from hashlib import sha256
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
8 from io import BytesIO, StringIO
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
9 from pathlib import Path
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
10 from urllib.error import HTTPError
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
11 from urllib.request import Request, urlopen
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
12
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
13 DATA_TABLE_NAME = 'clair3_models'
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
14
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
15
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
def find_latest_models(readme_text=None):
    """Return the model names listed in the "Latest" Clair3 table of the Rerio README.

    Based on the README.rst of the rerio repository as of 7 January 2025.
    The file that we are parsing has a section that looks like this:

        Clair3 Models
        -------------

        Clair3 models for the following configurations are available:

        Latest:

        ==========================  ===================  =======================
        Config                      Chemistry            Dorado basecaller model
        ==========================  ===================  =======================
        r1041_e82_400bps_sup_v500   R10.4.1 E8.2 (5kHz)  v5.0.0 SUP
        r1041_e82_400bps_hac_v500   R10.4.1 E8.2 (5kHz)  v5.0.0 HAC
        r1041_e82_400bps_sup_v410   R10.4.1 E8.2 (4kHz)  v4.1.0 SUP
        r1041_e82_400bps_hac_v410   R10.4.1 E8.2 (4kHz)  v4.1.0 HAC
        ==========================  ===================  =======================

    The aim is to extract the list of model names from the table by
    successively looking for "Clair3 Models", then "Latest:", then "Config"
    and then "=====", and then reading the first column of every line until
    the next "=====" is encountered.

    :param readme_text: README content to parse. When None (the default) the
        README is downloaded from the Rerio GitHub repository.
    :return: list of model names in table order; empty if the table is not found.
    :raises IOError: if the README download answers with a status other than 200.
    """
    if readme_text is None:
        url = 'https://raw.githubusercontent.com/nanoporetech/rerio/refs/heads/master/README.rst'
        with urlopen(Request(url)) as response:
            if response.status != 200:
                raise IOError(f'Failed to fetch the latest models: {response.status}')
            readme_text = response.read().decode('utf-8')

    # Each marker is only looked for once the previous one has been seen; the
    # line that matches the last marker is the rule just above the table rows.
    markers = ('Clair3 Models', 'Latest:', 'Config', '=====')
    matched = 0
    models = []
    for line in StringIO(readme_text):
        if matched < len(markers):
            if line.startswith(markers[matched]):
                matched += 1
            continue
        if line.startswith('====='):
            # closing rule of the table
            break
        fields = line.split()
        if fields:
            # skip blank rows instead of failing with an IndexError
            models.append(fields[0])
    return models
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
69
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
70
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
def fetch_model(model_name):
    """Download the archive of a Clair3 model and return its raw bytes.

    The Rerio repository holds a pointer file ``clair3_models/<model_name>_model``
    whose content is the URL the archive is then downloaded from.

    The model files are tar gzipped, with a structure like:
        model_name/pileup.index
        model_name/full_alignment.index
    and other files, with the key point being that the model_name becomes the
    model directory.

    :param model_name: name of the model, e.g. ``r1041_e82_400bps_sup_v500``.
    :return: the content of the downloaded archive as bytes.
    :raises IOError: if either request fails or does not answer with status
        200, or if the pointer file does not contain an http(s) URL.
    """
    url = f'https://raw.githubusercontent.com/nanoporetech/rerio/refs/heads/master/clair3_models/{model_name}_model'
    try:
        # urlopen raises an HTTPError for a 404 (and other error statuses)
        with urlopen(Request(url)) as response:
            if response.status != 200:
                raise IOError(f'Failed to fetch the model {model_name}: {response.status}')
            final_url = response.read().decode('utf-8').strip()
    except HTTPError as e:
        raise IOError(f'Failed to fetch the model {model_name}: {e}') from e

    # The download URL comes from remote content: only follow http(s) so that an
    # empty or unexpected value (e.g. a file: URL) is reported, not opened.
    if not final_url.startswith(('http://', 'https://')):
        raise IOError(f'Failed to fetch the model {model_name}: unexpected download URL {final_url!r}')

    try:
        with urlopen(Request(final_url)) as response:
            if response.status != 200:
                raise IOError(f'Failed to fetch the model {model_name} from CDN URL {final_url}: {response.status}')
            return response.read()
    except HTTPError as e:
        # report CDN failures with the same context as pointer-file failures
        raise IOError(f'Failed to fetch the model {model_name} from CDN URL {final_url}: {e}') from e
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
94
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
95
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
def unpack_model(data, outdir):
    """Extract a (possibly compressed) tar archive held in memory into outdir.

    The archive is downloaded content, so every member path is checked before
    anything is written: a member whose path would land outside of outdir
    (absolute name or ``..`` components) aborts the extraction.

    :param data: bytes of the tar archive; the compression is auto-detected.
    :param outdir: directory the archive members are extracted into.
    :raises IOError: if a member path escapes outdir.
    :raises tarfile.TarError: if data is not a readable tar archive or the
        extraction filter rejects a member.
    """
    destination = Path(outdir).resolve()
    with tarfile.open(fileobj=BytesIO(data), mode='r:*') as tar:
        for member in tar.getmembers():
            target = (destination / member.name).resolve()
            if target != destination and destination not in target.parents:
                raise IOError(f'Refusing to unpack {member.name!r}: path is outside of {outdir}')
        if hasattr(tarfile, 'data_filter'):
            # extraction filters also reject links pointing outside of outdir and
            # special files; only available on Python versions that ship them
            tar.extractall(outdir, filter='data')
        else:
            tar.extractall(outdir)
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
99
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
100
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
101 if __name__ == '__main__':
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
102 parser = argparse.ArgumentParser()
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
103 parser.add_argument('dm_filename', type=str, help='The filename of the data manager file to read parameters from and write outputs to')
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
104 parser.add_argument('--known_models', type=str, help='List of models already known in the Galaxy data table')
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
105 parser.add_argument('--sha256_sums', type=str, help='List of sha256sums of the models already known in the Galaxy data table')
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
106 parser.add_argument('--download_latest', action='store_true', default=False, help='Download the latest models as per the Rerio repository')
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
107 parser.add_argument('--download_models', type=str, help='Comma separated list of models to download')
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
108 args = parser.parse_args()
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
109
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
110 # parameters to a data manager are passed in a JSON file (see https://docs.galaxyproject.org/en/latest/dev/data_managers.html) and
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
111 # similarly a JSON file is created to pass the output back to Galaxy
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
112 models = []
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
113 if args.download_latest:
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
114 models.extend(find_latest_models())
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
115 if args.download_models:
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
116 models.extend(args.download_models.split(','))
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
117
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
118 if not models:
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
119 sys.exit('No models to download, please specify either --download_latest or --download_models')
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
120
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
121 with open(args.dm_filename) as fh:
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
122 config = json.load(fh)
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
123 if 'extra_files_path' not in config.get('output_data', [{}])[0]:
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
124 sys.exit('Please specify the output directory in the data manager configuration (the extra_files_path)')
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
125 output_directory = config["output_data"][0]["extra_files_path"]
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
126 if not Path(output_directory).exists():
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
127 Path(output_directory).mkdir(parents=True)
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
128
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
129 data_manager_dict = {}
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
130 data_manager_dict["data_tables"] = config.get("data_tables", {})
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
131 data_manager_dict["data_tables"][DATA_TABLE_NAME] = []
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
132
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
133 known_models = set(args.known_models.split(',')) if args.known_models else set()
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
134 model_to_sha256 = {}
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
135 if args.known_models:
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
136 sha256_sums = args.sha256_sums.split(',')
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
137 for (i, model) in enumerate(known_models):
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
138 model_to_sha256[model] = sha256_sums[i]
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
139
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
140 for model in models:
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
141 model_dir = Path(output_directory) / model
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
142 # The data table cannot handle duplicate entries, so we skip models that are already in the data table
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
143 if model in known_models:
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
144 print(f'Model {model} already exists, skipping', file=sys.stderr)
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
145 continue
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
146 data = fetch_model(model)
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
147 sha256sum = sha256(data).hexdigest()
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
148
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
149 # Since we skip models that are already known we cannot test the sha256sum here. This code is retained to illustrate that an
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
150 # alternative logic would be to download the model each time and check if the sha256sum matches what is already known. Hopefully
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
151 # ONT does not update the models while keeping the same name, so this is not needed. The sha256sum is stored in the data table
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
152 # in case it is needed in the future.
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
153 # if model in model_to_sha256 and sha256sum != model_to_sha256[model]:
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
154 # sys.exit(f'Model {model} already exists with a different sha256sum {model_to_sha256[model]}. This is a serious error, inform the Galaxy admin')
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
155
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
156 unpack_model(data, output_directory)
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
157
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
158 data_manager_dict["data_tables"][DATA_TABLE_NAME].append(
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
159 dict(
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
160 value=model,
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
161 platform="ont",
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
162 sha256=sha256sum,
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
163 path=str(model_dir),
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
164 source="rerio"
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
165 )
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
166 )
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
167
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
168 with open(args.dm_filename, 'w') as fh:
11e42265a9b0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_clair3_models commit 2672414472cc968c736dc7d42f5a119ff8c16c62
iuc
parents:
diff changeset
169 json.dump(data_manager_dict, fh, sort_keys=True, indent=4)