comparison astronomical_archives.py @ 2:7398ea3d9ac4 draft default tip

planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools/ commit b77ceb5085f378a8bef9b202b80e3ca7ef1e9a8e
author astroteam
date Tue, 26 Mar 2024 10:03:55 +0000
parents 667fc28d803c
children
comparison
equal deleted inserted replaced
1:667fc28d803c 2:7398ea3d9ac4
1 import errno 1 import errno
2 import functools 2 import functools
3 import json 3 import json
4 import os 4 import os
5 import re
5 import signal 6 import signal
6 import sys 7 import sys
7 import urllib 8 import urllib
8 from urllib import request 9 from urllib import request
9 10
14 from pyvo import registry 15 from pyvo import registry
15 16
16 17
# Numeric limits used by the module; the code that reads them is not
# visible in this part of the file.
MAX_ALLOWED_ENTRIES = 100
MAX_REGISTRIES_TO_SEARCH = 100

# TAP access URLs that receive special timeout handling: when an archive's
# access_url is listed here, its get_resources call is re-wrapped with
# timeout(40) right before it is invoked.
ARCHIVES_TIMEOUT_BYPASS = ["https://datalab.noirlab.edu/tap"]
19 24
20 25
class TimeoutException(Exception):
    """Module-specific exception type for timeouts.

    NOTE(review): presumably raised by the module's timeout handling; the
    raising code is not visible in this part of the file - confirm.
    """
23 28
215 220
216 def _set_archive_tables(self): 221 def _set_archive_tables(self):
217 222
218 self.tables = [] 223 self.tables = []
219 224
220 for table in self.archive_service.tables: 225 try:
221 archive_table = { 226 for table in self.archive_service.tables:
222 'name': table.name, 227 archive_table = {
223 'type': table.type, 228 'name': table.name,
224 'fields': None 229 'type': table.type,
225 } 230 'fields': None
226
227 fields = []
228
229 for table_field in table.columns:
230 field = {
231 'name': table_field.name,
232 'description': table_field.description,
233 'unit': table_field.unit,
234 'datatype': table_field.datatype.content
235 } 231 }
236 232
237 fields.append(field) 233 fields = []
238 234
239 archive_table['fields'] = fields 235 for table_field in table.columns:
240 236 field = {
241 self.tables.append(archive_table) 237 'name': table_field.name,
238 'description': table_field.description,
239 'unit': table_field.unit,
240 'datatype': table_field.datatype.content
241 }
242
243 fields.append(field)
244
245 archive_table['fields'] = fields
246
247 self.tables.append(archive_table)
248
249 # Exception is raised when a table schema is missing
250 # Missing table will be omitted so no action needed
251 except DALServiceError:
252 pass
242 253
243 def _is_query_valid(self, query) -> bool: 254 def _is_query_valid(self, query) -> bool:
244 is_valid = True 255 is_valid = True
245 256
246 attribute_from = 'from' 257 attribute_from = 'from'
504 self._service_access_url =\ 515 self._service_access_url =\
505 self._json_parameters['archive_selection']['archive'] 516 self._json_parameters['archive_selection']['archive']
506 517
507 self._archives.append( 518 self._archives.append(
508 TapArchive(access_url=self._service_access_url)) 519 TapArchive(access_url=self._service_access_url))
520
521 elif self._archive_type == 'custom':
522 self._service_access_url = \
523 self._json_parameters['archive_selection']['access_url']
524
525 if Utils.is_valid_url(self._service_access_url):
526 self._archives.append(
527 TapArchive(access_url=self._service_access_url))
528 else:
529 error_message = "archive access url is not a valid url"
530 Logger.create_action_log(
531 Logger.ACTION_ERROR,
532 Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
533 error_message)
509 534
510 else: 535 else:
511 keyword = \ 536 keyword = \
512 self._json_parameters['archive_selection']['keyword'] 537 self._json_parameters['archive_selection']['keyword']
513 waveband = \ 538 waveband = \
750 archive_name = self._archives[0].get_archive_name( 775 archive_name = self._archives[0].get_archive_name(
751 self._archive_type) 776 self._archive_type)
752 777
753 for archive in self._archives: 778 for archive in self._archives:
754 try: 779 try:
780
781 if archive.access_url in ARCHIVES_TIMEOUT_BYPASS:
782 archive.get_resources = \
783 timeout(40)(TapArchive.get_resources.__get__(archive)) # noqa: E501
784
755 _file_url, error_message = archive.get_resources( 785 _file_url, error_message = archive.get_resources(
756 self._adql_query, 786 self._adql_query,
757 self._number_of_files, 787 self._number_of_files,
758 self._url_field) 788 self._url_field)
759 789
1248 @staticmethod 1278 @staticmethod
def write_urls_to_output(urls: [], output, access_url="access_url"):
    """Write one field of every entry in *urls* to the file *output*.

    The file is opened in ``"w"`` mode, so existing content is replaced.
    For each entry the value stored under the key *access_url* is converted
    with ``str`` and written followed by a comma (the last value is also
    followed by a comma).

    Any exception raised while looking up or writing an entry is caught;
    an error is recorded through ``Logger`` and processing continues with
    the next entry.

    Args:
        urls: Iterable of records supporting ``record[access_url]``.
        output: Path of the file to write.
        access_url: Key of the field to extract from each record.
    """
    with open(output, "w") as sink:
        for record in urls:
            try:
                entry = str(record[access_url]) + ','
                sink.write(entry)
            except Exception:
                Logger.create_action_log(
                    Logger.ACTION_ERROR,
                    Logger.ACTION_TYPE_WRITE_URL,
                    f"url field {access_url} not found for url")
1260 1290
1303 for key in resource.keys(): 1333 for key in resource.keys():
1304 if key not in resource_keys: 1334 if key not in resource_keys:
1305 resource_keys.append(key) 1335 resource_keys.append(key)
1306 return resource_keys 1336 return resource_keys
1307 1337
1338 @staticmethod
def is_valid_url(url: str) -> bool:
    """Return True when *url* has the shape of an absolute http(s) URL.

    The accepted shape is ``http://`` or ``https://``, a dotted host name
    whose last label is 2 to 6 ASCII letters, an optional ``:port`` and an
    optional path that contains no whitespace.

    Args:
        url: Candidate URL. Values that are not ``str`` are rejected
            instead of raising.

    Returns:
        True if the entire string matches the pattern, False otherwise.
    """
    if not isinstance(url, str):
        return False

    # NOTE(review): IP literals, single-label hosts such as "localhost"
    # and top-level labels longer than six letters are rejected by this
    # pattern - confirm that this is the intended policy.
    regex_url = re.compile(r'https?://(?:[A-Za-z0-9-]+\.)+[A-Za-z]{2,6}(?::\d+)?(?:/\S*)?')  # noqa: E501

    # fullmatch is used instead of match() with a trailing "$": "$" also
    # matches just before a final newline, which let "<url>\n" through.
    return regex_url.fullmatch(url) is not None
1342
1308 1343
1309 class Logger: 1344 class Logger:
1310 _logs = [] 1345 _logs = []
1311 1346
1312 ACTION_SUCCESS = 1 1347 ACTION_SUCCESS = 1