Mercurial > repos > astroteam > astronomical_archives
comparison astronomical_archives.py @ 2:7398ea3d9ac4 draft default tip
planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools/ commit b77ceb5085f378a8bef9b202b80e3ca7ef1e9a8e
author | astroteam |
---|---|
date | Tue, 26 Mar 2024 10:03:55 +0000 |
parents | 667fc28d803c |
children |
comparison
equal
deleted
inserted
replaced
1:667fc28d803c | 2:7398ea3d9ac4 |
---|---|
1 import errno | 1 import errno |
2 import functools | 2 import functools |
3 import json | 3 import json |
4 import os | 4 import os |
5 import re | |
5 import signal | 6 import signal |
6 import sys | 7 import sys |
7 import urllib | 8 import urllib |
8 from urllib import request | 9 from urllib import request |
9 | 10 |
14 from pyvo import registry | 15 from pyvo import registry |
15 | 16 |
16 | 17 |
17 MAX_ALLOWED_ENTRIES = 100 | 18 MAX_ALLOWED_ENTRIES = 100 |
18 MAX_REGISTRIES_TO_SEARCH = 100 | 19 MAX_REGISTRIES_TO_SEARCH = 100 |
20 | |
21 ARCHIVES_TIMEOUT_BYPASS = [ | |
22 "https://datalab.noirlab.edu/tap" | |
23 ] | |
19 | 24 |
20 | 25 |
21 class TimeoutException(Exception): | 26 class TimeoutException(Exception): |
22 pass | 27 pass |
23 | 28 |
215 | 220 |
216 def _set_archive_tables(self): | 221 def _set_archive_tables(self): |
217 | 222 |
218 self.tables = [] | 223 self.tables = [] |
219 | 224 |
220 for table in self.archive_service.tables: | 225 try: |
221 archive_table = { | 226 for table in self.archive_service.tables: |
222 'name': table.name, | 227 archive_table = { |
223 'type': table.type, | 228 'name': table.name, |
224 'fields': None | 229 'type': table.type, |
225 } | 230 'fields': None |
226 | |
227 fields = [] | |
228 | |
229 for table_field in table.columns: | |
230 field = { | |
231 'name': table_field.name, | |
232 'description': table_field.description, | |
233 'unit': table_field.unit, | |
234 'datatype': table_field.datatype.content | |
235 } | 231 } |
236 | 232 |
237 fields.append(field) | 233 fields = [] |
238 | 234 |
239 archive_table['fields'] = fields | 235 for table_field in table.columns: |
240 | 236 field = { |
241 self.tables.append(archive_table) | 237 'name': table_field.name, |
238 'description': table_field.description, | |
239 'unit': table_field.unit, | |
240 'datatype': table_field.datatype.content | |
241 } | |
242 | |
243 fields.append(field) | |
244 | |
245 archive_table['fields'] = fields | |
246 | |
247 self.tables.append(archive_table) | |
248 | |
249 # Exception is raised when a table schema is missing | |
250 # Missing table will be omitted so no action needed | |
251 except DALServiceError: | |
252 pass | |
242 | 253 |
243 def _is_query_valid(self, query) -> bool: | 254 def _is_query_valid(self, query) -> bool: |
244 is_valid = True | 255 is_valid = True |
245 | 256 |
246 attribute_from = 'from' | 257 attribute_from = 'from' |
504 self._service_access_url =\ | 515 self._service_access_url =\ |
505 self._json_parameters['archive_selection']['archive'] | 516 self._json_parameters['archive_selection']['archive'] |
506 | 517 |
507 self._archives.append( | 518 self._archives.append( |
508 TapArchive(access_url=self._service_access_url)) | 519 TapArchive(access_url=self._service_access_url)) |
520 | |
521 elif self._archive_type == 'custom': | |
522 self._service_access_url = \ | |
523 self._json_parameters['archive_selection']['access_url'] | |
524 | |
525 if Utils.is_valid_url(self._service_access_url): | |
526 self._archives.append( | |
527 TapArchive(access_url=self._service_access_url)) | |
528 else: | |
529 error_message = "archive access url is not a valid url" | |
530 Logger.create_action_log( | |
531 Logger.ACTION_ERROR, | |
532 Logger.ACTION_TYPE_ARCHIVE_CONNECTION, | |
533 error_message) | |
509 | 534 |
510 else: | 535 else: |
511 keyword = \ | 536 keyword = \ |
512 self._json_parameters['archive_selection']['keyword'] | 537 self._json_parameters['archive_selection']['keyword'] |
513 waveband = \ | 538 waveband = \ |
750 archive_name = self._archives[0].get_archive_name( | 775 archive_name = self._archives[0].get_archive_name( |
751 self._archive_type) | 776 self._archive_type) |
752 | 777 |
753 for archive in self._archives: | 778 for archive in self._archives: |
754 try: | 779 try: |
780 | |
781 if archive.access_url in ARCHIVES_TIMEOUT_BYPASS: | |
782 archive.get_resources = \ | |
783 timeout(40)(TapArchive.get_resources.__get__(archive)) # noqa: E501 | |
784 | |
755 _file_url, error_message = archive.get_resources( | 785 _file_url, error_message = archive.get_resources( |
756 self._adql_query, | 786 self._adql_query, |
757 self._number_of_files, | 787 self._number_of_files, |
758 self._url_field) | 788 self._url_field) |
759 | 789 |
1248 @staticmethod | 1278 @staticmethod |
1249 def write_urls_to_output(urls: [], output, access_url="access_url"): | 1279 def write_urls_to_output(urls: [], output, access_url="access_url"): |
1250 with open(output, "w") as file_output: | 1280 with open(output, "w") as file_output: |
1251 for url in urls: | 1281 for url in urls: |
1252 try: | 1282 try: |
1253 file_output.write(url[access_url] + ',') | 1283 file_output.write(str(url[access_url]) + ',') |
1254 except Exception: | 1284 except Exception: |
1255 error_message = "url field not found for url" | 1285 error_message = f"url field {access_url} not found for url" |
1256 Logger.create_action_log( | 1286 Logger.create_action_log( |
1257 Logger.ACTION_ERROR, | 1287 Logger.ACTION_ERROR, |
1258 Logger.ACTION_TYPE_WRITE_URL, | 1288 Logger.ACTION_TYPE_WRITE_URL, |
1259 error_message) | 1289 error_message) |
1260 | 1290 |
1303 for key in resource.keys(): | 1333 for key in resource.keys(): |
1304 if key not in resource_keys: | 1334 if key not in resource_keys: |
1305 resource_keys.append(key) | 1335 resource_keys.append(key) |
1306 return resource_keys | 1336 return resource_keys |
1307 | 1337 |
1338 @staticmethod | |
1339 def is_valid_url(url: str) -> bool: | |
1340 regex_url = re.compile(r'^https?://(?:[A-Za-z0-9-]+\.)+[A-Za-z]{2,6}(?::\d+)?(?:/[^\s]*)?$') # noqa: E501 | |
1341 return re.match(regex_url, url) is not None | |
1342 | |
1308 | 1343 |
1309 class Logger: | 1344 class Logger: |
1310 _logs = [] | 1345 _logs = [] |
1311 | 1346 |
1312 ACTION_SUCCESS = 1 | 1347 ACTION_SUCCESS = 1 |