Mercurial > repos > astroteam > astronomical_archives
comparison astronomical_archives.py @ 0:0ddfc343f9f9 draft
planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools/ commit d68858614f92df46c58724928d918e989d916db0
| author | astroteam | 
|---|---|
| date | Mon, 04 Sep 2023 14:20:34 +0000 | 
| parents | |
| children | 667fc28d803c | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:0ddfc343f9f9 | 
|---|---|
| 1 import json | |
| 2 import os | |
| 3 import sys | |
| 4 import urllib | |
| 5 from urllib import request | |
| 6 | |
| 7 import pyvo | |
| 8 from pyvo import DALAccessError, DALQueryError, DALServiceError | |
| 9 from pyvo import registry | |
| 10 | |
| 11 MAX_ALLOWED_ENTRIES = 100 | |
| 12 MAX_REGISTRIES_TO_SEARCH = 100 | |
| 13 | |
| 14 | |
class Service:
    """IVOA service types known to the registry, and the subset this tool
    can actually query.

    See https://pyvo.readthedocs.io/en/latest/api/pyvo.registry.Servicetype.html
    """

    # All service types recognised by the IVOA registry.
    services = {
        'TAP': 'tap',
        'SIA': 'sia',
        'SIA2': 'sia2',
        'SPECTRUM': 'spectrum',
        'SCS': 'scs',
        'LINE': 'line'
    }

    # Subset of `services` this tool supports (currently TAP only).
    supported_services = {
        'TAP': 'tap'
    }

    def __init__(self):
        pass

    @staticmethod
    def is_service_supported(service_type) -> bool:
        """Return True when *service_type* is a known AND supported type.

        Every key of `supported_services` is also a key of `services`,
        so a single membership test replaces the original two-step check.
        """
        return service_type in Service.supported_services
| 44 | |
| 45 | |
class Waveband:
    """Wavebands accepted by the registry search, keyed by UI label.

    See https://pyvo.readthedocs.io/en/latest/api/pyvo.registry.Waveband.html
    and https://www.ivoa.net/rdf/messenger/2020-08-26/messenger.html
    """

    # UI label -> IVOA messenger vocabulary term.
    wavebands = {
        'Extreme UV': 'EUV',
        'Gamma ray': 'Gamma-ray',
        'Infrared': 'Infrared',
        'Millimeter': 'Millimeter',
        'Neutrino': 'Neutrino',
        'Optical': 'Optical',
        'Photon': 'Photon',
        'Radio': 'Radio',
        'Ultra violet': 'UV',
        'X-ray': 'X-ray'
    }

    def __init__(self):
        pass

    @staticmethod
    def is_waveband_supported(waveband) -> bool:
        """Return True when *waveband* is one of the known UI labels."""
        return waveband in Waveband.wavebands
| 74 | |
| 75 | |
class TapArchive:
    """A single TAP (Table Access Protocol) archive endpoint.

    Wraps a pyvo TAPService: connects, caches table metadata and runs
    ADQL queries, reporting failures through the Logger.

    ObsCore spec:
    https://www.ivoa.net/documents/ObsCore/20170509/REC-ObsCore-v1.1-20170509
    """

    service_type = Service.services['TAP']

    def __init__(self,
                 id=1,
                 title="Unknown title",
                 name="Unknown name",
                 access_url=""):
        # BUG FIX: the original assignments ended in stray commas,
        # silently turning id/title/name into 1-element tuples
        # (which is why get_archive_name had to strip "',()").
        self.id = id
        self.title = title
        self.name = name
        self.access_url = access_url
        self.initialized = False
        self.archive_service = None  # pyvo.dal.TAPService once initialized
        self.tables = None           # populated by _set_archive_tables()

    def get_resources(self,
                      query,
                      number_of_results,
                      url_field='access_url'):
        """Run *query* and return (resources, error_message).

        At most *number_of_results* rows are returned, each hydrated into
        a plain dict. On failure the partial (possibly empty) list and a
        human-readable error message are returned; the error is also
        pushed to the Logger. No-op when the archive is not initialized.
        """
        resource_list_hydrated = []
        error_message = None

        if self.initialized:

            try:
                raw_resource_list = self.archive_service.search(query)

                for i, resource in enumerate(raw_resource_list):
                    if i >= number_of_results:
                        break
                    resource_list_hydrated.append(
                        self._get_resource_object(resource))

            except DALQueryError:
                # Distinguish a malformed query from a missing obscore table.
                if self.has_obscore_table():
                    error_message = "Error in query -> " + query
                else:
                    error_message = "No obscore table in the archive"
                Logger.create_action_log(
                    Logger.ACTION_ERROR,
                    Logger.ACTION_TYPE_DOWNLOAD,
                    error_message)

            except DALServiceError:
                error_message = "Error communicating with the service"
                Logger.create_action_log(
                    Logger.ACTION_ERROR,
                    Logger.ACTION_TYPE_DOWNLOAD,
                    error_message)

            except Exception:
                # Typo fix: "Unknow" -> "Unknown".
                error_message = "Unknown error while querying the service"
                Logger.create_action_log(
                    Logger.ACTION_ERROR,
                    Logger.ACTION_TYPE_DOWNLOAD,
                    error_message)

        return resource_list_hydrated, error_message

    def _get_resource_object(self, resource):
        """Copy a pyvo result row into a plain dict."""
        return {key: value for key, value in resource.items()}

    def initialize(self):
        """Connect to the TAP service and cache its table metadata.

        Returns (initialized, error_message); errors are also logged.
        """
        error_message = None

        try:
            self._get_service()

            if self.archive_service:
                self._set_archive_tables()
                self.initialized = True

        except DALAccessError:
            error_message = \
                "A connection to the service could not be established"
            Logger.create_action_log(
                Logger.ACTION_ERROR,
                Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
                error_message)

        except Exception:
            # Typo fix: "Unknow" -> "Unknown".
            error_message = "Unknown error while initializing TAP service"
            Logger.create_action_log(
                Logger.ACTION_ERROR,
                Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
                error_message)

        return self.initialized, error_message

    def _get_service(self):
        """Create the pyvo TAP service for our access URL, if one is set."""
        if self.access_url:
            self.archive_service = pyvo.dal.TAPService(self.access_url)

    def _set_archive_tables(self):
        """Cache name/type/fields for every table exposed by the service."""
        self.tables = []

        for table in self.archive_service.tables:
            fields = [
                {
                    'name': column.name,
                    'description': column.description,
                    'unit': column.unit,
                    'datatype': column.datatype.content
                }
                for column in table.columns
            ]

            self.tables.append({
                'name': table.name,
                'type': table.type,
                'fields': fields
            })

    def _is_query_valid(self, query) -> bool:
        """Heuristic check that the table named between 'from' and 'where'
        in *query* exists in this archive.

        NOTE(review): str.index raises ValueError when *query* lacks a
        lowercase 'from'/'where' substring — callers must pass
        well-formed queries.
        """
        idx_from = query.index('from')
        idx_where = query.index('where')

        # Same span the original built character by character.
        table_name = query[idx_from + len('from') + 1:idx_where]

        return any(item["name"] == table_name for item in self.tables)

    def has_obscore_table(self) -> bool:
        """Truthy when the archive exposes the standard ivoa.obscore table."""
        return self._has_table("ivoa.obscore")

    def _has_table(self, table_name) -> bool:
        """Return the matching table descriptor dict, or False if absent."""
        return next(
            (item for item in self.tables if item["name"] == table_name),
            False)

    def get_archive_name(self, archive_type):
        """Human-readable archive name used to label reports.

        The strip of "',()" is kept for robustness: titles produced by
        the old tuple bug rendered as "('title',)".
        """
        try:
            if archive_type == 'registry':
                name = str(self.title).strip("',()")
            else:
                name = self.access_url
        except Exception:
            name = 'Unknown archive title'

        return name
| 259 | |
| 260 | |
class RegistrySearchParameters:
    """Holder for the keyword/waveband/service-type registry filters."""

    def __init__(self, keyword=None, waveband=None, service_type=None):
        self.keyword = keyword
        self.waveband = waveband
        self.service_type = service_type

    def get_parameters(self):
        """Build the parameter dict consumed by Registry.search_registries.

        Unsupported wavebands map to '', and unsupported service types
        fall back to TAP (the only type the tool can query).
        """
        keywords = self.keyword if self.keyword else ''

        if Waveband.is_waveband_supported(self.waveband):
            waveband = Waveband.wavebands[self.waveband]
        else:
            waveband = ''

        if Service.is_service_supported(self.service_type):
            service_type = Service.services[self.service_type]
        else:
            service_type = Service.services['TAP']

        return {
            'keywords': keywords,
            'waveband': waveband,
            'service_type': service_type
        }
| 290 | |
| 291 | |
class Registry:
    """Static helpers around pyvo.registry for locating TAP archives."""

    def __init__(self):
        pass

    @staticmethod
    def search_registries(rsp: RegistrySearchParameters,
                          number_of_registries):
        """Search the IVOA registry with *rsp*'s filters.

        Returns at most *number_of_registries* results wrapped as
        TapArchive objects (or the raw, falsy result when nothing
        matched).
        """
        params = rsp.get_parameters()

        keywords = params['keywords']
        waveband = params['waveband']
        service_type = params['service_type']

        # Only forward the waveband filter when one was selected.
        if waveband:
            registry_list = registry.search(
                keywords=keywords,
                waveband=waveband,
                servicetype=service_type)
        else:
            registry_list = registry.search(
                keywords=keywords,
                servicetype=service_type)

        if registry_list:
            registry_list = Registry._get_registries_from_list(
                registry_list,
                number_of_registries)

        return registry_list

    @staticmethod
    def _get_registries_from_list(registry_list, number_of_registries):
        """Wrap the first *number_of_registries* entries as TapArchive."""
        archives = []

        for position, entry in enumerate(registry_list):
            if position >= number_of_registries:
                break
            archives.append(
                TapArchive(entry.standard_id,
                           entry.res_title,
                           entry.short_name,
                           entry.access_url))

        return archives
| 339 | |
| 340 | |
class TapQuery:
    """Wraps a raw, URL-encoded ADQL query string."""

    def __init__(self, query):
        self.raw_query = query

    def get_query(self):
        """Percent-decode the stored query, then treat '+' as a space."""
        decoded = urllib.parse.unquote(self.raw_query)
        return decoded.replace("+", " ")
| 348 | |
| 349 | |
class BaseADQLQuery:
    """Shared helpers for building ADQL ORDER BY / WHERE fragments."""

    def __init__(self):
        pass

    def _get_order_by_clause(self, order_type):
        """Return an 'ORDER BY <column>' fragment."""
        return 'ORDER BY ' + order_type

    def _get_where_clause(self, parameters):
        """Build a WHERE clause from the non-empty values of *parameters*.

        Each kept entry renders as "key = 'value' "; fragments are joined
        with AND. Returns '' when every value is empty.
        """
        fragments = [
            str(key) + ' = ' + '\'' + str(value) + '\' '
            for key, value in parameters.items()
            if value != ''
        ]

        if not fragments:
            return ''

        return 'WHERE ' + 'AND '.join(fragments)
| 378 | |
| 379 | |
class ToolRunner:
    """Top-level driver for the Galaxy tool.

    Reads the JSON run parameters, selects and initializes the archive(s),
    builds the ADQL query, executes it and writes the requested outputs
    (FITS download, CSV of urls, styled/basic HTML previews, summary log).
    """

    def __init__(self,
                 run_parameters,
                 output,
                 output_csv,
                 output_html,
                 output_basic_html,
                 output_error):

        # run_parameters: path to the Galaxy-generated JSON parameter file.
        self._raw_parameters_path = run_parameters
        self._json_parameters = json.load(open(run_parameters, "r"))
        self._archive_type = ''
        self._query_type = ''
        self._archives = []
        self._adql_query = ''
        # NOTE(review): initialised as `_services_access_url` here but
        # written as `_service_access_url` in _set_archive — confirm
        # which spelling is intended.
        self._services_access_url = ''
        self._url_field = 'access_url'
        self._number_of_files = ''
        self._is_initialised = False

        # Output toggles, enabled from the 'output_selection' flags.
        self._csv_file = False
        self._image_file = False
        self._html_file = False
        self._basic_html_file = False

        # Output file paths supplied on the command line.
        self._output = output
        self._output_csv = output_csv
        self._output_html = output_html
        self._output_basic_html = output_basic_html
        self._output_error = output_error

        self._set_run_main_parameters()
        self._is_initialised, error_message = self._set_archive()

        # Query and output settings only matter when an archive is usable.
        if self._is_initialised and error_message is None:
            self._set_query()
            self._set_output()

    def _set_run_main_parameters(self):
        """Read archive type and query type from the JSON parameters."""

        qs = "query_section"
        qsl = "query_selection"

        self._archive_type = \
            self._json_parameters['archive_selection']['archive_type']
        self._query_type = \
            self._json_parameters[qs][qsl]['query_type']

    def _set_archive(self):
        """Populate self._archives from an explicit access URL or from an
        IVOA registry search, keeping only archives that initialize.

        Returns (is_initialised, error_message).
        """

        error_message = None

        if self._archive_type == 'archive':
            # A single, explicitly chosen archive endpoint.
            self._service_access_url =\
                self._json_parameters['archive_selection']['archive']

            self._archives.append(
                TapArchive(access_url=self._service_access_url))

        else:
            # Registry search by keyword / waveband / service type.
            keyword = \
                self._json_parameters['archive_selection']['keyword']
            waveband = \
                self._json_parameters['archive_selection']['wavebands']
            service_type = \
                self._json_parameters['archive_selection']['service_type']

            rsp = RegistrySearchParameters(
                keyword=keyword,
                waveband=waveband,
                service_type=service_type)

            archive_list = Registry.search_registries(
                rsp,
                MAX_REGISTRIES_TO_SEARCH)

            if len(archive_list) >= 1:
                self._archives = archive_list
            else:
                error_message = "no archive matching search parameters"
                Logger.create_action_log(
                    Logger.ACTION_ERROR,
                    Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
                    error_message)

        if error_message is None:

            # Drop archives whose TAP service could not be initialized.
            self._archives[:] = \
                [archive for archive in self._archives if
                 archive.initialize()[0]]

            if len(self._archives) >= 1:
                return True, None
            else:
                return False, \
                    "no archive matching search" \
                    " parameters could be initialized"

        else:
            return False, error_message

    def _set_query(self):
        """Build the ADQL query from the JSON parameters.

        Three modes: a structured obscore query, a raw single-condition
        table query, or the default 'select from ivoa.obscore' fallback.
        """

        qs = 'query_section'
        qsl = 'query_selection'

        if self._query_type == 'obscore_query':

            dataproduct_type = \
                self._json_parameters[qs][qsl]['dataproduct_type']
            obs_collection = \
                self._json_parameters[qs][qsl]['obs_collection']
            obs_title = \
                self._json_parameters[qs][qsl]['obs_title']
            obs_id = \
                self._json_parameters[qs][qsl]['obs_id']
            facility_name = \
                self._json_parameters[qs][qsl]['facility_name']
            instrument_name = \
                self._json_parameters[qs][qsl]['instrument_name']
            em_min = \
                self._json_parameters[qs][qsl]['em_min']
            em_max = \
                self._json_parameters[qs][qsl]['em_max']
            target_name = \
                self._json_parameters[qs][qsl]['target_name']
            obs_publisher_id = \
                self._json_parameters[qs][qsl]['obs_publisher_id']
            s_fov = \
                self._json_parameters[qs][qsl]['s_fov']
            calibration_level = \
                self._json_parameters[qs][qsl]['calibration_level']
            t_min = \
                self._json_parameters[qs][qsl]['t_min']
            t_max = \
                self._json_parameters[qs][qsl]['t_max']
            order_by = \
                self._json_parameters[qs][qsl]['order_by']

            obscore_query_object = ADQLObscoreQuery(dataproduct_type,
                                                    obs_collection,
                                                    obs_title,
                                                    obs_id,
                                                    facility_name,
                                                    instrument_name,
                                                    em_min,
                                                    em_max,
                                                    target_name,
                                                    obs_publisher_id,
                                                    s_fov,
                                                    calibration_level,
                                                    t_min,
                                                    t_max,
                                                    order_by)

            self._adql_query = obscore_query_object.get_query()

        elif self._query_type == 'raw_query':

            wc = 'where_clause'

            tap_table = \
                self._json_parameters[qs][qsl]['table']

            where_field = \
                self._json_parameters[qs][qsl][wc]['where_field']
            where_condition = \
                self._json_parameters[qs][qsl][wc]['where_condition']

            # Raw queries may point at a non-obscore url column.
            self._url_field = \
                self._json_parameters[qs][qsl]['url_field']

            self._adql_query = \
                ADQLTapQuery().get_query(
                    tap_table,
                    where_field,
                    where_condition)
        else:
            # Fallback: unfiltered obscore select.
            self._adql_query = ADQLObscoreQuery.base_query

    def _set_output(self):
        """Clamp the requested file count and decode the output toggles."""
        self._number_of_files = \
            int(
                self._json_parameters['output_section']['number_of_files']
            )

        # Clamp to [1, MAX_ALLOWED_ENTRIES].
        if self._number_of_files < 1:
            self._number_of_files = 1
        elif self._number_of_files > 100:
            self._number_of_files = MAX_ALLOWED_ENTRIES

        output_selection = \
            self._json_parameters['output_section']['output_selection']

        # Flags: c=csv, i=image (FITS download), h=styled html, b=basic html.
        if output_selection is not None:
            if 'c' in output_selection:
                self._csv_file = True
            if 'i' in output_selection:
                self._image_file = True
            if 'h' in output_selection:
                self._html_file = True
            if 'b' in output_selection:
                self._basic_html_file = True

    def _validate_json_parameters(self, json_parameters):
        """Re-load the JSON parameter file (no further validation yet)."""
        self._json_parameters = json.load(open(json_parameters, "r"))

    def run(self):
        """Query each archive in turn and write all requested outputs."""
        if self._is_initialised:
            error_message = None
            file_url = []

            # The first archive's name labels the reports.
            archive_name = self._archives[0].get_archive_name(
                self._archive_type)

            # Gather resources archive by archive until enough are found.
            for archive in self._archives:
                _file_url, error_message = archive.get_resources(
                    self._adql_query,
                    self._number_of_files,
                    self._url_field)

                file_url.extend(_file_url)

                if len(file_url) >= int(self._number_of_files):
                    file_url = file_url[:int(self._number_of_files)]
                    break

            if file_url:

                if self._csv_file:
                    FileHandler.write_urls_to_output(
                        file_url,
                        self._output_csv,
                        self._url_field)

                if self._image_file:

                    # First file goes to the primary Galaxy output...
                    try:
                        fits_file = FileHandler.download_file_from_url(
                            file_url[0][self._url_field])

                        FileHandler.write_file_to_output(
                            fits_file,
                            self._output, "wb")

                        log_message = "from url " +\
                            file_url[0][self._url_field]

                        Logger.create_action_log(
                            Logger.ACTION_SUCCESS,
                            Logger.ACTION_TYPE_DOWNLOAD,
                            log_message)

                    except Exception:
                        error_message = "from url " + \
                            file_url[0][self._url_field]

                        Logger.create_action_log(
                            Logger.ACTION_ERROR,
                            Logger.ACTION_TYPE_DOWNLOAD,
                            error_message)

                    # ...remaining files are saved into the ./fits subdir.
                    for i, url in enumerate(file_url[1:], start=1):
                        try:
                            fits_file = \
                                FileHandler.download_file_from_url(
                                    url[self._url_field])

                            FileHandler.write_file_to_subdir(
                                fits_file,
                                FileHandler.get_file_name_from_url(
                                    url[self._url_field]))

                            log_message = "from url " + \
                                url[self._url_field]

                            Logger.create_action_log(
                                Logger.ACTION_SUCCESS,
                                Logger.ACTION_TYPE_DOWNLOAD,
                                log_message)

                        except Exception:
                            error_message = "from url " + \
                                url[self._url_field]

                            Logger.create_action_log(
                                Logger.ACTION_ERROR,
                                Logger.ACTION_TYPE_DOWNLOAD,
                                error_message)

                if self._html_file:
                    html_file = OutputHandler.generate_html_output(
                        file_url,
                        archive_name,
                        self._adql_query)

                    FileHandler.write_file_to_output(html_file,
                                                     self._output_html)

                if self._basic_html_file:
                    html_file = \
                        OutputHandler.generate_basic_html_output(
                            file_url,
                            archive_name,
                            self._adql_query)

                    FileHandler.write_file_to_output(
                        html_file,
                        self._output_basic_html)

                # Success summary goes to the error/summary output.
                summary_file = Logger.create_log_file(archive_name,
                                                      self._adql_query)
                summary_file += "\n Tool run executed with success"

                FileHandler.write_file_to_output(summary_file,
                                                 self._output_error)

            else:

                # No resources: write an explanatory summary instead.
                summary_file = Logger.create_log_file(archive_name,
                                                      self._adql_query)

                if error_message is None:
                    summary_file += \
                        "\n No resources matching parameters found"
                else:
                    summary_file += error_message

                FileHandler.write_file_to_output(summary_file,
                                                 self._output_error)

        else:
            # Initialization failed: report it in the summary output.
            summary_file = Logger.create_log_file("Archive",
                                                  self._adql_query)

            summary_file += "Unable to initialize archive"

            FileHandler.write_file_to_output(summary_file,
                                             self._output_error)
| 720 | |
| 721 | |
class ADQLObscoreQuery(BaseADQLQuery):
    """ADQL query builder targeting the standard ivoa.obscore table."""

    # UI sort keys mapped to their obscore column names.
    order_by_field = {
        'size': 'access_estsize',
        'collection': 'obs_collection',
        'object': 'target_name'
    }

    base_query = 'SELECT TOP ' + \
        str(MAX_ALLOWED_ENTRIES) + \
        ' * FROM ivoa.obscore '

    def __init__(self,
                 dataproduct_type,
                 obs_collection,
                 obs_title,
                 obs_id,
                 facility_name,
                 instrument_name,
                 em_min,
                 em_max,
                 target_name,
                 obs_publisher_id,
                 s_fov,
                 calibration_level,
                 t_min,
                 t_max,
                 order_by):
        """Normalise the raw form values and store them as WHERE parameters."""
        super().__init__()

        def blank_if_none_string(value):
            # The tool form sends the literal string 'None' (or None)
            # for unset numeric fields; map both to ''.
            if value == 'None' or value is None:
                return ''
            return value

        if calibration_level == 'none':
            calibration_level = ''

        if order_by == 'none':
            order_by = ''

        t_min = blank_if_none_string(t_min)
        t_max = blank_if_none_string(t_max)
        em_min = blank_if_none_string(em_min)
        em_max = blank_if_none_string(em_max)

        if dataproduct_type == 'none' or dataproduct_type is None:
            dataproduct_type = ''

        # Empty values are skipped by _get_where_clause.
        self.parameters = {
            'dataproduct_type': dataproduct_type,
            'obs_collection': obs_collection,
            'obs_title': obs_title,
            'obs_id': obs_id,
            'facility_name': facility_name,
            'instrument_name': instrument_name,
            'em_min': em_min,
            'em_max': em_max,
            'target_name': target_name,
            'obs_publisher_id': obs_publisher_id,
            's_fov': s_fov,
            'calibration_level': calibration_level,
            't_min': t_min,
            't_max': t_max
        }

        self.order_by = order_by

    def get_query(self):
        """Full ADQL query: base select + WHERE + ORDER BY fragments."""
        where_part = self.get_where_statement()
        order_part = self.get_order_by_statement()
        return ADQLObscoreQuery.base_query + where_part + order_part

    def get_order_by_statement(self):
        """ORDER BY fragment, or '' when no ordering was requested."""
        if self.order_by != '':
            return self._get_order_by_clause(self.order_by)
        return ''

    def _get_order_by_clause(self, order_type):
        """Translate the UI sort key to its column and delegate to base."""
        column = ADQLObscoreQuery.order_by_field[order_type]
        return super()._get_order_by_clause(column)

    def get_where_statement(self):
        """WHERE fragment built from the non-empty parameters."""
        return self._get_where_clause(self.parameters)

    def _get_where_clause(self, parameters):
        # Pure pass-through kept for interface compatibility.
        return super()._get_where_clause(parameters)
| 814 | |
| 815 | |
class ADQLTapQuery(BaseADQLQuery):
    """Builds a simple single-condition ADQL query for an arbitrary table."""

    base_query = 'SELECT TOP ' + str(MAX_ALLOWED_ENTRIES) + ' * FROM '

    def __init__(self):
        super().__init__()

    def get_order_by_clause(self, order_type):
        """Public wrapper over the base-class ORDER BY helper."""
        return super()._get_order_by_clause(order_type)

    def get_query(self, table, where_field, where_condition):
        """Base select on *table*, with an optional equality WHERE clause.

        The WHERE clause is only appended when both field and condition
        are non-empty.
        """
        query = ADQLTapQuery.base_query + str(table)

        if where_field != '' and where_condition != '':
            query += ' WHERE ' + \
                str(where_field) + ' = ' + '\'' + \
                str(where_condition) + '\''

        return query
| 834 | |
| 835 | |
class HTMLReport:
    """Placeholder for a future templated HTML report."""

    # Template fragments — all currently empty.
    _html_report_base_header = ''
    _html_report_base_body = ''
    _html_report_base_footer = ''
    _html_report_base_script = ''

    def __init__(self):
        pass
| 844 | |
| 845 | |
class OutputHandler:
    """Renders query results to HTML (styled and basic variants)."""

    def __init__(self):
        pass

    @staticmethod
    def generate_html_output(urls_data, archive_name, adql_query):
        """Styled HTML report: CSS header plus the resources table."""
        return OutputHandler.html_header + \
            OutputHandler.generate_html_content(
                urls_data,
                archive_name,
                adql_query,
                div_attr='class="title"',
                table_attr='class="fl-table"')

    @staticmethod
    def generate_basic_html_output(urls_data,
                                   archive_name,
                                   adql_query, ):
        """Unstyled HTML report (plain bordered table)."""
        return OutputHandler.generate_html_content(urls_data,
                                                   archive_name,
                                                   adql_query)

    @staticmethod
    def generate_html_content(urls_data, archive_name, adql_query,
                              div_attr="", table_attr="border='1'"):
        """Build the report body: a heading block plus one table row per
        resource, with a trailing cell holding any preview images.

        *urls_data* is a list of dicts (hydrated resources); the header
        columns are the union of all keys, in first-appearance order.
        """
        html_file = \
            f"""
            <div {div_attr}>
            <h2>Resources Preview archive:
            <span>
            {archive_name}
            </span>
            </h2>
            <span>ADQL query : {adql_query}</span>
            </div>"""

        html_file += f'<table {table_attr}><thead><tr>'

        for key in Utils.collect_resource_keys(urls_data):
            html_file += '<th>' + str(key) + '</th>'

        # BUG FIX: close tags were emitted as '</thead></tr>', mis-nesting
        # the header row outside its <thead>.
        html_file += '</tr></thead><tbody>'

        for resource in urls_data:
            html_file += '<tr>'

            for key, value in resource.items():
                html_file += f'<td>{value}</td>'

            # Extra cell with collapsible previews when the resource
            # exposes any of the known preview url fields.
            html_file += '<td>'
            for preview_key in \
                    ['preview', 'preview_url', 'postcard_url']:
                if preview_key in resource:
                    html_file += (
                        '<details><summary>Preview</summary>'
                        f'<img src="{resource[preview_key]}"/>'
                        '</details>'
                    )
            html_file += '</td>'
            html_file += '</tr>'

        html_file += '</tbody></table>'
        return html_file

    # Inline stylesheet prepended by generate_html_output.
    html_header = """ <head><style>

    details {
        padding: 10px;
    }

    .table-wrapper {
        margin: 10px 70px 70px;
        box-shadow: 0px 35px 50px rgba( 0, 0, 0, 0.2 );
    }

    .fl-table {
        border-radius: 5px;
        font-size: 12px;
        font-weight: normal;
        border: none;
        border-collapse: collapse;
        width: 100%;
        max-width: 100%;
        white-space: nowrap;
        background-color: white;
    }

    .fl-table td, .fl-table th {
        text-align: center;
        padding: 8px;
    }

    .fl-table td {
        border: 1px solid #999999;
        font-size: 15px;
    }

    .fl-table thead th {
        color: #ffffff;
        background: #4FC3A1;
        border: 1px solid #999999;
    }


    .fl-table thead th:nth-child(odd) {
        color: #ffffff;
        background: #324960;
    }

    .fl-table tr:nth-child(even) {
        background: #F8F8F8;
    }

    .title h2 {
        text-align: center;
        font-size: 22px;
        font-weight: 700; color:#202020;
        text-transform: uppercase;
        word-spacing: 1px; letter-spacing:2px;
        margin-bottom: 50px;
    }

    .title h2 span {
        padding-top: 40px;
        text-transform: none;
        font-size:.80em;
        font-weight: bold;
        font-family: "Playfair Display","Bookman",serif;
        color:#999;
        letter-spacing:-0.005em;
        word-spacing:1px;
        letter-spacing:none;
    }

    .title h1:before {
        background-color: #dfdfdf;
    }

    </style></head>"""
| 986 | |
| 987 | |
class FileHandler:
    """Static helpers for downloading resources and writing tool outputs."""

    def __init__(self):
        pass

    @staticmethod
    def download_file_from_url(file_url):
        """Fetch *file_url* and return its raw bytes."""
        with request.urlopen(file_url) as response:
            payload = response.read()

        return payload

    @staticmethod
    def write_file_to_output(file, output, write_type="w"):
        """Write *file* (str or bytes, per *write_type*) to path *output*."""
        with open(output, write_type) as destination:
            destination.write(file)

    @staticmethod
    def write_urls_to_output(urls: [], output, access_url="access_url"):
        """Write each url's *access_url* field, comma-terminated.

        Resources lacking the field are skipped and logged as errors.
        """
        with open(output, "w") as destination:
            for url in urls:
                try:
                    destination.write(url[access_url] + ',')
                except Exception:
                    Logger.create_action_log(
                        Logger.ACTION_ERROR,
                        Logger.ACTION_TYPE_WRITE_URL,
                        "url field not found for url")

    @staticmethod
    def write_file_to_subdir(file, index):
        """Save *file* bytes under <cwd>/fits/<index>.fits."""
        target_dir = os.getcwd() + '/fits'

        target_path = os.path.join(target_dir, str(index) + '.fits')

        with open(target_path, "wb") as destination:
            destination.write(file)

    @staticmethod
    def get_file_name_from_url(url, index=None):
        """Return the last non-empty path segment of *url*.

        Falls back to the second-to-last segment for trailing-slash urls
        and to a generic label on unexpected failure.
        """
        segments = url.split('/')

        file_name = ''

        try:
            if segments[-1] != '':
                file_name = segments[-1]
            elif len(segments) > 1:
                file_name = segments[-2]
        except Exception:
            file_name = 'archive file '

        return file_name
| 1044 | |
| 1045 | |
class Utils:
    """Miscellaneous static helpers."""

    def __init__(self):
        pass

    @staticmethod
    def collect_resource_keys(urls_data: list) -> list:
        """Union of all resource keys, ordered by first appearance.

        Relies on dict preserving insertion order (Python 3.7+).
        """
        seen = {}
        for resource in urls_data:
            for key in resource.keys():
                seen.setdefault(key, None)
        return list(seen)
| 1064 | |
| 1065 | |
class Logger:
    """Accumulates run log lines in memory and renders a summary file."""

    # Class-level accumulator shared by all callers in one run.
    _logs = []

    ACTION_SUCCESS = 1
    ACTION_ERROR = 2

    ACTION_TYPE = 1
    INFO_TYPE = 2

    ACTION_TYPE_DOWNLOAD = 1
    ACTION_TYPE_ARCHIVE_CONNECTION = 2
    ACTION_TYPE_WRITE_URL = 3
    ACTION_TYPE_WRITE_FILE = 4

    def __init__(self):
        pass

    @staticmethod
    def create_action_log(outcome, action, message) -> bool:
        """Record one action log line.

        Returns True when *action* has a known message template
        (WRITE_FILE currently has none and is ignored).
        """
        prefixes = {
            Logger.ACTION_TYPE_DOWNLOAD: (
                "Success downloading file : ",
                "Error downloading file : "),
            Logger.ACTION_TYPE_ARCHIVE_CONNECTION: (
                "Success connecting to archive : ",
                "Error connecting to archive : "),
            Logger.ACTION_TYPE_WRITE_URL: (
                "Success writing url to file : ",
                "Error writing to file : "),
        }

        if action not in prefixes:
            return False

        success_prefix, error_prefix = prefixes[action]

        if outcome == Logger.ACTION_SUCCESS:
            log = success_prefix + message
        else:
            log = error_prefix + message

        Logger._insert_log(Logger.ACTION_TYPE, log)

        return True

    @staticmethod
    def create_info_log(message):
        pass

    @staticmethod
    def _insert_log(type, log):
        Logger._logs.append(log)

    @staticmethod
    def create_log_file(archive_name, query):
        """Render the run header plus every accumulated log line,
        each terminated by a newline."""
        lines = ["Run summary for archive : " + archive_name,
                 "With query : " + query]
        lines.extend(Logger._logs)

        return "".join(line + "\n" for line in lines)
| 1135 | |
| 1136 | |
if __name__ == "__main__":
    # Command-line contract (supplied by the Galaxy tool wrapper):
    #   argv[1..5]: output paths — FITS file, CSV of urls, styled HTML,
    #               basic HTML, and the error/summary log.
    #   argv[6]:    path to the JSON run-parameters file.
    output = sys.argv[1]
    output_csv = sys.argv[2]
    output_html = sys.argv[3]
    output_basic_html = sys.argv[4]
    output_error = sys.argv[5]

    inputs = sys.argv[6]

    tool_runner = ToolRunner(inputs,
                             output,
                             output_csv,
                             output_html,
                             output_basic_html,
                             output_error)

    tool_runner.run()
