Mercurial > repos > astroteam > astronomical_archives
comparison astronomical_archives.py @ 1:667fc28d803c draft
planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools/ commit f9ba105adfaad1b2a16dd570652aa27c508d3c4d
| author | astroteam |
|---|---|
| date | Tue, 24 Oct 2023 06:38:22 +0000 |
| parents | 0ddfc343f9f9 |
| children | 7398ea3d9ac4 |
comparison
equal
deleted
inserted
replaced
| 0:0ddfc343f9f9 | 1:667fc28d803c |
|---|---|
| 1 import errno | |
| 2 import functools | |
| 1 import json | 3 import json |
| 2 import os | 4 import os |
| 5 import signal | |
| 3 import sys | 6 import sys |
| 4 import urllib | 7 import urllib |
| 5 from urllib import request | 8 from urllib import request |
| 6 | 9 |
| 10 from astropy.coordinates import SkyCoord | |
| 11 | |
| 7 import pyvo | 12 import pyvo |
| 8 from pyvo import DALAccessError, DALQueryError, DALServiceError | 13 from pyvo import DALAccessError, DALQueryError, DALServiceError |
| 9 from pyvo import registry | 14 from pyvo import registry |
| 10 | 15 |
| 16 | |
| 11 MAX_ALLOWED_ENTRIES = 100 | 17 MAX_ALLOWED_ENTRIES = 100 |
| 12 MAX_REGISTRIES_TO_SEARCH = 100 | 18 MAX_REGISTRIES_TO_SEARCH = 100 |
| 19 | |
| 20 | |
class TimeoutException(Exception):
    """Raised by the ``timeout`` decorator when a call runs too long."""


def timeout(seconds=10, error_message=os.strerror(errno.ETIME)):
    """Build a decorator that aborts the wrapped call after ``seconds``.

    The limit is enforced with ``SIGALRM``, so the decorated function
    must be called from the main thread on a platform providing that
    signal.

    Args:
        seconds: Time limit; whole or fractional seconds. ``0`` disables
            the limit.
        error_message: Message carried by the raised exception.

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns and raises ``TimeoutException`` when the limit
        is exceeded.
    """
    def decorator(func):
        def _handle_timeout(signum, frame):
            raise TimeoutException(error_message)

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # signal.signal() hands back the handler it replaces; keep it
            # so that this call does not leave its own handler installed.
            previous_handler = signal.signal(signal.SIGALRM,
                                             _handle_timeout)
            if previous_handler is None:
                # None means the old handler was not installed from
                # Python and cannot be passed back to signal.signal().
                previous_handler = signal.SIG_DFL

            # setitimer() accepts fractional seconds, unlike alarm().
            signal.setitimer(signal.ITIMER_REAL, seconds)
            try:
                return func(*args, **kwargs)
            finally:
                # Cancel the pending timer before restoring the handler.
                signal.setitimer(signal.ITIMER_REAL, 0)
                signal.signal(signal.SIGALRM, previous_handler)

        return wrapper

    return decorator
| 13 | 43 |
| 14 | 44 |
| 15 class Service: | 45 class Service: |
| 16 # https://pyvo.readthedocs.io/en/latest/api/pyvo.registry.Servicetype.html | 46 # https://pyvo.readthedocs.io/en/latest/api/pyvo.registry.Servicetype.html |
| 17 | 47 |
| 90 self.access_url = access_url | 120 self.access_url = access_url |
| 91 self.initialized = False | 121 self.initialized = False |
| 92 self.archive_service = None | 122 self.archive_service = None |
| 93 self.tables = None | 123 self.tables = None |
| 94 | 124 |
| 125 @timeout(10) | |
| 95 def get_resources(self, | 126 def get_resources(self, |
| 96 query, | 127 query, |
| 97 number_of_results, | 128 number_of_results, |
| 98 url_field='access_url'): | 129 url_field='access_url'): |
| 99 | 130 |
| 256 name = 'Unknown archive title' | 287 name = 'Unknown archive title' |
| 257 | 288 |
| 258 return name | 289 return name |
| 259 | 290 |
| 260 | 291 |
class ConeService(TapArchive):
    """``TapArchive`` variant whose service client is a pyvo SCS service."""

    def _get_service(self):
        """Create the pyvo ``SCSService`` client if an access URL is set."""
        if self.access_url:
            self.archive_service = pyvo.dal.SCSService(self.access_url)

    def get_resources_from_service_list(self, service_list, target, radius):
        """Collect the data URLs found by searching every given service.

        Args:
            service_list: Iterable of objects exposing
                ``search(target, radius)``.
            target: Search position, passed through to ``search``.
            radius: Search radius, passed through to ``search``.

        Returns:
            list: The non-empty values returned by ``getdataurl()`` for
            each result record, in service order then record order.
        """
        resource_list_hydrated = []

        for service in service_list:
            # Iterate the result set directly rather than calling
            # __len__() and fetching each record by index.
            for record in service.search(target, radius):
                resource_url = record.getdataurl()
                if resource_url:
                    resource_list_hydrated.append(resource_url)

        return resource_list_hydrated
| 310 | |
| 311 | |
| 261 class RegistrySearchParameters: | 312 class RegistrySearchParameters: |
| 262 | 313 |
| 263 def __init__(self, keyword=None, waveband=None, service_type=None): | 314 def __init__(self, keyword=None, waveband=None, service_type=None): |
| 264 self.keyword = keyword | 315 self.keyword = keyword |
| 265 self.waveband = waveband | 316 self.waveband = waveband |
| 334 ivoa_registry.access_url) | 385 ivoa_registry.access_url) |
| 335 | 386 |
| 336 archive_list.append(archive) | 387 archive_list.append(archive) |
| 337 | 388 |
| 338 return archive_list | 389 return archive_list |
| 390 | |
| 391 | |
class ConeServiceRegistry:
    """Registry lookup helper for services of type ``"scs"``."""

    def __init__(self):
        """Nothing to set up; the class keeps no instance state."""

    @staticmethod
    def search_services(keyword, number_of_registries):
        """Search the registry for ``"scs"`` services matching a keyword.

        Args:
            keyword: Value passed as ``keywords`` to ``registry.search``.
            number_of_registries: Maximum number of matches to keep.

        Returns:
            The search result cut down to the first
            ``number_of_registries`` entries, or the falsy result
            unchanged when nothing matched.
        """
        matches = registry.search(servicetype="scs", keywords=keyword)

        if not matches:
            return matches

        return matches[:number_of_registries]
| 339 | 408 |
| 340 | 409 |
| 341 class TapQuery: | 410 class TapQuery: |
| 342 | 411 |
| 343 def __init__(self, query): | 412 def __init__(self, query): |
| 408 self._output_html = output_html | 477 self._output_html = output_html |
| 409 self._output_basic_html = output_basic_html | 478 self._output_basic_html = output_basic_html |
| 410 self._output_error = output_error | 479 self._output_error = output_error |
| 411 | 480 |
| 412 self._set_run_main_parameters() | 481 self._set_run_main_parameters() |
| 482 | |
| 413 self._is_initialised, error_message = self._set_archive() | 483 self._is_initialised, error_message = self._set_archive() |
| 414 | 484 |
| 415 if self._is_initialised and error_message is None: | 485 if self._is_initialised and error_message is None: |
| 416 self._set_query() | 486 self._set_query() |
| 417 self._set_output() | 487 self._set_output() |
| 477 " parameters could be initialized" | 547 " parameters could be initialized" |
| 478 | 548 |
| 479 else: | 549 else: |
| 480 return False, error_message | 550 return False, error_message |
| 481 | 551 |
def _set_cone_service(self):
    """Find the cone-search services matching the configured keyword.

    Reads the keyword from ``self._json_parameters`` and, when at least
    one service matches, stores the matches on ``self._services``.

    Returns:
        tuple: ``(is_service_initialised, error_message)``. The message
        is ``None`` on success; on failure it is also written to the
        action log.
    """
    qs = 'query_section'
    qsl = 'query_selection'
    csts = 'cone_search_target_selection'

    keyword = self._json_parameters[qs][qsl][csts]['keyword']

    service_list = ConeServiceRegistry.search_services(
        keyword,
        MAX_REGISTRIES_TO_SEARCH)

    # A truthiness test treats an empty result and None alike;
    # len(None) would raise TypeError instead of reporting the failure.
    if service_list:
        self._services = service_list
        return True, None

    error_message = "no services matching search parameters"
    Logger.create_action_log(
        Logger.ACTION_ERROR,
        Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
        error_message)

    return False, error_message
| 578 | |
| 579 def _set_query(self): | |
| 580 | |
| 581 qs = 'query_section' | |
| 582 qsl = 'query_selection' | |
| 583 csts = 'cone_search_target_selection' | |
| 584 cs = 'cone_section' | |
| 585 ts = 'target_selection' | |
| 586 con = 'cone_object_name' | |
| 486 | 587 |
| 487 if self._query_type == 'obscore_query': | 588 if self._query_type == 'obscore_query': |
| 488 | 589 |
| 489 dataproduct_type = \ | 590 dataproduct_type = \ |
| 490 self._json_parameters[qs][qsl]['dataproduct_type'] | 591 self._json_parameters[qs][qsl]['dataproduct_type'] |
| 515 t_max = \ | 616 t_max = \ |
| 516 self._json_parameters[qs][qsl]['t_max'] | 617 self._json_parameters[qs][qsl]['t_max'] |
| 517 order_by = \ | 618 order_by = \ |
| 518 self._json_parameters[qs][qsl]['order_by'] | 619 self._json_parameters[qs][qsl]['order_by'] |
| 519 | 620 |
| 621 if self._json_parameters[qs][qsl][cs][csts][ts] == 'coordinates': | |
| 622 ra = self._json_parameters[qs][qsl][cs][csts]['ra'] | |
| 623 dec = self._json_parameters[qs][qsl][cs][csts]['dec'] | |
| 624 else: | |
| 625 obs_target = self._json_parameters[qs][qsl][cs][csts][con] | |
| 626 | |
| 627 if obs_target != 'none' and obs_target is not None: | |
| 628 target = CelestialObject(obs_target) | |
| 629 target_coordinates = target.get_coordinates_in_degrees() | |
| 630 | |
| 631 ra = target_coordinates['ra'] | |
| 632 dec = target_coordinates['dec'] | |
| 633 else: | |
| 634 ra = None | |
| 635 dec = None | |
| 636 | |
| 637 radius = self._json_parameters[qs][qsl][cs]['radius'] | |
| 638 | |
| 639 if (ra != '' and ra is not None)\ | |
| 640 and (dec != '' and dec is not None)\ | |
| 641 and (radius != '' and radius is not None): | |
| 642 cone_condition = \ | |
| 643 ADQLConeSearchQuery.get_search_circle_condition(ra, | |
| 644 dec, | |
| 645 radius) | |
| 646 else: | |
| 647 cone_condition = None | |
| 648 | |
| 520 obscore_query_object = ADQLObscoreQuery(dataproduct_type, | 649 obscore_query_object = ADQLObscoreQuery(dataproduct_type, |
| 521 obs_collection, | 650 obs_collection, |
| 522 obs_title, | 651 obs_title, |
| 523 obs_id, | 652 obs_id, |
| 524 facility_name, | 653 facility_name, |
| 529 obs_publisher_id, | 658 obs_publisher_id, |
| 530 s_fov, | 659 s_fov, |
| 531 calibration_level, | 660 calibration_level, |
| 532 t_min, | 661 t_min, |
| 533 t_max, | 662 t_max, |
| 663 cone_condition, | |
| 534 order_by) | 664 order_by) |
| 535 | 665 |
| 536 self._adql_query = obscore_query_object.get_query() | 666 self._adql_query = obscore_query_object.get_query() |
| 537 | 667 |
| 538 elif self._query_type == 'raw_query': | 668 elif self._query_type == 'raw_query': |
| 555 tap_table, | 685 tap_table, |
| 556 where_field, | 686 where_field, |
| 557 where_condition) | 687 where_condition) |
| 558 else: | 688 else: |
| 559 self._adql_query = ADQLObscoreQuery.base_query | 689 self._adql_query = ADQLObscoreQuery.base_query |
| 690 | |
def _set_cone_query(self):
    """Build the cone-search ADQL query and store it on the instance.

    The search centre comes from the ``ra``/``dec`` parameters when the
    target selection is ``'coordinates'``; otherwise it is obtained from
    a ``CelestialObject`` built from the object name. A time constraint
    is only read in the coordinates case. The result is written to
    ``self._adql_query``.
    """
    selection = self._json_parameters['query_section']['query_selection']

    search_radius = selection['radius']
    time = None

    cone_target = selection['cone_search_target_selection']

    if cone_target['target_selection'] == 'coordinates':
        ra = cone_target['ra']
        dec = cone_target['dec']
        time = cone_target['time']
    else:
        celestial_object = CelestialObject(cone_target['cone_object_name'])
        degrees = celestial_object.get_coordinates_in_degrees()

        ra = degrees['ra']
        dec = degrees['dec']

    self._adql_query = ADQLConeSearchQuery(
        ra, dec, search_radius, time).get_query()
| 560 | 717 |
| 561 def _set_output(self): | 718 def _set_output(self): |
| 562 self._number_of_files = \ | 719 self._number_of_files = \ |
| 563 int( | 720 int( |
| 564 self._json_parameters['output_section']['number_of_files'] | 721 self._json_parameters['output_section']['number_of_files'] |
| 592 | 749 |
| 593 archive_name = self._archives[0].get_archive_name( | 750 archive_name = self._archives[0].get_archive_name( |
| 594 self._archive_type) | 751 self._archive_type) |
| 595 | 752 |
| 596 for archive in self._archives: | 753 for archive in self._archives: |
| 597 _file_url, error_message = archive.get_resources( | 754 try: |
| 598 self._adql_query, | 755 _file_url, error_message = archive.get_resources( |
| 599 self._number_of_files, | 756 self._adql_query, |
| 600 self._url_field) | 757 self._number_of_files, |
| 601 | 758 self._url_field) |
| 602 file_url.extend(_file_url) | 759 |
| 760 file_url.extend(_file_url) | |
| 761 except TimeoutException: | |
| 762 error_message = \ | |
| 763 "Archive is taking too long to respond (timeout)" | |
| 764 Logger.create_action_log( | |
| 765 Logger.ACTION_ERROR, | |
| 766 Logger.ACTION_TYPE_ARCHIVE_CONNECTION, | |
| 767 error_message) | |
| 603 | 768 |
| 604 if len(file_url) >= int(self._number_of_files): | 769 if len(file_url) >= int(self._number_of_files): |
| 605 file_url = file_url[:int(self._number_of_files)] | 770 file_url = file_url[:int(self._number_of_files)] |
| 606 break | 771 break |
| 607 | 772 |
| 706 else: | 871 else: |
| 707 summary_file += error_message | 872 summary_file += error_message |
| 708 | 873 |
| 709 FileHandler.write_file_to_output(summary_file, | 874 FileHandler.write_file_to_output(summary_file, |
| 710 self._output_error) | 875 self._output_error) |
| 711 | |
| 712 else: | 876 else: |
| 713 summary_file = Logger.create_log_file("Archive", | 877 summary_file = Logger.create_log_file("Archive", |
| 714 self._adql_query) | 878 self._adql_query) |
| 715 | 879 |
| 716 summary_file += "Unable to initialize archive" | 880 summary_file += "Unable to initialize archives" |
| 717 | 881 |
| 718 FileHandler.write_file_to_output(summary_file, | 882 FileHandler.write_file_to_output(summary_file, |
| 719 self._output_error) | 883 self._output_error) |
| 720 | 884 |
| 721 | 885 |
| 743 obs_publisher_id, | 907 obs_publisher_id, |
| 744 s_fov, | 908 s_fov, |
| 745 calibration_level, | 909 calibration_level, |
| 746 t_min, | 910 t_min, |
| 747 t_max, | 911 t_max, |
| 912 cone_condition, | |
| 748 order_by): | 913 order_by): |
| 749 | 914 |
| 750 super().__init__() | 915 super().__init__() |
| 751 | 916 |
| 752 if calibration_level == 'none': | 917 if calibration_level == 'none': |
| 767 if em_max == 'None' or em_max is None: | 932 if em_max == 'None' or em_max is None: |
| 768 em_max = '' | 933 em_max = '' |
| 769 | 934 |
| 770 if dataproduct_type == 'none' or dataproduct_type is None: | 935 if dataproduct_type == 'none' or dataproduct_type is None: |
| 771 dataproduct_type = '' | 936 dataproduct_type = '' |
| 937 | |
| 938 if cone_condition is not None: | |
| 939 self.cone_condition = cone_condition | |
| 940 else: | |
| 941 self.cone_condition = None | |
| 772 | 942 |
| 773 self.parameters = { | 943 self.parameters = { |
| 774 'dataproduct_type': dataproduct_type, | 944 'dataproduct_type': dataproduct_type, |
| 775 'obs_collection': obs_collection, | 945 'obs_collection': obs_collection, |
| 776 'obs_title': obs_title, | 946 'obs_title': obs_title, |
| 780 'em_min': em_min, | 950 'em_min': em_min, |
| 781 'em_max': em_max, | 951 'em_max': em_max, |
| 782 'target_name': target_name, | 952 'target_name': target_name, |
| 783 'obs_publisher_id': obs_publisher_id, | 953 'obs_publisher_id': obs_publisher_id, |
| 784 's_fov': s_fov, | 954 's_fov': s_fov, |
| 785 'calibration_level': calibration_level, | 955 'calib_level': calibration_level, |
| 786 't_min': t_min, | 956 't_min': t_min, |
| 787 't_max': t_max | 957 't_max': t_max, |
| 788 } | 958 } |
| 789 | 959 |
| 790 self.order_by = order_by | 960 self.order_by = order_by |
| 791 | 961 |
| 792 def get_query(self): | 962 def get_query(self): |
| 805 obscore_order_type = ADQLObscoreQuery.order_by_field[order_type] | 975 obscore_order_type = ADQLObscoreQuery.order_by_field[order_type] |
| 806 | 976 |
| 807 return super()._get_order_by_clause(obscore_order_type) | 977 return super()._get_order_by_clause(obscore_order_type) |
| 808 | 978 |
def get_where_statement(self):
    """Return the WHERE clause, including the cone condition if one is set.

    Without a cone condition the clause built from ``self.parameters``
    is returned unchanged. With one, the condition either starts a new
    ``WHERE`` clause (when the parameter clause is empty) or is appended
    to the parameter clause after ``AND``.
    """
    parameter_clause = self._get_where_clause(self.parameters)

    if self.cone_condition is None:
        return parameter_clause

    if parameter_clause == '':
        prefix = 'WHERE '
    else:
        prefix = parameter_clause + 'AND '

    return prefix + self.get_cone_condition()
| 811 | 988 |
| 812 def _get_where_clause(self, parameters): | 989 def _get_where_clause(self, parameters): |
| 813 return super()._get_where_clause(parameters) | 990 return super()._get_where_clause(parameters) |
| 991 | |
| 992 def get_cone_condition(self): | |
| 993 return self.cone_condition | |
| 814 | 994 |
| 815 | 995 |
| 816 class ADQLTapQuery(BaseADQLQuery): | 996 class ADQLTapQuery(BaseADQLQuery): |
| 817 base_query = 'SELECT TOP '+str(MAX_ALLOWED_ENTRIES)+' * FROM ' | 997 base_query = 'SELECT TOP '+str(MAX_ALLOWED_ENTRIES)+' * FROM ' |
| 818 | 998 |
| 829 ' WHERE ' + \ | 1009 ' WHERE ' + \ |
| 830 str(where_field) + ' = ' + '\'' + \ | 1010 str(where_field) + ' = ' + '\'' + \ |
| 831 str(where_condition) + '\'' | 1011 str(where_condition) + '\'' |
| 832 else: | 1012 else: |
| 833 return ADQLTapQuery.base_query + str(table) | 1013 return ADQLTapQuery.base_query + str(table) |
| 1014 | |
| 1015 | |
class ADQLConeSearchQuery:
    """Build an ADQL query restricted to a sky circle and, optionally, a time.

    The query starts from ``ADQLObscoreQuery.base_query`` and gets a
    ``WHERE`` clause made of whichever conditions are available.
    """

    base_query = "SELECT TOP 100 * FROM ivoa.obscore"

    def __init__(self, ra, dec, radius, time=None):
        """Store the search parameters and build the query text.

        Args:
            ra: Right ascension of the circle centre.
            dec: Declination of the circle centre.
            radius: Radius of the circle.
            time: Optional instant that must lie between ``t_min`` and
                ``t_max``.

        A value counts as missing when it is ``None`` or ``''``; the
        circle condition needs all of ``ra``, ``dec`` and ``radius``.
        """
        self.ra = ra
        self.dec = dec
        self.radius = radius
        self.time = time

        self._query = ADQLObscoreQuery.base_query
        self._query += self._build_where_clause()

    @staticmethod
    def _is_set(value):
        """Tell whether a parameter was supplied.

        Compares against ``None`` and ``''`` instead of using truthiness,
        so that a coordinate of 0 is kept.
        """
        return value is not None and value != ''

    def _build_where_clause(self):
        """Return ``' WHERE ...'`` for the available conditions, else ``''``."""
        conditions = []

        if all(map(self._is_set, (self.ra, self.dec, self.radius))):
            conditions.append(
                self._get_search_circle(self.ra, self.dec, self.radius))

        if self._is_set(self.time):
            conditions.append(self._get_time_condition())

        if not conditions:
            return ""

        # Joining here keeps the clause valid when only the time
        # condition is present (no dangling leading AND).
        return " WHERE " + " AND ".join(conditions)

    def _get_search_circle(self, ra, dec, radius):
        """Return the CONTAINS/CIRCLE condition for the given circle."""
        return "(CONTAINS" \
               "(POINT('ICRS', s_ra, s_dec), " \
               "CIRCLE('ICRS', {}, {}, {})" \
               ") = 1)".format(ra, dec, radius)

    def _get_time_condition(self):
        """Return the condition requiring ``self.time`` within the exposure."""
        # str() lets numeric times be used as well as strings.
        moment = str(self.time)
        return "t_min <= " + moment + " AND t_max >= " + moment

    def _get_search_time(self):
        """Return the time condition prefixed with ``' AND '``."""
        return " AND " + self._get_time_condition()

    def get_query(self):
        """Return the complete ADQL query text."""
        return self._query

    @staticmethod
    def get_search_circle_condition(ra, dec, radius):
        """Return the circle condition (with trailing space) for reuse."""
        return "(CONTAINS" \
               "(POINT('ICRS', s_ra, s_dec)," \
               "CIRCLE('ICRS', {}, {}, {})" \
               ") = 1) ".format(ra, dec, radius)
| 1054 | |
| 1055 | |
class CelestialObject:
    """A named sky object whose coordinates come from ``SkyCoord.from_name``."""

    def __init__(self, name):
        """Store the name and look up the matching coordinates."""
        self.name = name
        self.coordinates = SkyCoord.from_name(self.name)

    def get_coordinates_in_degrees(self):
        """Return ``{'ra': ..., 'dec': ...}`` with both values in degrees.

        The coordinates are flattened with ``ravel()`` and the first
        element of each axis is returned.
        """
        flattened = self.coordinates.ravel()

        return {
            'ra': flattened.ra.degree[0],
            'dec': flattened.dec.degree[0],
        }
| 834 | 1077 |
| 835 | 1078 |
| 836 class HTMLReport: | 1079 class HTMLReport: |
| 837 _html_report_base_header = '' | 1080 _html_report_base_header = '' |
| 838 _html_report_base_body = '' | 1081 _html_report_base_body = '' |
