Mercurial > repos > astroteam > astronomical_archives
comparison astronomical_archives.py @ 1:667fc28d803c draft
planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools/ commit f9ba105adfaad1b2a16dd570652aa27c508d3c4d
author | astroteam |
---|---|
date | Tue, 24 Oct 2023 06:38:22 +0000 |
parents | 0ddfc343f9f9 |
children | 7398ea3d9ac4 |
comparison
equal
deleted
inserted
replaced
0:0ddfc343f9f9 | 1:667fc28d803c |
---|---|
1 import errno | |
2 import functools | |
1 import json | 3 import json |
2 import os | 4 import os |
5 import signal | |
3 import sys | 6 import sys |
4 import urllib | 7 import urllib |
5 from urllib import request | 8 from urllib import request |
6 | 9 |
10 from astropy.coordinates import SkyCoord | |
11 | |
7 import pyvo | 12 import pyvo |
8 from pyvo import DALAccessError, DALQueryError, DALServiceError | 13 from pyvo import DALAccessError, DALQueryError, DALServiceError |
9 from pyvo import registry | 14 from pyvo import registry |
10 | 15 |
16 | |
11 MAX_ALLOWED_ENTRIES = 100 | 17 MAX_ALLOWED_ENTRIES = 100 |
12 MAX_REGISTRIES_TO_SEARCH = 100 | 18 MAX_REGISTRIES_TO_SEARCH = 100 |
19 | |
20 | |
21 class TimeoutException(Exception): | |
22 pass | |
23 | |
24 | |
25 def timeout(seconds=10, error_message=os.strerror(errno.ETIME)): | |
26 def decorator(func): | |
27 def _handle_timeout(signum, frame): | |
28 raise TimeoutException(error_message) | |
29 | |
30 @functools.wraps(func) | |
31 def wrapper(*args, **kwargs): | |
32 signal.signal(signal.SIGALRM, _handle_timeout) | |
33 signal.alarm(seconds) | |
34 try: | |
35 result = func(*args, **kwargs) | |
36 finally: | |
37 signal.alarm(0) | |
38 return result | |
39 | |
40 return wrapper | |
41 | |
42 return decorator | |
13 | 43 |
14 | 44 |
15 class Service: | 45 class Service: |
16 # https://pyvo.readthedocs.io/en/latest/api/pyvo.registry.Servicetype.html | 46 # https://pyvo.readthedocs.io/en/latest/api/pyvo.registry.Servicetype.html |
17 | 47 |
90 self.access_url = access_url | 120 self.access_url = access_url |
91 self.initialized = False | 121 self.initialized = False |
92 self.archive_service = None | 122 self.archive_service = None |
93 self.tables = None | 123 self.tables = None |
94 | 124 |
125 @timeout(10) | |
95 def get_resources(self, | 126 def get_resources(self, |
96 query, | 127 query, |
97 number_of_results, | 128 number_of_results, |
98 url_field='access_url'): | 129 url_field='access_url'): |
99 | 130 |
256 name = 'Unknown archive title' | 287 name = 'Unknown archive title' |
257 | 288 |
258 return name | 289 return name |
259 | 290 |
260 | 291 |
292 class ConeService(TapArchive): | |
293 | |
294 def _get_service(self): | |
295 if self.access_url: | |
296 self.archive_service = pyvo.dal.SCSService(self.access_url) | |
297 | |
298 def get_resources_from_service_list(self, service_list, target, radius): | |
299 | |
300 resource_list_hydrated = [] | |
301 | |
302 for service in service_list: | |
303 resources = service.search(target, radius) | |
304 for i in range(resources.__len__()): | |
305 resource_url = resources.getrecord(i).getdataurl() | |
306 if resource_url: | |
307 resource_list_hydrated.append(resource_url) | |
308 | |
309 return resource_list_hydrated | |
310 | |
311 | |
261 class RegistrySearchParameters: | 312 class RegistrySearchParameters: |
262 | 313 |
263 def __init__(self, keyword=None, waveband=None, service_type=None): | 314 def __init__(self, keyword=None, waveband=None, service_type=None): |
264 self.keyword = keyword | 315 self.keyword = keyword |
265 self.waveband = waveband | 316 self.waveband = waveband |
334 ivoa_registry.access_url) | 385 ivoa_registry.access_url) |
335 | 386 |
336 archive_list.append(archive) | 387 archive_list.append(archive) |
337 | 388 |
338 return archive_list | 389 return archive_list |
390 | |
391 | |
392 class ConeServiceRegistry: | |
393 | |
394 def __init__(self): | |
395 pass | |
396 | |
397 @staticmethod | |
398 def search_services(keyword, number_of_registries): | |
399 | |
400 service_list = [] | |
401 | |
402 service_list = registry.search(servicetype="scs", keywords=keyword) | |
403 | |
404 if service_list: | |
405 service_list = service_list[:number_of_registries] | |
406 | |
407 return service_list | |
339 | 408 |
340 | 409 |
341 class TapQuery: | 410 class TapQuery: |
342 | 411 |
343 def __init__(self, query): | 412 def __init__(self, query): |
408 self._output_html = output_html | 477 self._output_html = output_html |
409 self._output_basic_html = output_basic_html | 478 self._output_basic_html = output_basic_html |
410 self._output_error = output_error | 479 self._output_error = output_error |
411 | 480 |
412 self._set_run_main_parameters() | 481 self._set_run_main_parameters() |
482 | |
413 self._is_initialised, error_message = self._set_archive() | 483 self._is_initialised, error_message = self._set_archive() |
414 | 484 |
415 if self._is_initialised and error_message is None: | 485 if self._is_initialised and error_message is None: |
416 self._set_query() | 486 self._set_query() |
417 self._set_output() | 487 self._set_output() |
477 " parameters could be initialized" | 547 " parameters could be initialized" |
478 | 548 |
479 else: | 549 else: |
480 return False, error_message | 550 return False, error_message |
481 | 551 |
482 def _set_query(self): | 552 def _set_cone_service(self): |
483 | 553 |
484 qs = 'query_section' | 554 qs = 'query_section' |
485 qsl = 'query_selection' | 555 qsl = 'query_selection' |
556 csts = 'cone_search_target_selection' | |
557 | |
558 error_message = None | |
559 is_service_initialised = True | |
560 | |
561 keyword = self._json_parameters[qs][qsl][csts]['keyword'] | |
562 | |
563 service_list = ConeServiceRegistry.search_services( | |
564 keyword, | |
565 MAX_REGISTRIES_TO_SEARCH) | |
566 | |
567 if len(service_list) >= 1: | |
568 self._services = service_list | |
569 else: | |
570 is_service_initialised = False | |
571 error_message = "no services matching search parameters" | |
572 Logger.create_action_log( | |
573 Logger.ACTION_ERROR, | |
574 Logger.ACTION_TYPE_ARCHIVE_CONNECTION, | |
575 error_message) | |
576 | |
577 return is_service_initialised, error_message | |
578 | |
579 def _set_query(self): | |
580 | |
581 qs = 'query_section' | |
582 qsl = 'query_selection' | |
583 csts = 'cone_search_target_selection' | |
584 cs = 'cone_section' | |
585 ts = 'target_selection' | |
586 con = 'cone_object_name' | |
486 | 587 |
487 if self._query_type == 'obscore_query': | 588 if self._query_type == 'obscore_query': |
488 | 589 |
489 dataproduct_type = \ | 590 dataproduct_type = \ |
490 self._json_parameters[qs][qsl]['dataproduct_type'] | 591 self._json_parameters[qs][qsl]['dataproduct_type'] |
515 t_max = \ | 616 t_max = \ |
516 self._json_parameters[qs][qsl]['t_max'] | 617 self._json_parameters[qs][qsl]['t_max'] |
517 order_by = \ | 618 order_by = \ |
518 self._json_parameters[qs][qsl]['order_by'] | 619 self._json_parameters[qs][qsl]['order_by'] |
519 | 620 |
621 if self._json_parameters[qs][qsl][cs][csts][ts] == 'coordinates': | |
622 ra = self._json_parameters[qs][qsl][cs][csts]['ra'] | |
623 dec = self._json_parameters[qs][qsl][cs][csts]['dec'] | |
624 else: | |
625 obs_target = self._json_parameters[qs][qsl][cs][csts][con] | |
626 | |
627 if obs_target != 'none' and obs_target is not None: | |
628 target = CelestialObject(obs_target) | |
629 target_coordinates = target.get_coordinates_in_degrees() | |
630 | |
631 ra = target_coordinates['ra'] | |
632 dec = target_coordinates['dec'] | |
633 else: | |
634 ra = None | |
635 dec = None | |
636 | |
637 radius = self._json_parameters[qs][qsl][cs]['radius'] | |
638 | |
639 if (ra != '' and ra is not None)\ | |
640 and (dec != '' and dec is not None)\ | |
641 and (radius != '' and radius is not None): | |
642 cone_condition = \ | |
643 ADQLConeSearchQuery.get_search_circle_condition(ra, | |
644 dec, | |
645 radius) | |
646 else: | |
647 cone_condition = None | |
648 | |
520 obscore_query_object = ADQLObscoreQuery(dataproduct_type, | 649 obscore_query_object = ADQLObscoreQuery(dataproduct_type, |
521 obs_collection, | 650 obs_collection, |
522 obs_title, | 651 obs_title, |
523 obs_id, | 652 obs_id, |
524 facility_name, | 653 facility_name, |
529 obs_publisher_id, | 658 obs_publisher_id, |
530 s_fov, | 659 s_fov, |
531 calibration_level, | 660 calibration_level, |
532 t_min, | 661 t_min, |
533 t_max, | 662 t_max, |
663 cone_condition, | |
534 order_by) | 664 order_by) |
535 | 665 |
536 self._adql_query = obscore_query_object.get_query() | 666 self._adql_query = obscore_query_object.get_query() |
537 | 667 |
538 elif self._query_type == 'raw_query': | 668 elif self._query_type == 'raw_query': |
555 tap_table, | 685 tap_table, |
556 where_field, | 686 where_field, |
557 where_condition) | 687 where_condition) |
558 else: | 688 else: |
559 self._adql_query = ADQLObscoreQuery.base_query | 689 self._adql_query = ADQLObscoreQuery.base_query |
690 | |
691 def _set_cone_query(self): | |
692 | |
693 qs = 'query_section' | |
694 qsl = 'query_selection' | |
695 csts = 'cone_search_target_selection' | |
696 ts = 'target_selection' | |
697 con = 'cone_object_name' | |
698 | |
699 search_radius = self._json_parameters[qs][qsl]['radius'] | |
700 time = None | |
701 | |
702 if self._json_parameters[qs][qsl][csts][ts] == 'coordinates': | |
703 ra = self._json_parameters[qs][qsl][csts]['ra'] | |
704 dec = self._json_parameters[qs][qsl][csts]['dec'] | |
705 time = self._json_parameters[qs][qsl][csts]['time'] | |
706 else: | |
707 target = CelestialObject(self._json_parameters[qs][qsl][csts][con]) | |
708 | |
709 target_coordinates = target.get_coordinates_in_degrees() | |
710 | |
711 ra = target_coordinates['ra'] | |
712 dec = target_coordinates['dec'] | |
713 | |
714 cone_query_object = ADQLConeSearchQuery(ra, dec, search_radius, time) | |
715 | |
716 self._adql_query = cone_query_object.get_query() | |
560 | 717 |
561 def _set_output(self): | 718 def _set_output(self): |
562 self._number_of_files = \ | 719 self._number_of_files = \ |
563 int( | 720 int( |
564 self._json_parameters['output_section']['number_of_files'] | 721 self._json_parameters['output_section']['number_of_files'] |
592 | 749 |
593 archive_name = self._archives[0].get_archive_name( | 750 archive_name = self._archives[0].get_archive_name( |
594 self._archive_type) | 751 self._archive_type) |
595 | 752 |
596 for archive in self._archives: | 753 for archive in self._archives: |
597 _file_url, error_message = archive.get_resources( | 754 try: |
598 self._adql_query, | 755 _file_url, error_message = archive.get_resources( |
599 self._number_of_files, | 756 self._adql_query, |
600 self._url_field) | 757 self._number_of_files, |
601 | 758 self._url_field) |
602 file_url.extend(_file_url) | 759 |
760 file_url.extend(_file_url) | |
761 except TimeoutException: | |
762 error_message = \ | |
763 "Archive is taking too long to respond (timeout)" | |
764 Logger.create_action_log( | |
765 Logger.ACTION_ERROR, | |
766 Logger.ACTION_TYPE_ARCHIVE_CONNECTION, | |
767 error_message) | |
603 | 768 |
604 if len(file_url) >= int(self._number_of_files): | 769 if len(file_url) >= int(self._number_of_files): |
605 file_url = file_url[:int(self._number_of_files)] | 770 file_url = file_url[:int(self._number_of_files)] |
606 break | 771 break |
607 | 772 |
706 else: | 871 else: |
707 summary_file += error_message | 872 summary_file += error_message |
708 | 873 |
709 FileHandler.write_file_to_output(summary_file, | 874 FileHandler.write_file_to_output(summary_file, |
710 self._output_error) | 875 self._output_error) |
711 | |
712 else: | 876 else: |
713 summary_file = Logger.create_log_file("Archive", | 877 summary_file = Logger.create_log_file("Archive", |
714 self._adql_query) | 878 self._adql_query) |
715 | 879 |
716 summary_file += "Unable to initialize archive" | 880 summary_file += "Unable to initialize archives" |
717 | 881 |
718 FileHandler.write_file_to_output(summary_file, | 882 FileHandler.write_file_to_output(summary_file, |
719 self._output_error) | 883 self._output_error) |
720 | 884 |
721 | 885 |
743 obs_publisher_id, | 907 obs_publisher_id, |
744 s_fov, | 908 s_fov, |
745 calibration_level, | 909 calibration_level, |
746 t_min, | 910 t_min, |
747 t_max, | 911 t_max, |
912 cone_condition, | |
748 order_by): | 913 order_by): |
749 | 914 |
750 super().__init__() | 915 super().__init__() |
751 | 916 |
752 if calibration_level == 'none': | 917 if calibration_level == 'none': |
767 if em_max == 'None' or em_max is None: | 932 if em_max == 'None' or em_max is None: |
768 em_max = '' | 933 em_max = '' |
769 | 934 |
770 if dataproduct_type == 'none' or dataproduct_type is None: | 935 if dataproduct_type == 'none' or dataproduct_type is None: |
771 dataproduct_type = '' | 936 dataproduct_type = '' |
937 | |
938 if cone_condition is not None: | |
939 self.cone_condition = cone_condition | |
940 else: | |
941 self.cone_condition = None | |
772 | 942 |
773 self.parameters = { | 943 self.parameters = { |
774 'dataproduct_type': dataproduct_type, | 944 'dataproduct_type': dataproduct_type, |
775 'obs_collection': obs_collection, | 945 'obs_collection': obs_collection, |
776 'obs_title': obs_title, | 946 'obs_title': obs_title, |
780 'em_min': em_min, | 950 'em_min': em_min, |
781 'em_max': em_max, | 951 'em_max': em_max, |
782 'target_name': target_name, | 952 'target_name': target_name, |
783 'obs_publisher_id': obs_publisher_id, | 953 'obs_publisher_id': obs_publisher_id, |
784 's_fov': s_fov, | 954 's_fov': s_fov, |
785 'calibration_level': calibration_level, | 955 'calib_level': calibration_level, |
786 't_min': t_min, | 956 't_min': t_min, |
787 't_max': t_max | 957 't_max': t_max, |
788 } | 958 } |
789 | 959 |
790 self.order_by = order_by | 960 self.order_by = order_by |
791 | 961 |
792 def get_query(self): | 962 def get_query(self): |
805 obscore_order_type = ADQLObscoreQuery.order_by_field[order_type] | 975 obscore_order_type = ADQLObscoreQuery.order_by_field[order_type] |
806 | 976 |
807 return super()._get_order_by_clause(obscore_order_type) | 977 return super()._get_order_by_clause(obscore_order_type) |
808 | 978 |
809 def get_where_statement(self): | 979 def get_where_statement(self): |
810 return self._get_where_clause(self.parameters) | 980 where_clause = self._get_where_clause(self.parameters) |
981 | |
982 if where_clause == '' and self.cone_condition is not None: | |
983 where_clause = 'WHERE ' + self.get_cone_condition() | |
984 elif where_clause != '' and self.cone_condition is not None: | |
985 where_clause += 'AND ' + self.get_cone_condition() | |
986 | |
987 return where_clause | |
811 | 988 |
812 def _get_where_clause(self, parameters): | 989 def _get_where_clause(self, parameters): |
813 return super()._get_where_clause(parameters) | 990 return super()._get_where_clause(parameters) |
991 | |
992 def get_cone_condition(self): | |
993 return self.cone_condition | |
814 | 994 |
815 | 995 |
816 class ADQLTapQuery(BaseADQLQuery): | 996 class ADQLTapQuery(BaseADQLQuery): |
817 base_query = 'SELECT TOP '+str(MAX_ALLOWED_ENTRIES)+' * FROM ' | 997 base_query = 'SELECT TOP '+str(MAX_ALLOWED_ENTRIES)+' * FROM ' |
818 | 998 |
829 ' WHERE ' + \ | 1009 ' WHERE ' + \ |
830 str(where_field) + ' = ' + '\'' + \ | 1010 str(where_field) + ' = ' + '\'' + \ |
831 str(where_condition) + '\'' | 1011 str(where_condition) + '\'' |
832 else: | 1012 else: |
833 return ADQLTapQuery.base_query + str(table) | 1013 return ADQLTapQuery.base_query + str(table) |
1014 | |
1015 | |
1016 class ADQLConeSearchQuery: | |
1017 | |
1018 base_query = "SELECT TOP 100 * FROM ivoa.obscore" | |
1019 | |
1020 def __init__(self, ra, dec, radius, time=None): | |
1021 | |
1022 self.ra = ra | |
1023 self.dec = dec | |
1024 self.radius = radius | |
1025 self.time = time | |
1026 | |
1027 self._query = ADQLObscoreQuery.base_query | |
1028 | |
1029 if self.ra and self.dec and self.radius: | |
1030 self._query += " WHERE " | |
1031 self._query += self._get_search_circle(ra, dec, radius) | |
1032 | |
1033 if self.time: | |
1034 self._query += self._get_search_time() | |
1035 | |
1036 def _get_search_circle(self, ra, dec, radius): | |
1037 return "(CONTAINS" \ | |
1038 "(POINT('ICRS', s_ra, s_dec), " \ | |
1039 "CIRCLE('ICRS', "+str(ra)+", "+str(dec)+", "+str(radius)+")" \ | |
1040 ") = 1)" | |
1041 | |
1042 def _get_search_time(self): | |
1043 return " AND t_min <= "+self.time+" AND t_max >= "+self.time | |
1044 | |
1045 def get_query(self): | |
1046 return self._query | |
1047 | |
1048 @staticmethod | |
1049 def get_search_circle_condition(ra, dec, radius): | |
1050 return "(CONTAINS" \ | |
1051 "(POINT('ICRS', s_ra, s_dec)," \ | |
1052 "CIRCLE('ICRS', "+str(ra)+", "+str(dec)+", "+str(radius)+")" \ | |
1053 ") = 1) " | |
1054 | |
1055 | |
1056 class CelestialObject: | |
1057 | |
1058 def __init__(self, name): | |
1059 self.name = name | |
1060 self.coordinates = None | |
1061 | |
1062 self.coordinates = SkyCoord.from_name(self.name) | |
1063 | |
1064 def get_coordinates_in_degrees(self): | |
1065 | |
1066 coordinates = { | |
1067 'ra': '', | |
1068 'dec': '' | |
1069 } | |
1070 | |
1071 ra_dec = self.coordinates.ravel() | |
1072 | |
1073 coordinates['ra'] = ra_dec.ra.degree[0] | |
1074 coordinates['dec'] = ra_dec.dec.degree[0] | |
1075 | |
1076 return coordinates | |
834 | 1077 |
835 | 1078 |
836 class HTMLReport: | 1079 class HTMLReport: |
837 _html_report_base_header = '' | 1080 _html_report_base_header = '' |
838 _html_report_base_body = '' | 1081 _html_report_base_body = '' |