comparison astronomical_archives.py @ 1:667fc28d803c draft

planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools/ commit f9ba105adfaad1b2a16dd570652aa27c508d3c4d
author astroteam
date Tue, 24 Oct 2023 06:38:22 +0000
parents 0ddfc343f9f9
children 7398ea3d9ac4
comparison
equal deleted inserted replaced
0:0ddfc343f9f9 1:667fc28d803c
1 import errno
2 import functools
1 import json 3 import json
2 import os 4 import os
5 import signal
3 import sys 6 import sys
4 import urllib 7 import urllib
5 from urllib import request 8 from urllib import request
6 9
10 from astropy.coordinates import SkyCoord
11
7 import pyvo 12 import pyvo
8 from pyvo import DALAccessError, DALQueryError, DALServiceError 13 from pyvo import DALAccessError, DALQueryError, DALServiceError
9 from pyvo import registry 14 from pyvo import registry
10 15
16
11 MAX_ALLOWED_ENTRIES = 100 17 MAX_ALLOWED_ENTRIES = 100
12 MAX_REGISTRIES_TO_SEARCH = 100 18 MAX_REGISTRIES_TO_SEARCH = 100
19
20
class TimeoutException(Exception):
    """Raised when a call wrapped by ``timeout`` runs out of time."""


def timeout(seconds=10, error_message=os.strerror(errno.ETIME)):
    """Return a decorator that interrupts the wrapped call via SIGALRM.

    :param seconds: delay handed to ``signal.alarm`` before the call
        is interrupted.
    :param error_message: text carried by the ``TimeoutException``
        raised when the alarm fires.
    :return: a decorator; the decorated function returns whatever the
        wrapped function returns, or raises ``TimeoutException``.

    The mechanism relies on ``signal.SIGALRM`` and ``signal.alarm``.
    """
    def decorator(func):
        def _handle_timeout(signum, frame):
            raise TimeoutException(error_message)

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # signal.signal() hands back the handler it replaces; keep it
            # so the process-wide SIGALRM disposition can be put back
            # once this call is over.
            previous_handler = signal.signal(signal.SIGALRM,
                                             _handle_timeout)
            signal.alarm(seconds)
            try:
                return func(*args, **kwargs)
            finally:
                # Cancel any pending alarm before restoring the handler
                # so a late signal cannot reach the wrong handler.
                signal.alarm(0)
                # None means the old handler was not installed from
                # Python and cannot be re-installed through signal().
                if previous_handler is not None:
                    signal.signal(signal.SIGALRM, previous_handler)

        return wrapper

    return decorator
13 43
14 44
15 class Service: 45 class Service:
16 # https://pyvo.readthedocs.io/en/latest/api/pyvo.registry.Servicetype.html 46 # https://pyvo.readthedocs.io/en/latest/api/pyvo.registry.Servicetype.html
17 47
90 self.access_url = access_url 120 self.access_url = access_url
91 self.initialized = False 121 self.initialized = False
92 self.archive_service = None 122 self.archive_service = None
93 self.tables = None 123 self.tables = None
94 124
125 @timeout(10)
95 def get_resources(self, 126 def get_resources(self,
96 query, 127 query,
97 number_of_results, 128 number_of_results,
98 url_field='access_url'): 129 url_field='access_url'):
99 130
256 name = 'Unknown archive title' 287 name = 'Unknown archive title'
257 288
258 return name 289 return name
259 290
260 291
class ConeService(TapArchive):
    """Archive flavour whose service client is a pyvo ``SCSService``."""

    def _get_service(self):
        """Create the ``SCSService`` client if an access URL is set."""
        if not self.access_url:
            return

        self.archive_service = pyvo.dal.SCSService(self.access_url)

    def get_resources_from_service_list(self, service_list, target, radius):
        """Collect the data URLs returned by searching every service.

        :param service_list: iterable of objects exposing
            ``search(target, radius)``.
        :param target: forwarded unchanged to each ``search`` call.
        :param radius: forwarded unchanged to each ``search`` call.
        :return: list of the truthy values returned by
            ``getdataurl()`` for each record, in service/record order.
        """
        data_urls = []

        for service in service_list:
            results = service.search(target, radius)

            for index in range(len(results)):
                url = results.getrecord(index).getdataurl()
                # Records without a data URL are skipped.
                if url:
                    data_urls.append(url)

        return data_urls
310
311
261 class RegistrySearchParameters: 312 class RegistrySearchParameters:
262 313
263 def __init__(self, keyword=None, waveband=None, service_type=None): 314 def __init__(self, keyword=None, waveband=None, service_type=None):
264 self.keyword = keyword 315 self.keyword = keyword
265 self.waveband = waveband 316 self.waveband = waveband
334 ivoa_registry.access_url) 385 ivoa_registry.access_url)
335 386
336 archive_list.append(archive) 387 archive_list.append(archive)
337 388
338 return archive_list 389 return archive_list
390
391
class ConeServiceRegistry:
    """Lookup helper for registry services of type ``scs``."""

    def __init__(self):
        """Nothing to initialise; the class holds no state."""

    @staticmethod
    def search_services(keyword, number_of_registries):
        """Search the registry for ``scs`` services matching a keyword.

        :param keyword: passed as ``keywords`` to ``registry.search``.
        :param number_of_registries: upper bound on the number of
            entries kept from the search result.
        :return: the search result, sliced to at most
            ``number_of_registries`` entries when it is non-empty;
            an empty result is returned untouched.
        """
        matches = registry.search(servicetype="scs", keywords=keyword)

        if not matches:
            return matches

        return matches[:number_of_registries]
339 408
340 409
341 class TapQuery: 410 class TapQuery:
342 411
343 def __init__(self, query): 412 def __init__(self, query):
408 self._output_html = output_html 477 self._output_html = output_html
409 self._output_basic_html = output_basic_html 478 self._output_basic_html = output_basic_html
410 self._output_error = output_error 479 self._output_error = output_error
411 480
412 self._set_run_main_parameters() 481 self._set_run_main_parameters()
482
413 self._is_initialised, error_message = self._set_archive() 483 self._is_initialised, error_message = self._set_archive()
414 484
415 if self._is_initialised and error_message is None: 485 if self._is_initialised and error_message is None:
416 self._set_query() 486 self._set_query()
417 self._set_output() 487 self._set_output()
477 " parameters could be initialized" 547 " parameters could be initialized"
478 548
479 else: 549 else:
480 return False, error_message 550 return False, error_message
481 551
def _set_cone_service(self):
    """Look up cone-search services for the keyword in the tool input.

    Reads the keyword from ``self._json_parameters``, queries
    ``ConeServiceRegistry`` and, when at least one service is found,
    stores the result on ``self._services``.

    :return: ``(True, None)`` on success, otherwise ``(False, message)``
        after the message has been sent to ``Logger``.
    """
    selection = self._json_parameters['query_section']['query_selection']
    keyword = selection['cone_search_target_selection']['keyword']

    found_services = ConeServiceRegistry.search_services(
        keyword,
        MAX_REGISTRIES_TO_SEARCH)

    if len(found_services) >= 1:
        self._services = found_services
        return True, None

    error_message = "no services matching search parameters"
    Logger.create_action_log(
        Logger.ACTION_ERROR,
        Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
        error_message)

    return False, error_message
578
579 def _set_query(self):
580
581 qs = 'query_section'
582 qsl = 'query_selection'
583 csts = 'cone_search_target_selection'
584 cs = 'cone_section'
585 ts = 'target_selection'
586 con = 'cone_object_name'
486 587
487 if self._query_type == 'obscore_query': 588 if self._query_type == 'obscore_query':
488 589
489 dataproduct_type = \ 590 dataproduct_type = \
490 self._json_parameters[qs][qsl]['dataproduct_type'] 591 self._json_parameters[qs][qsl]['dataproduct_type']
515 t_max = \ 616 t_max = \
516 self._json_parameters[qs][qsl]['t_max'] 617 self._json_parameters[qs][qsl]['t_max']
517 order_by = \ 618 order_by = \
518 self._json_parameters[qs][qsl]['order_by'] 619 self._json_parameters[qs][qsl]['order_by']
519 620
621 if self._json_parameters[qs][qsl][cs][csts][ts] == 'coordinates':
622 ra = self._json_parameters[qs][qsl][cs][csts]['ra']
623 dec = self._json_parameters[qs][qsl][cs][csts]['dec']
624 else:
625 obs_target = self._json_parameters[qs][qsl][cs][csts][con]
626
627 if obs_target != 'none' and obs_target is not None:
628 target = CelestialObject(obs_target)
629 target_coordinates = target.get_coordinates_in_degrees()
630
631 ra = target_coordinates['ra']
632 dec = target_coordinates['dec']
633 else:
634 ra = None
635 dec = None
636
637 radius = self._json_parameters[qs][qsl][cs]['radius']
638
639 if (ra != '' and ra is not None)\
640 and (dec != '' and dec is not None)\
641 and (radius != '' and radius is not None):
642 cone_condition = \
643 ADQLConeSearchQuery.get_search_circle_condition(ra,
644 dec,
645 radius)
646 else:
647 cone_condition = None
648
520 obscore_query_object = ADQLObscoreQuery(dataproduct_type, 649 obscore_query_object = ADQLObscoreQuery(dataproduct_type,
521 obs_collection, 650 obs_collection,
522 obs_title, 651 obs_title,
523 obs_id, 652 obs_id,
524 facility_name, 653 facility_name,
529 obs_publisher_id, 658 obs_publisher_id,
530 s_fov, 659 s_fov,
531 calibration_level, 660 calibration_level,
532 t_min, 661 t_min,
533 t_max, 662 t_max,
663 cone_condition,
534 order_by) 664 order_by)
535 665
536 self._adql_query = obscore_query_object.get_query() 666 self._adql_query = obscore_query_object.get_query()
537 667
538 elif self._query_type == 'raw_query': 668 elif self._query_type == 'raw_query':
555 tap_table, 685 tap_table,
556 where_field, 686 where_field,
557 where_condition) 687 where_condition)
558 else: 688 else:
559 self._adql_query = ADQLObscoreQuery.base_query 689 self._adql_query = ADQLObscoreQuery.base_query
690
def _set_cone_query(self):
    """Build the cone-search ADQL query and store it on the instance.

    The search position comes either from explicit ``ra``/``dec``
    (plus ``time``) values in ``self._json_parameters`` or, for any
    other target selection, from resolving the object name through
    ``CelestialObject``. The resulting query string is written to
    ``self._adql_query``.
    """
    selection = self._json_parameters['query_section']['query_selection']
    search_radius = selection['radius']
    target_params = selection['cone_search_target_selection']

    if target_params['target_selection'] == 'coordinates':
        ra = target_params['ra']
        dec = target_params['dec']
        time = target_params['time']
    else:
        # No time constraint is applied when the target is given by name.
        time = None
        resolved = CelestialObject(target_params['cone_object_name'])
        position = resolved.get_coordinates_in_degrees()
        ra = position['ra']
        dec = position['dec']

    self._adql_query = ADQLConeSearchQuery(
        ra, dec, search_radius, time).get_query()
560 717
561 def _set_output(self): 718 def _set_output(self):
562 self._number_of_files = \ 719 self._number_of_files = \
563 int( 720 int(
564 self._json_parameters['output_section']['number_of_files'] 721 self._json_parameters['output_section']['number_of_files']
592 749
593 archive_name = self._archives[0].get_archive_name( 750 archive_name = self._archives[0].get_archive_name(
594 self._archive_type) 751 self._archive_type)
595 752
596 for archive in self._archives: 753 for archive in self._archives:
597 _file_url, error_message = archive.get_resources( 754 try:
598 self._adql_query, 755 _file_url, error_message = archive.get_resources(
599 self._number_of_files, 756 self._adql_query,
600 self._url_field) 757 self._number_of_files,
601 758 self._url_field)
602 file_url.extend(_file_url) 759
760 file_url.extend(_file_url)
761 except TimeoutException:
762 error_message = \
763 "Archive is taking too long to respond (timeout)"
764 Logger.create_action_log(
765 Logger.ACTION_ERROR,
766 Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
767 error_message)
603 768
604 if len(file_url) >= int(self._number_of_files): 769 if len(file_url) >= int(self._number_of_files):
605 file_url = file_url[:int(self._number_of_files)] 770 file_url = file_url[:int(self._number_of_files)]
606 break 771 break
607 772
706 else: 871 else:
707 summary_file += error_message 872 summary_file += error_message
708 873
709 FileHandler.write_file_to_output(summary_file, 874 FileHandler.write_file_to_output(summary_file,
710 self._output_error) 875 self._output_error)
711
712 else: 876 else:
713 summary_file = Logger.create_log_file("Archive", 877 summary_file = Logger.create_log_file("Archive",
714 self._adql_query) 878 self._adql_query)
715 879
716 summary_file += "Unable to initialize archive" 880 summary_file += "Unable to initialize archives"
717 881
718 FileHandler.write_file_to_output(summary_file, 882 FileHandler.write_file_to_output(summary_file,
719 self._output_error) 883 self._output_error)
720 884
721 885
743 obs_publisher_id, 907 obs_publisher_id,
744 s_fov, 908 s_fov,
745 calibration_level, 909 calibration_level,
746 t_min, 910 t_min,
747 t_max, 911 t_max,
912 cone_condition,
748 order_by): 913 order_by):
749 914
750 super().__init__() 915 super().__init__()
751 916
752 if calibration_level == 'none': 917 if calibration_level == 'none':
767 if em_max == 'None' or em_max is None: 932 if em_max == 'None' or em_max is None:
768 em_max = '' 933 em_max = ''
769 934
770 if dataproduct_type == 'none' or dataproduct_type is None: 935 if dataproduct_type == 'none' or dataproduct_type is None:
771 dataproduct_type = '' 936 dataproduct_type = ''
937
938 if cone_condition is not None:
939 self.cone_condition = cone_condition
940 else:
941 self.cone_condition = None
772 942
773 self.parameters = { 943 self.parameters = {
774 'dataproduct_type': dataproduct_type, 944 'dataproduct_type': dataproduct_type,
775 'obs_collection': obs_collection, 945 'obs_collection': obs_collection,
776 'obs_title': obs_title, 946 'obs_title': obs_title,
780 'em_min': em_min, 950 'em_min': em_min,
781 'em_max': em_max, 951 'em_max': em_max,
782 'target_name': target_name, 952 'target_name': target_name,
783 'obs_publisher_id': obs_publisher_id, 953 'obs_publisher_id': obs_publisher_id,
784 's_fov': s_fov, 954 's_fov': s_fov,
785 'calibration_level': calibration_level, 955 'calib_level': calibration_level,
786 't_min': t_min, 956 't_min': t_min,
787 't_max': t_max 957 't_max': t_max,
788 } 958 }
789 959
790 self.order_by = order_by 960 self.order_by = order_by
791 961
792 def get_query(self): 962 def get_query(self):
805 obscore_order_type = ADQLObscoreQuery.order_by_field[order_type] 975 obscore_order_type = ADQLObscoreQuery.order_by_field[order_type]
806 976
807 return super()._get_order_by_clause(obscore_order_type) 977 return super()._get_order_by_clause(obscore_order_type)
808 978
def get_where_statement(self):
    """Return the WHERE clause, extended by the cone condition if any.

    The clause built from ``self.parameters`` is returned unchanged
    when no cone condition is set. Otherwise the cone condition either
    becomes the whole clause (prefixed with ``'WHERE '``) or is
    appended to the existing clause after ``'AND '``.
    """
    clause = self._get_where_clause(self.parameters)

    if self.cone_condition is None:
        return clause

    cone = self.get_cone_condition()

    if clause == '':
        return 'WHERE ' + cone

    # NOTE(review): nothing separates the existing clause from 'AND ';
    # this presumably relies on the parameter clause ending with
    # whitespace -- confirm against the base class.
    return clause + 'AND ' + cone
811 988
def _get_where_clause(self, parameters):
    """Delegate WHERE-clause construction to the parent class.

    :param parameters: passed through unchanged to the parent
        implementation.
    :return: whatever the parent ``_get_where_clause`` returns.
    """
    parent_clause = super()._get_where_clause(parameters)
    return parent_clause
991
def get_cone_condition(self):
    """Return the cone condition stored on this query (may be None)."""
    condition = self.cone_condition
    return condition
814 994
815 995
816 class ADQLTapQuery(BaseADQLQuery): 996 class ADQLTapQuery(BaseADQLQuery):
817 base_query = 'SELECT TOP '+str(MAX_ALLOWED_ENTRIES)+' * FROM ' 997 base_query = 'SELECT TOP '+str(MAX_ALLOWED_ENTRIES)+' * FROM '
818 998
829 ' WHERE ' + \ 1009 ' WHERE ' + \
830 str(where_field) + ' = ' + '\'' + \ 1010 str(where_field) + ' = ' + '\'' + \
831 str(where_condition) + '\'' 1011 str(where_condition) + '\''
832 else: 1012 else:
833 return ADQLTapQuery.base_query + str(table) 1013 return ADQLTapQuery.base_query + str(table)
1014
1015
class ADQLConeSearchQuery:
    """ADQL query that restricts results to a circle on the sky.

    The circle test compares ``POINT('ICRS', s_ra, s_dec)`` against a
    ``CIRCLE('ICRS', ra, dec, radius)`` built from the constructor
    arguments; an optional time adds ``t_min``/``t_max`` bounds.
    """

    base_query = "SELECT TOP 100 * FROM ivoa.obscore"

    def __init__(self, ra, dec, radius, time=None):
        """Assemble the query string.

        :param ra: right ascension inserted into the CIRCLE expression.
        :param dec: declination inserted into the CIRCLE expression.
        :param radius: radius inserted into the CIRCLE expression.
        :param time: optional value compared against ``t_min`` and
            ``t_max``; only used when the circle condition is present.
        """
        self.ra = ra
        self.dec = dec
        self.radius = radius
        self.time = time

        # NOTE(review): the query starts from ADQLObscoreQuery.base_query,
        # not from this class's own ``base_query`` -- confirm which one
        # is intended.
        self._query = ADQLObscoreQuery.base_query

        # A numeric 0 is a legitimate coordinate, so "missing" means
        # None or an empty string rather than any falsy value.
        if all(self._is_set(value) for value in (ra, dec, radius)):
            self._query += " WHERE "
            self._query += self._get_search_circle(ra, dec, radius)

            if self._is_set(time):
                self._query += self._get_search_time()

    @staticmethod
    def _is_set(value):
        """Return True unless ``value`` is None or an empty string."""
        return value is not None and value != ''

    def _get_search_circle(self, ra, dec, radius):
        """Return the parenthesised CONTAINS(...) = 1 condition."""
        return "(CONTAINS" \
               "(POINT('ICRS', s_ra, s_dec), " \
               "CIRCLE('ICRS', " + str(ra) + ", " + str(dec) + ", " \
               + str(radius) + ")" \
               ") = 1)"

    def _get_search_time(self):
        """Return the `` AND t_min <= t AND t_max >= t`` fragment."""
        # str() keeps this working when the time arrives as a number.
        time = str(self.time)
        return " AND t_min <= " + time + " AND t_max >= " + time

    def get_query(self):
        """Return the assembled ADQL query string."""
        return self._query

    @staticmethod
    def get_search_circle_condition(ra, dec, radius):
        """Return the circle condition followed by one trailing space.

        Unlike ``_get_search_circle`` this needs no instance, so other
        query builders can embed the condition in their own WHERE
        clause.
        """
        return "(CONTAINS" \
               "(POINT('ICRS', s_ra, s_dec)," \
               "CIRCLE('ICRS', " + str(ra) + ", " + str(dec) + ", " \
               + str(radius) + ")" \
               ") = 1) "
1054
1055
class CelestialObject:
    """A named sky object resolved through ``SkyCoord.from_name``."""

    def __init__(self, name):
        """Store ``name`` and resolve it to coordinates.

        :param name: object name handed to ``SkyCoord.from_name``.
        """
        self.name = name
        self.coordinates = SkyCoord.from_name(self.name)

    def get_coordinates_in_degrees(self):
        """Return ``{'ra': ..., 'dec': ...}`` taken from the coordinates.

        The stored coordinates are flattened with ``ravel()`` and the
        first element of ``ra.degree`` / ``dec.degree`` is used.
        """
        flattened = self.coordinates.ravel()

        return {
            'ra': flattened.ra.degree[0],
            'dec': flattened.dec.degree[0],
        }
834 1077
835 1078
836 class HTMLReport: 1079 class HTMLReport:
837 _html_report_base_header = '' 1080 _html_report_base_header = ''
838 _html_report_base_body = '' 1081 _html_report_base_body = ''