astronomical_archives.py @ 0:0ddfc343f9f9 (draft)
planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools/ commit d68858614f92df46c58724928d918e989d916db0

author:    astroteam
date:      Mon, 04 Sep 2023 14:20:34 +0000
parents:
children:  667fc28d803c

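"""Script for querying astronomical archives through the Virtual
Observatory (uploaded with planemo, so presumably run as a Galaxy tool).

It searches the IVOA registry (or uses a user-supplied TAP access URL),
runs an ADQL query (an ObsCore query, a raw table query, or the default
ObsCore base query) against the selected archives via pyvo, and writes
the results as a list of URLs, downloaded FITS files, HTML previews and
a run summary.
"""
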
import json
import os
import sys
import urllib
from urllib import request

import pyvo
from pyvo import DALAccessError, DALQueryError, DALServiceError
from pyvo import registry

MAX_ALLOWED_ENTRIES = 100
MAX_REGISTRIES_TO_SEARCH = 100


class Service:
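    """Registry of the IVOA service types known to this tool.

    Only TAP is actually supported; e.g. is_service_supported('TAP') is
    True, while is_service_supported('SIA') is False because SIA is
    listed but not in supported_services.
    """
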
    # https://pyvo.readthedocs.io/en/latest/api/pyvo.registry.Servicetype.html

    services = {
        'TAP': 'tap',
        'SIA': 'sia',
        'SIA2': 'sia2',
        'SPECTRUM': 'spectrum',
        'SCS': 'scs',
        'LINE': 'line'
    }

    supported_services = {
        'TAP': 'tap'
    }

    def __init__(self):
        pass

    @staticmethod
    def is_service_supported(service_type) -> bool:
        is_supported = True

        if service_type not in Service.services.keys():
            is_supported = False
        elif service_type not in Service.supported_services.keys():
            is_supported = False

        return is_supported


class Waveband:
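    """Maps the waveband labels offered by the tool to the terms of the
    IVOA 'messenger' vocabulary accepted by pyvo.registry.search."""
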
    # https://pyvo.readthedocs.io/en/latest/api/pyvo.registry.Waveband.html
    # https://www.ivoa.net/rdf/messenger/2020-08-26/messenger.html

    wavebands = {
        'Extreme UV': 'EUV',
        'Gamma ray': 'Gamma-ray',
        'Infrared': 'Infrared',
        'Millimeter': 'Millimeter',
        'Neutrino': 'Neutrino',
        'Optical': 'Optical',
        'Photon': 'Photon',
        'Radio': 'Radio',
        'Ultra violet': 'UV',
        'X-ray': 'X-ray'
    }

    def __init__(self):
        pass

    @staticmethod
    def is_waveband_supported(waveband) -> bool:
        is_supported = True

        if waveband not in Waveband.wavebands.keys():
            is_supported = False

        return is_supported


class TapArchive:
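    """Thin wrapper around a pyvo TAPService.

    initialize() opens the service and caches its table metadata;
    get_resources() runs an ADQL query and returns up to
    number_of_results rows as plain dictionaries, together with an
    error message (or None) for the run summary.
    """
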
    # https://www.ivoa.net/documents/ObsCore/20170509/REC-ObsCore-v1.1-20170509

    service_type = Service.services['TAP']

    def __init__(self,
                 id=1,
                 title="Unknown title",
                 name="Unknown name",
                 access_url=""):

        self.id = id
        self.title = title
        self.name = name
        self.access_url = access_url
        self.initialized = False
        self.archive_service = None
        self.tables = None

    def get_resources(self,
                      query,
                      number_of_results,
                      url_field='access_url'):

        resource_list_hydrated = []

        error_message = None

        if self.initialized:

            try:
                raw_resource_list = self.archive_service.search(query)

                for i, resource in enumerate(raw_resource_list):
                    if i < number_of_results:
                        resource_list_hydrated.append(
                            self._get_resource_object(resource))
                    else:
                        break

            except DALQueryError:
                if self.has_obscore_table():
                    error_message = "Error in query -> " + query
                    Logger.create_action_log(
                        Logger.ACTION_ERROR,
                        Logger.ACTION_TYPE_DOWNLOAD,
                        error_message)
                else:
                    error_message = "No obscore table in the archive"
                    Logger.create_action_log(
                        Logger.ACTION_ERROR,
                        Logger.ACTION_TYPE_DOWNLOAD,
                        error_message)

            except DALServiceError:
                error_message = "Error communicating with the service"
                Logger.create_action_log(
                    Logger.ACTION_ERROR,
                    Logger.ACTION_TYPE_DOWNLOAD,
                    error_message)

            except Exception:
                error_message = "Unknown error while querying the service"
                Logger.create_action_log(
                    Logger.ACTION_ERROR,
                    Logger.ACTION_TYPE_DOWNLOAD,
                    error_message)

        return resource_list_hydrated, error_message

    def _get_resource_object(self, resource):
        resource_hydrated = {}

        for key, value in resource.items():
            resource_hydrated[key] = value

        return resource_hydrated

    def initialize(self):
        error_message = None

        try:
            self._get_service()

            if self.archive_service:
                self._set_archive_tables()
                self.initialized = True

        except DALAccessError:
            error_message = \
                "A connection to the service could not be established"
            Logger.create_action_log(
                Logger.ACTION_ERROR,
                Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
                error_message)

        except Exception:
            error_message = "Unknown error while initializing TAP service"
            Logger.create_action_log(
                Logger.ACTION_ERROR,
                Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
                error_message)

        return self.initialized, error_message

    def _get_service(self):
        if self.access_url:
            self.archive_service = pyvo.dal.TAPService(self.access_url)

    def _set_archive_tables(self):

        self.tables = []

        for table in self.archive_service.tables:
            archive_table = {
                'name': table.name,
                'type': table.type,
                'fields': None
            }

            fields = []

            for table_field in table.columns:
                field = {
                    'name': table_field.name,
                    'description': table_field.description,
                    'unit': table_field.unit,
                    'datatype': table_field.datatype.content
                }

                fields.append(field)

            archive_table['fields'] = fields

            self.tables.append(archive_table)

    def _is_query_valid(self, query) -> bool:
        is_valid = True

        attribute_from = 'from'
        attribute_where = 'where'

        idx_from = query.index(attribute_from)
        idx_where = query.index(attribute_where)

        table_name = ''

        for idx in range(idx_from + len('from') + 1, idx_where):
            table_name = table_name + query[idx]

        if not next(
                (item for item in self.tables if
                 item["name"] == table_name),
                False):

            is_valid = False

        return is_valid

    def has_obscore_table(self) -> bool:
        has_obscore_table = self._has_table("ivoa.obscore")

        return has_obscore_table

    def _has_table(self, table_name) -> bool:
        _has_table = False

        _has_table = next(
            (item for item in self.tables if item["name"] == table_name),
            False)

        return _has_table

    def get_archive_name(self, archive_type):
        try:
            if archive_type == 'registry':
                name = str(self.title).strip("',()")
            else:
                name = self.access_url
        except Exception:
            name = 'Unknown archive title'

        return name


class RegistrySearchParameters:
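    """Normalizes the user's keyword/waveband/service-type selection into
    the keyword arguments expected by pyvo.registry.search; unsupported
    service types fall back to TAP."""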

    def __init__(self, keyword=None, waveband=None, service_type=None):
        self.keyword = keyword
        self.waveband = waveband
        self.service_type = service_type

    def get_parameters(self):

        parameters = {
            'keywords': '',
            'waveband': '',
            'service_type': ''
        }

        if self.keyword:
            parameters['keywords'] = self.keyword

        if Waveband.is_waveband_supported(self.waveband):
            parameters['waveband'] = \
                Waveband.wavebands[self.waveband]

        if Service.is_service_supported(self.service_type):
            parameters['service_type'] = \
                Service.services[self.service_type]
        else:
            parameters['service_type'] = Service.services['TAP']

        return parameters


class Registry:
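    """Searches the IVOA registry through pyvo.registry.search and turns
    the first matching records into TapArchive instances."""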

    def __init__(self):
        pass

    @staticmethod
    def search_registries(rsp: RegistrySearchParameters,
                          number_of_registries):

        parameters = rsp.get_parameters()

        keywords = parameters['keywords']
        waveband = parameters['waveband']
        service_type = parameters['service_type']

        if not waveband:
            registry_list = registry.search(
                keywords=keywords,
                servicetype=service_type)
        else:
            registry_list = registry.search(
                keywords=keywords,
                waveband=waveband,
                servicetype=service_type)

        if registry_list:
            registry_list = Registry._get_registries_from_list(
                registry_list,
                number_of_registries)

        return registry_list

    @staticmethod
    def _get_registries_from_list(registry_list, number_of_registries):

        archive_list = []

        for i, ivoa_registry in enumerate(registry_list):
            if i < number_of_registries:
                archive = TapArchive(ivoa_registry.standard_id,
                                     ivoa_registry.res_title,
                                     ivoa_registry.short_name,
                                     ivoa_registry.access_url)

                archive_list.append(archive)

        return archive_list


class TapQuery:

    def __init__(self, query):
        self.raw_query = query

    def get_query(self):
        return urllib.parse.unquote(self.raw_query).replace("+", " ")


class BaseADQLQuery:
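    """Shared helpers for building ADQL clauses.

    _get_where_clause skips empty values; e.g. (illustrative)
    {'target_name': 'M31', 'obs_id': ''} turns into
    "WHERE target_name = 'M31' ".
    """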

    def __init__(self):
        pass

    def _get_order_by_clause(self, order_type):
        order_by_clause = 'ORDER BY ' + order_type

        return order_by_clause

    def _get_where_clause(self, parameters):
        where_clause = ''
        is_first_statement = True

        for key, value in parameters.items():

            if value != '':
                statement = str(key) + ' = ' + '\'' + str(value) + '\' '

                if is_first_statement:
                    is_first_statement = False
                    where_clause += 'WHERE '
                else:
                    statement = 'AND ' + statement

                where_clause += statement

        return where_clause


class ToolRunner:
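    """Drives a single tool run.

    Reads the JSON parameter file, selects the archives (a direct access
    URL or a registry search), builds the ADQL query, and on run()
    queries each archive in turn and writes whichever outputs were
    requested (CSV of URLs, downloaded FITS files, HTML previews) plus a
    run summary.
    """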

    def __init__(self,
                 run_parameters,
                 output,
                 output_csv,
                 output_html,
                 output_basic_html,
                 output_error):

        self._raw_parameters_path = run_parameters
        with open(run_parameters, "r") as parameters_file:
            self._json_parameters = json.load(parameters_file)
        self._archive_type = ''
        self._query_type = ''
        self._archives = []
        self._adql_query = ''
        self._service_access_url = ''
        self._url_field = 'access_url'
        self._number_of_files = ''
        self._is_initialised = False

        self._csv_file = False
        self._image_file = False
        self._html_file = False
        self._basic_html_file = False

        self._output = output
        self._output_csv = output_csv
        self._output_html = output_html
        self._output_basic_html = output_basic_html
        self._output_error = output_error

        self._set_run_main_parameters()
        self._is_initialised, error_message = self._set_archive()

        if self._is_initialised and error_message is None:
            self._set_query()
            self._set_output()

    def _set_run_main_parameters(self):

        qs = "query_section"
        qsl = "query_selection"

        self._archive_type = \
            self._json_parameters['archive_selection']['archive_type']
        self._query_type = \
            self._json_parameters[qs][qsl]['query_type']

    def _set_archive(self):

        error_message = None

        if self._archive_type == 'archive':
            self._service_access_url = \
                self._json_parameters['archive_selection']['archive']

            self._archives.append(
                TapArchive(access_url=self._service_access_url))

        else:
            keyword = \
                self._json_parameters['archive_selection']['keyword']
            waveband = \
                self._json_parameters['archive_selection']['wavebands']
            service_type = \
                self._json_parameters['archive_selection']['service_type']

            rsp = RegistrySearchParameters(
                keyword=keyword,
                waveband=waveband,
                service_type=service_type)

            archive_list = Registry.search_registries(
                rsp,
                MAX_REGISTRIES_TO_SEARCH)

            if len(archive_list) >= 1:
                self._archives = archive_list
            else:
                error_message = "no archive matching search parameters"
                Logger.create_action_log(
                    Logger.ACTION_ERROR,
                    Logger.ACTION_TYPE_ARCHIVE_CONNECTION,
                    error_message)

        if error_message is None:

            self._archives[:] = \
                [archive for archive in self._archives if
                 archive.initialize()[0]]

            if len(self._archives) >= 1:
                return True, None
            else:
                return False, \
                    "no archive matching search" \
                    " parameters could be initialized"

        else:
            return False, error_message

    def _set_query(self):

        qs = 'query_section'
        qsl = 'query_selection'

        if self._query_type == 'obscore_query':

            dataproduct_type = \
                self._json_parameters[qs][qsl]['dataproduct_type']
            obs_collection = \
                self._json_parameters[qs][qsl]['obs_collection']
            obs_title = \
                self._json_parameters[qs][qsl]['obs_title']
            obs_id = \
                self._json_parameters[qs][qsl]['obs_id']
            facility_name = \
                self._json_parameters[qs][qsl]['facility_name']
            instrument_name = \
                self._json_parameters[qs][qsl]['instrument_name']
            em_min = \
                self._json_parameters[qs][qsl]['em_min']
            em_max = \
                self._json_parameters[qs][qsl]['em_max']
            target_name = \
                self._json_parameters[qs][qsl]['target_name']
            obs_publisher_id = \
                self._json_parameters[qs][qsl]['obs_publisher_id']
            s_fov = \
                self._json_parameters[qs][qsl]['s_fov']
            calibration_level = \
                self._json_parameters[qs][qsl]['calibration_level']
            t_min = \
                self._json_parameters[qs][qsl]['t_min']
            t_max = \
                self._json_parameters[qs][qsl]['t_max']
            order_by = \
                self._json_parameters[qs][qsl]['order_by']

            obscore_query_object = ADQLObscoreQuery(dataproduct_type,
                                                    obs_collection,
                                                    obs_title,
                                                    obs_id,
                                                    facility_name,
                                                    instrument_name,
                                                    em_min,
                                                    em_max,
                                                    target_name,
                                                    obs_publisher_id,
                                                    s_fov,
                                                    calibration_level,
                                                    t_min,
                                                    t_max,
                                                    order_by)

            self._adql_query = obscore_query_object.get_query()

        elif self._query_type == 'raw_query':

            wc = 'where_clause'

            tap_table = \
                self._json_parameters[qs][qsl]['table']

            where_field = \
                self._json_parameters[qs][qsl][wc]['where_field']
            where_condition = \
                self._json_parameters[qs][qsl][wc]['where_condition']

            self._url_field = \
                self._json_parameters[qs][qsl]['url_field']

            self._adql_query = \
                ADQLTapQuery().get_query(
                    tap_table,
                    where_field,
                    where_condition)
        else:
            self._adql_query = ADQLObscoreQuery.base_query

    def _set_output(self):
        self._number_of_files = \
            int(
                self._json_parameters['output_section']['number_of_files']
            )

        if self._number_of_files < 1:
            self._number_of_files = 1
        elif self._number_of_files > MAX_ALLOWED_ENTRIES:
            self._number_of_files = MAX_ALLOWED_ENTRIES

        output_selection = \
            self._json_parameters['output_section']['output_selection']

        if output_selection is not None:
            if 'c' in output_selection:
                self._csv_file = True
            if 'i' in output_selection:
                self._image_file = True
            if 'h' in output_selection:
                self._html_file = True
            if 'b' in output_selection:
                self._basic_html_file = True

    def _validate_json_parameters(self, json_parameters):
        with open(json_parameters, "r") as parameters_file:
            self._json_parameters = json.load(parameters_file)

    def run(self):
        if self._is_initialised:
            error_message = None
            file_url = []

            archive_name = self._archives[0].get_archive_name(
                self._archive_type)

            for archive in self._archives:
                _file_url, error_message = archive.get_resources(
                    self._adql_query,
                    self._number_of_files,
                    self._url_field)

                file_url.extend(_file_url)

                if len(file_url) >= int(self._number_of_files):
                    file_url = file_url[:int(self._number_of_files)]
                    break

            if file_url:

                if self._csv_file:
                    FileHandler.write_urls_to_output(
                        file_url,
                        self._output_csv,
                        self._url_field)

                if self._image_file:

                    try:
                        fits_file = FileHandler.download_file_from_url(
                            file_url[0][self._url_field])

                        FileHandler.write_file_to_output(
                            fits_file,
                            self._output, "wb")

                        log_message = "from url " + \
                            file_url[0][self._url_field]

                        Logger.create_action_log(
                            Logger.ACTION_SUCCESS,
                            Logger.ACTION_TYPE_DOWNLOAD,
                            log_message)

                    except Exception:
                        error_message = "from url " + \
                            file_url[0][self._url_field]

                        Logger.create_action_log(
                            Logger.ACTION_ERROR,
                            Logger.ACTION_TYPE_DOWNLOAD,
                            error_message)

                    for i, url in enumerate(file_url[1:], start=1):
                        try:
                            fits_file = \
                                FileHandler.download_file_from_url(
                                    url[self._url_field])

                            FileHandler.write_file_to_subdir(
                                fits_file,
                                FileHandler.get_file_name_from_url(
                                    url[self._url_field]))

                            log_message = "from url " + \
                                url[self._url_field]

                            Logger.create_action_log(
                                Logger.ACTION_SUCCESS,
                                Logger.ACTION_TYPE_DOWNLOAD,
                                log_message)

                        except Exception:
                            error_message = "from url " + \
                                url[self._url_field]

                            Logger.create_action_log(
                                Logger.ACTION_ERROR,
                                Logger.ACTION_TYPE_DOWNLOAD,
                                error_message)

                if self._html_file:
                    html_file = OutputHandler.generate_html_output(
                        file_url,
                        archive_name,
                        self._adql_query)

                    FileHandler.write_file_to_output(html_file,
                                                     self._output_html)

                if self._basic_html_file:
                    html_file = \
                        OutputHandler.generate_basic_html_output(
                            file_url,
                            archive_name,
                            self._adql_query)

                    FileHandler.write_file_to_output(
                        html_file,
                        self._output_basic_html)

                summary_file = Logger.create_log_file(archive_name,
                                                      self._adql_query)
                summary_file += "\n Tool run executed successfully"

                FileHandler.write_file_to_output(summary_file,
                                                 self._output_error)

            else:

                summary_file = Logger.create_log_file(archive_name,
                                                      self._adql_query)

                if error_message is None:
                    summary_file += \
                        "\n No resources matching parameters found"
                else:
                    summary_file += error_message

                FileHandler.write_file_to_output(summary_file,
                                                 self._output_error)

        else:
            summary_file = Logger.create_log_file("Archive",
                                                  self._adql_query)

            summary_file += "Unable to initialize archive"

            FileHandler.write_file_to_output(summary_file,
                                             self._output_error)


class ADQLObscoreQuery(BaseADQLQuery):
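    """Builds an ADQL query against the ObsCore table (ivoa.obscore).

    The resulting query has the shape (illustrative example):
        SELECT TOP 100 * FROM ivoa.obscore
        WHERE dataproduct_type = 'image' ORDER BY obs_collection
    where only the parameters the user actually filled in end up in the
    WHERE clause.
    """
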
    order_by_field = {
        'size': 'access_estsize',
        'collection': 'obs_collection',
        'object': 'target_name'
    }

    base_query = 'SELECT TOP ' + \
                 str(MAX_ALLOWED_ENTRIES) + \
                 ' * FROM ivoa.obscore '

    def __init__(self,
                 dataproduct_type,
                 obs_collection,
                 obs_title,
                 obs_id,
                 facility_name,
                 instrument_name,
                 em_min,
                 em_max,
                 target_name,
                 obs_publisher_id,
                 s_fov,
                 calibration_level,
                 t_min,
                 t_max,
                 order_by):

        super().__init__()

        if calibration_level == 'none':
            calibration_level = ''

        if order_by == 'none':
            order_by = ''

        if t_min == 'None' or t_min is None:
            t_min = ''

        if t_max == 'None' or t_max is None:
            t_max = ''

        if em_min == 'None' or em_min is None:
            em_min = ''

        if em_max == 'None' or em_max is None:
            em_max = ''

        if dataproduct_type == 'none' or dataproduct_type is None:
            dataproduct_type = ''

        self.parameters = {
            'dataproduct_type': dataproduct_type,
            'obs_collection': obs_collection,
            'obs_title': obs_title,
            'obs_id': obs_id,
            'facility_name': facility_name,
            'instrument_name': instrument_name,
            'em_min': em_min,
            'em_max': em_max,
            'target_name': target_name,
            'obs_publisher_id': obs_publisher_id,
            's_fov': s_fov,
            'calibration_level': calibration_level,
            't_min': t_min,
            't_max': t_max
        }

        self.order_by = order_by

    def get_query(self):
        return ADQLObscoreQuery.base_query + \
            self.get_where_statement() + \
            self.get_order_by_statement()

    def get_order_by_statement(self):
        if self.order_by != '':
            return self._get_order_by_clause(self.order_by)
        else:
            return ''

    def _get_order_by_clause(self, order_type):

        obscore_order_type = ADQLObscoreQuery.order_by_field[order_type]

        return super()._get_order_by_clause(obscore_order_type)

    def get_where_statement(self):
        return self._get_where_clause(self.parameters)

    def _get_where_clause(self, parameters):
        return super()._get_where_clause(parameters)


class ADQLTapQuery(BaseADQLQuery):
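    """Builds a generic ADQL query against an arbitrary TAP table, e.g.
    get_query('ivoa.obscore', 'obs_id', 'x') returns
    "SELECT TOP 100 * FROM ivoa.obscore WHERE obs_id = 'x'"."""
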
    base_query = 'SELECT TOP ' + str(MAX_ALLOWED_ENTRIES) + ' * FROM '

    def __init__(self):
        super().__init__()

    def get_order_by_clause(self, order_type):
        return super()._get_order_by_clause(order_type)

    def get_query(self, table, where_field, where_condition):
        if where_field != '' and where_condition != '':
            return ADQLTapQuery.base_query + \
                str(table) + \
                ' WHERE ' + \
                str(where_field) + ' = ' + '\'' + \
                str(where_condition) + '\''
        else:
            return ADQLTapQuery.base_query + str(table)


class HTMLReport:
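    """Placeholder for a styled HTML report; not used anywhere else in
    this module."""
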
    _html_report_base_header = ''
    _html_report_base_body = ''
    _html_report_base_footer = ''
    _html_report_base_script = ''

    def __init__(self):
        pass


class OutputHandler:
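    """Renders the query results as an HTML table, either with the
    inline CSS defined in html_header or as a bare <table>."""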

    def __init__(self):
        pass

    @staticmethod
    def generate_html_output(urls_data, archive_name, adql_query):
        return OutputHandler.html_header + \
            OutputHandler.generate_html_content(
                urls_data,
                archive_name,
                adql_query,
                div_attr='class="title"',
                table_attr='class="fl-table"')

    @staticmethod
    def generate_basic_html_output(urls_data,
                                   archive_name,
                                   adql_query):
        return OutputHandler.generate_html_content(urls_data,
                                                   archive_name,
                                                   adql_query)

    @staticmethod
    def generate_html_content(urls_data, archive_name, adql_query,
                              div_attr="", table_attr="border='1'"):
        html_file = \
            f"""
            <div {div_attr}>
            <h2>Resources Preview archive:
            <span>
            {archive_name}
            </span>
            </h2>
            <span>ADQL query : {adql_query}</span>
            </div>"""

        html_file += f'<table {table_attr}><thead><tr>'

        for key in Utils.collect_resource_keys(urls_data):
            html_file += '<th>' + str(key) + '</th>'

        html_file += '</tr></thead><tbody>'

        for resource in urls_data:
            html_file += '<tr>'

            for key, value in resource.items():
                html_file += f'<td>{value}</td>'

            html_file += '<td>'
            for preview_key in \
                    ['preview', 'preview_url', 'postcard_url']:
                if preview_key in resource:
                    html_file += (
                        '<details><summary>Preview</summary>'
                        f'<img src="{resource[preview_key]}"/>'
                        '</details>'
                    )
            html_file += '</td>'
            html_file += '</tr>'

        html_file += '</tbody></table>'
        return html_file


    html_header = """ <head><style>

        details {
            padding: 10px;
        }

        .table-wrapper {
            margin: 10px 70px 70px;
            box-shadow: 0px 35px 50px rgba( 0, 0, 0, 0.2 );
        }

        .fl-table {
            border-radius: 5px;
            font-size: 12px;
            font-weight: normal;
            border: none;
            border-collapse: collapse;
            width: 100%;
            max-width: 100%;
            white-space: nowrap;
            background-color: white;
        }

        .fl-table td, .fl-table th {
            text-align: center;
            padding: 8px;
        }

        .fl-table td {
            border: 1px solid #999999;
            font-size: 15px;
        }

        .fl-table thead th {
            color: #ffffff;
            background: #4FC3A1;
            border: 1px solid #999999;
        }


        .fl-table thead th:nth-child(odd) {
            color: #ffffff;
            background: #324960;
        }

        .fl-table tr:nth-child(even) {
            background: #F8F8F8;
        }

        .title h2 {
            text-align: center;
            font-size: 22px;
            font-weight: 700; color:#202020;
            text-transform: uppercase;
            word-spacing: 1px; letter-spacing:2px;
            margin-bottom: 50px;
        }

        .title h2 span {
            padding-top: 40px;
            text-transform: none;
            font-size:.80em;
            font-weight: bold;
            font-family: "Playfair Display","Bookman",serif;
            color:#999;
            letter-spacing:-0.005em;
            word-spacing:1px;
            letter-spacing:none;
        }

        .title h1:before {
            background-color: #dfdfdf;
        }

    </style></head>"""


class FileHandler:
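    """Small collection of I/O helpers: downloading a file from a URL,
    writing the tool outputs, writing extra FITS files into a ./fits
    subdirectory and deriving a file name from a URL."""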

    def __init__(self):
        pass

    @staticmethod
    def download_file_from_url(file_url):
        with request.urlopen(file_url) as response:
            fits_file = response.read()

        return fits_file

    @staticmethod
    def write_file_to_output(file, output, write_type="w"):
        with open(output, write_type) as file_output:
            file_output.write(file)

    @staticmethod
    def write_urls_to_output(urls: list, output, access_url="access_url"):
        with open(output, "w") as file_output:
            for url in urls:
                try:
                    file_output.write(url[access_url] + ',')
                except Exception:
                    error_message = "url field not found for url"
                    Logger.create_action_log(
                        Logger.ACTION_ERROR,
                        Logger.ACTION_TYPE_WRITE_URL,
                        error_message)

    @staticmethod
    def write_file_to_subdir(file, index):
        base_dir = os.getcwd()

        base_dir += '/fits'

        upload_dir = os.path.join(base_dir, str(index) + '.fits')

        with open(upload_dir, "wb") as file_output:
            file_output.write(file)

    @staticmethod
    def get_file_name_from_url(url, index=None):
        url_parts = url.split('/')

        file_name = ''

        try:
            if url_parts[-1] != '':
                file_name = url_parts[-1]
            elif len(url_parts) > 1:
                file_name = url_parts[-2]
        except Exception:
            file_name = 'archive file '

        return file_name


class Utils:

    def __init__(self):
        pass

    @staticmethod
    def collect_resource_keys(urls_data: list) -> list:
        """
        Collect all the keys from the resources,
        keeping the order of key appearance in the resources.
        """

        resource_keys = []
        for resource in urls_data:
            for key in resource.keys():
                if key not in resource_keys:
                    resource_keys.append(key)
        return resource_keys


class Logger:
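    """Accumulates one log line per download/connection/write action and
    assembles them into the run summary written to the error output."""
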
    _logs = []

    ACTION_SUCCESS = 1
    ACTION_ERROR = 2

    ACTION_TYPE = 1
    INFO_TYPE = 2

    ACTION_TYPE_DOWNLOAD = 1
    ACTION_TYPE_ARCHIVE_CONNECTION = 2
    ACTION_TYPE_WRITE_URL = 3
    ACTION_TYPE_WRITE_FILE = 4

    def __init__(self):
        pass

    @staticmethod
    def create_action_log(outcome, action, message) -> bool:

        is_log_created = False
        log = ""

        if action == Logger.ACTION_TYPE_DOWNLOAD:
            if outcome == Logger.ACTION_SUCCESS:
                log += "Success downloading file : " + message
            else:
                log += "Error downloading file : " + message

            is_log_created = True
        elif action == Logger.ACTION_TYPE_ARCHIVE_CONNECTION:
            if outcome == Logger.ACTION_SUCCESS:
                log += "Success connecting to archive : " + message
            else:
                log += "Error connecting to archive : " + message

            is_log_created = True
        elif action == Logger.ACTION_TYPE_WRITE_URL:
            if outcome == Logger.ACTION_SUCCESS:
                log += "Success writing url to file : " + message
            else:
                log += "Error writing to file : " + message

            is_log_created = True

        if is_log_created:
            Logger._insert_log(Logger.ACTION_TYPE, log)

        return is_log_created

    @staticmethod
    def create_info_log(message):
        pass

    @staticmethod
    def _insert_log(log_type, log):
        Logger._logs.append(log)

    @staticmethod
    def create_log_file(archive_name, query):
        log_file = ""

        log_file += "Run summary for archive : " + archive_name + "\n"
        log_file += "With query : " + query + "\n"

        for log in Logger._logs:
            log_file += log + "\n"

        return log_file


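# Command-line interface. The positional arguments are, in order: the main
# (FITS) output, the CSV of URLs, the styled HTML preview, the basic HTML
# preview, the run-summary/error output and the JSON parameter file
# (presumably supplied by the Galaxy tool wrapper).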
if __name__ == "__main__":
    output = sys.argv[1]
    output_csv = sys.argv[2]
    output_html = sys.argv[3]
    output_basic_html = sys.argv[4]
    output_error = sys.argv[5]

    inputs = sys.argv[6]

    tool_runner = ToolRunner(inputs,
                             output,
                             output_csv,
                             output_html,
                             output_basic_html,
                             output_error)

    tool_runner.run()