comparison pipeline_telescope.py @ 0:a35056104c2c draft default tip

planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools commit da42ae0d18f550dec7f6d7e29d297e7cf1909df2
author astroteam
date Fri, 13 Jun 2025 13:26:36 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a35056104c2c
1 import pandas as pd
2 import re
3
4 from rdflib import Graph, Namespace
5 from rdflib.namespace import RDF, RDFS, SKOS
6
7 from aux_functions import compute_sensitivity, compute_sensitivity_int, list_tel
8
# Namespace of the ODA ontology; attribute access such as ``ODA.telescope``
# yields the class terms that rule_based_telescope_detector passes to
# find_entity.
ODA = Namespace("https://odahub.io/ontology#")
# Length of the rdfs:label IRI string.
# NOTE(review): no use of this constant is visible in this view of the file.
g_label_site = len("http://www.w3.org/2000/01/rdf-schema#label")
11
12
def find_entity(g, class_, text_id_text, text_id_text_upper):
    """Find entities of ontology class ``class_`` that are mentioned in a text.

    Every subject ``u`` with ``(u, rdf:type, class_)`` in ``g`` is tested
    against the text through three kinds of labels:

    * ``rdfs:label``: lower-cased and searched in ``text_id_text`` as a whole
      word, optionally followed by a single digit ``1`` or ``2``.
    * ``skos:altLabel``: searched as written (case-sensitive) in
      ``text_id_text_upper``; only tried when no ``rdfs:label`` matched.
    * ``skos:hiddenLabel``: lower-cased and searched in ``text_id_text``;
      only tried when no ``skos:altLabel`` matched.

    Labels are matched literally: they are passed through ``re.escape`` so
    that characters such as ``.``, ``+`` or ``(`` in an ontology label can
    neither act as regex operators nor raise ``re.error``.

    Args:
        g: Graph object exposing ``triples((s, p, o))``; the caller visible
            in this file passes an ``rdflib.Graph``.
        class_: Ontology class term whose instances are searched for.
        text_id_text: Text searched for ``rdfs:label`` and
            ``skos:hiddenLabel``. Labels are lower-cased before the search,
            and the visible caller passes the lower-cased text.
        text_id_text_upper: Text searched for ``skos:altLabel``. The visible
            caller passes the text in its original case.

    Returns:
        dict: Keys ``"label"``, ``"altLabel"`` and ``"hiddenLabel"``. Each
        value is a dict of three parallel lists: ``"val"`` (the ontology
        label, or the matched text for ``altLabel``), ``"URI"`` (the entity)
        and ``"Sensitivity"`` (``compute_sensitivity(list_tel(entity, g))``).
    """
    found = {
        kind: {"val": [], "URI": [], "Sensitivity": []}
        for kind in ("label", "altLabel", "hiddenLabel")
    }

    def record(kind, value, entity):
        # One hit = one entry in each of the three parallel lists.
        bucket = found[kind]
        bucket["val"].append(value)
        bucket["URI"].append(entity)
        bucket["Sensitivity"].append(compute_sensitivity(list_tel(entity, g)))

    for entity, _, _ in g.triples((None, RDF.type, class_)):
        label_matched = False
        for _, _, label in g.triples((entity, RDFS.label, None)):
            # The optional trailing [1-2] lets e.g. "<label>1" / "<label>2"
            # count as a mention of <label>.
            pattern = r"\b(" + re.escape(label.lower()) + r")([1-2]{0,1})\b"
            if re.search(pattern, text_id_text):
                label_matched = True
                record("label", label, entity)

        alt_matched = False
        if not label_matched:
            for _, _, alt_label in g.triples((entity, SKOS.altLabel, None)):
                pattern = r"\b(" + re.escape(str(alt_label)) + r")\b"
                hit = re.search(pattern, text_id_text_upper)
                if hit:
                    alt_matched = True
                    record("altLabel", hit.group(0), entity)

        # NOTE(review): the reviewed copy of this function had lost its
        # indentation. This check is assumed to sit at the same level as the
        # altLabel check above, so hidden labels are also tried after an
        # rdfs:label match. Confirm against the canonical file.
        if not alt_matched:
            for _, _, hidden_label in g.triples((entity, SKOS.hiddenLabel, None)):
                pattern = r"\b" + re.escape(hidden_label.lower()) + r"\b"
                if re.search(pattern, text_id_text):
                    record("hiddenLabel", hidden_label, entity)

    return found
52
53
def rule_based_telescope_detector(text_id, text_id_text, telescope_ontology):
    """Build a table of the ontology entities mentioned in one text.

    The ontology is parsed from ``telescope_ontology`` in N3 format, and
    ``find_entity`` is run once per ODA class. All hits are merged into a
    single DataFrame, grouped first by label kind (``label``, ``altLabel``,
    ``hiddenLabel``) and then by class in a fixed order.

    Args:
        text_id: Identifier repeated in the ``TEXT_ID`` column of every row.
        text_id_text: The text to analyse. It is passed to ``find_entity``
            both lower-cased and unchanged.
        telescope_ontology: Source handed to ``Graph.parse(..., format="n3")``.

    Returns:
        pandas.DataFrame: Columns ``TEXT_ID``, ``Telescope``, ``LabelType``,
        ``URI``, ``Sensitivity`` and ``Total Sensitivity``, with only the
        first row kept per ``URI``. ``Total Sensitivity`` is computed from
        all hits, before duplicate URIs are dropped.
    """
    graph = Graph()
    graph.parse(telescope_ontology, format="n3")

    lowered = text_id_text.lower()

    # Lookups are issued in this order; the merge below uses a different one.
    hits_by_class = {
        "observatory": find_entity(graph, ODA.observatory, lowered, text_id_text),
        "survey": find_entity(graph, ODA.survey, lowered, text_id_text),
        "telescope": find_entity(graph, ODA.telescope, lowered, text_id_text),
        "misctelescope": find_entity(graph, ODA.misctelescope, lowered, text_id_text),
        "telescopetype": find_entity(graph, ODA.telescopetype, lowered, text_id_text),
        "spacetelescope": find_entity(graph, ODA.spacetelescope, lowered, text_id_text),
        "instrument": find_entity(graph, ODA.instrument, lowered, text_id_text),
        "institution": find_entity(graph, ODA.institution, lowered, text_id_text),
        "radiotelescope": find_entity(graph, ODA.radiotelescope, lowered, text_id_text),
    }

    # Class order inside each label kind. drop_duplicates keeps the first
    # row per URI, so this order decides which duplicate survives.
    merge_order = (
        "institution",
        "spacetelescope",
        "telescope",
        "survey",
        "observatory",
        "radiotelescope",
        "instrument",
        "telescopetype",
        "misctelescope",
    )

    names = []
    label_kinds = []
    uris = []
    sensitivities = []

    for kind in ("label", "altLabel", "hiddenLabel"):
        count_before = len(names)
        for class_name in merge_order:
            hits = hits_by_class[class_name][kind]
            names.extend(hits["val"])
            uris.extend(hits["URI"])
            sensitivities.extend(hits["Sensitivity"])
        label_kinds.extend([kind] * (len(names) - count_before))

    n_rows = len(names)
    columns = {
        "TEXT_ID": [text_id] * n_rows,
        "Telescope": names,
        "LabelType": label_kinds,
        "URI": uris,
        "Sensitivity": sensitivities,
        "Total Sensitivity": [compute_sensitivity_int(sensitivities)] * n_rows,
    }

    return pd.DataFrame(columns).drop_duplicates(subset=["URI"])