Mercurial > repos > astroteam > analyse_short_astro_text_astro_tool
comparison pipeline_telescope.py @ 0:a35056104c2c draft default tip
planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools commit da42ae0d18f550dec7f6d7e29d297e7cf1909df2
author | astroteam |
---|---|
date | Fri, 13 Jun 2025 13:26:36 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a35056104c2c |
---|---|
1 import pandas as pd | |
2 import re | |
3 | |
4 from rdflib import Graph, Namespace | |
5 from rdflib.namespace import RDF, RDFS, SKOS | |
6 | |
7 from aux_functions import compute_sensitivity, compute_sensitivity_int, list_tel | |
8 | |
9 ODA = Namespace("https://odahub.io/ontology#") | |
10 g_label_site = len("http://www.w3.org/2000/01/rdf-schema#label") | |
11 | |
12 | |
def find_entity(g, class_, text_id_text, text_id_text_upper):
    """Find ontology entities of type ``class_`` whose labels occur in a text.

    For every subject in ``g`` typed as ``class_`` the labels are tried in
    order of preference:

    * ``rdfs:label`` -- lowercased and searched in ``text_id_text``; the
      match may be followed by a single digit ``1`` or ``2``.
    * ``skos:altLabel`` -- searched as-is (case-sensitively) in
      ``text_id_text_upper``; only tried when no ``rdfs:label`` matched.
    * ``skos:hiddenLabel`` -- lowercased and searched in ``text_id_text``;
      tried whenever no ``skos:altLabel`` match was recorded.

    Args:
        g: Graph-like object exposing ``triples((s, p, o))``.
        class_: RDF class whose instances are searched for.
        text_id_text: Text matched against lowercased labels (the caller in
            this file passes the lowercased text).
        text_id_text_upper: Text matched against alternative labels (the
            caller in this file passes the text in its original case).

    Returns:
        dict keyed by ``"label"``, ``"altLabel"`` and ``"hiddenLabel"``; each
        value holds three parallel lists ``"val"``, ``"URI"`` and
        ``"Sensitivity"``.  ``"val"`` is the ontology label for ``label`` and
        ``hiddenLabel`` hits and the matched text for ``altLabel`` hits.
    """
    dict_ = {
        "label": {"val": [], "URI": [], "Sensitivity": []},
        "altLabel": {"val": [], "URI": [], "Sensitivity": []},
        "hiddenLabel": {"val": [], "URI": [], "Sensitivity": []},
    }

    def _record(label_type, value, uri):
        # Append one hit to the three parallel lists of ``label_type``.
        bucket = dict_[label_type]
        bucket["val"].append(value)
        bucket["URI"].append(uri)
        bucket["Sensitivity"].append(compute_sensitivity(list_tel(uri, g)))

    # Labels are literal text, so they are escaped before being interpolated
    # into a pattern; otherwise characters such as '+', '(' or '[' in a label
    # are read as regex syntax and can raise re.error or match the wrong text.
    for u_telescope, _, _ in g.triples((None, RDF.type, class_)):

        label_found = False
        for _, _, label_telescope in g.triples((u_telescope, RDFS.label, None)):
            pattern = "\\b(" + re.escape(label_telescope.lower()) + ")([1-2]{0,1})\\b"
            if re.search(pattern, text_id_text):
                label_found = True
                _record("label", label_telescope, u_telescope)

        altlabel_found = False
        if not label_found:
            for _, _, altlabel_telescope in g.triples((u_telescope, SKOS.altLabel, None)):
                pattern = "\\b(" + re.escape(altlabel_telescope) + ")\\b"
                result = re.search(pattern, text_id_text_upper)
                if result:
                    altlabel_found = True
                    _record("altLabel", result.group(0), u_telescope)

        # NOTE(review): nesting reconstructed from a listing without
        # indentation -- this check is assumed to sit at the same level as the
        # altLabel check above (so hidden labels are also tried after a
        # primary-label hit); confirm against the upstream file.
        if not altlabel_found:
            for _, _, hiddenlabel_telescope in g.triples((u_telescope, SKOS.hiddenLabel, None)):
                pattern = "\\b" + re.escape(hiddenlabel_telescope.lower()) + "\\b"
                if re.search(pattern, text_id_text):
                    _record("hiddenLabel", hiddenlabel_telescope, u_telescope)

    return dict_
52 | |
53 | |
def rule_based_telescope_detector(text_id, text_id_text, telescope_ontology):
    """Detect ontology entities mentioned in a text and tabulate the hits.

    The ontology is parsed in N3 format and ``find_entity`` is run once per
    ODA class.  Hits are gathered label type by label type (``label``, then
    ``altLabel``, then ``hiddenLabel``), and within each label type in the
    class order listed below.

    Args:
        text_id: Identifier repeated in the ``TEXT_ID`` column of every row.
        text_id_text: Text to scan; a lowercased copy is used for the
            label / hidden-label matching and the original for alt labels.
        telescope_ontology: Source handed to ``Graph.parse``.

    Returns:
        pandas.DataFrame with columns ``TEXT_ID``, ``Telescope``,
        ``LabelType``, ``URI``, ``Sensitivity`` and ``Total Sensitivity``,
        keeping only the first row for each ``URI``.
    """
    graph = Graph()
    graph.parse(telescope_ontology, format="n3")

    lowered_text = text_id_text.lower()

    # The order of this tuple fixes the row order within each label type.
    entity_classes = (
        ODA.institution,
        ODA.spacetelescope,
        ODA.telescope,
        ODA.survey,
        ODA.observatory,
        ODA.radiotelescope,
        ODA.instrument,
        ODA.telescopetype,
        ODA.misctelescope,
    )
    matches = [
        find_entity(graph, entity_class, lowered_text, text_id_text)
        for entity_class in entity_classes
    ]

    names = []
    label_types = []
    uris = []
    sensitivities = []

    for label_type in ("label", "altLabel", "hiddenLabel"):
        found_names = [value for match in matches for value in match[label_type]["val"]]
        names.extend(found_names)
        uris.extend(uri for match in matches for uri in match[label_type]["URI"])
        sensitivities.extend(
            sens for match in matches for sens in match[label_type]["Sensitivity"]
        )
        label_types.extend([label_type] * len(found_names))

    n_rows = len(names)
    # The total is computed over every hit, before duplicate URIs are dropped.
    total_sensitivity = compute_sensitivity_int(sensitivities)

    table = pd.DataFrame(
        {
            "TEXT_ID": [text_id] * n_rows,
            "Telescope": names,
            "LabelType": label_types,
            "URI": uris,
            "Sensitivity": sensitivities,
            "Total Sensitivity": [total_sensitivity] * n_rows,
        }
    )
    table.drop_duplicates(subset=['URI'], inplace=True)

    return table