diff pipeline_telescope.py @ 0:a35056104c2c draft default tip

planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools commit da42ae0d18f550dec7f6d7e29d297e7cf1909df2
author astroteam
date Fri, 13 Jun 2025 13:26:36 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pipeline_telescope.py	Fri Jun 13 13:26:36 2025 +0000
@@ -0,0 +1,92 @@
+import pandas as pd
+import re
+
+from rdflib import Graph, Namespace
+from rdflib.namespace import RDF, RDFS, SKOS
+
+from aux_functions import compute_sensitivity, compute_sensitivity_int, list_tel
+
+ODA = Namespace("https://odahub.io/ontology#")
+g_label_site = len("http://www.w3.org/2000/01/rdf-schema#label")
+
+
+def find_entity(g, class_, text_id_text, text_id_text_upper):
+    dict_ = {"label": {"val": [], "URI": [], "Sensitivity": []}, "altLabel": {"val": [], "URI": [], "Sensitivity": []}, "hiddenLabel": {"val": [], "URI": [], "Sensitivity": []}}
+    for i, [u_telescope, p, o] in enumerate(g.triples((None, RDF.type, class_))):
+
+        exists_label_telescope = 0
+        for s, p, label_telescope in g.triples((u_telescope, RDFS.label, None)):
+
+            result = re.search("\\b(" + label_telescope.lower() + ")([1-2]{0,1})\\b", text_id_text)
+            if result:
+                exists_label_telescope = 1
+                val_ = result.group(0)
+                # add_ = val_[len(label_telescope):]
+                dict_["label"]["val"].append(label_telescope)
+                dict_["label"]["URI"].append(u_telescope)
+                dict_["label"]["Sensitivity"].append(compute_sensitivity(list_tel(u_telescope, g)))
+
+        exists_altlabel_telescope = 0
+        if exists_label_telescope == 0:
+            for s, p, altlabel_telescope in g.triples((u_telescope, SKOS.altLabel, None)):
+
+                result = re.search("\\b(" + altlabel_telescope + ")\\b", text_id_text_upper)
+                if result:
+                    exists_altlabel_telescope = 1
+                    val_ = result.group(0)
+                    dict_["altLabel"]["val"].append(val_)
+                    dict_["altLabel"]["URI"].append(u_telescope)
+                    dict_["altLabel"]["Sensitivity"].append(compute_sensitivity(list_tel(u_telescope, g)))
+
+            if exists_altlabel_telescope == 0:
+                for s, p, hiddenlabel_telescope in g.triples((u_telescope, SKOS.hiddenLabel, None)):
+
+                    result = re.search("\\b" + hiddenlabel_telescope.lower() + "\\b", text_id_text)
+                    if result:
+                        val_ = result.group(0)
+                        dict_["hiddenLabel"]["val"].append(hiddenlabel_telescope)
+                        dict_["hiddenLabel"]["URI"].append(u_telescope)
+                        dict_["hiddenLabel"]["Sensitivity"].append(compute_sensitivity(list_tel(u_telescope, g)))
+
+    return dict_
+
+
+def rule_based_telescope_detector(text_id, text_id_text, telescope_ontology):
+    g = Graph()
+    g.parse(telescope_ontology, format="n3")
+
+    text_id_text_lower = text_id_text.lower()
+
+    dict_observatory = find_entity(g, ODA.observatory,     text_id_text_lower, text_id_text)
+    dict_survey = find_entity(g, ODA.survey,               text_id_text_lower, text_id_text)
+    dict_telescope = find_entity(g, ODA.telescope,         text_id_text_lower, text_id_text)
+    dict_misctelescope = find_entity(g, ODA.misctelescope, text_id_text_lower, text_id_text)
+    dict_telescopetype = find_entity(g, ODA.telescopetype, text_id_text_lower, text_id_text)
+
+    dict_spacetelescope = find_entity(g, ODA.spacetelescope, text_id_text_lower, text_id_text)
+    dict_instrument = find_entity(g, ODA.instrument,         text_id_text_lower, text_id_text)
+    dict_institution = find_entity(g, ODA.institution,       text_id_text_lower, text_id_text)
+    dict_radiotelescope = find_entity(g, ODA.radiotelescope, text_id_text_lower, text_id_text)
+
+    tel_sur_obs = []
+    type_key = []
+    uri_list = []
+    sens_list = []
+
+    for key in ["label", "altLabel", "hiddenLabel"]:
+        list_key = dict_institution[key]["val"] + dict_spacetelescope[key]["val"] + dict_telescope[key]["val"] + dict_survey[key]["val"] + dict_observatory[key]["val"] + dict_radiotelescope[key]["val"] + dict_instrument[key]["val"] + dict_telescopetype[key]["val"] + dict_misctelescope[key]["val"]
+        tel_sur_obs += list_key
+
+        list_uri_key = dict_institution[key]["URI"] + dict_spacetelescope[key]["URI"] + dict_telescope[key]["URI"] + dict_survey[key]["URI"] + dict_observatory[key]["URI"] + dict_radiotelescope[key]["URI"] + dict_instrument[key]["URI"] + dict_telescopetype[key]["URI"] + dict_misctelescope[key]["URI"]
+        uri_list += list_uri_key
+
+        sens_list += dict_institution[key]["Sensitivity"] + dict_spacetelescope[key]["Sensitivity"] + dict_telescope[key]["Sensitivity"] + dict_survey[key]["Sensitivity"] + dict_observatory[key]["Sensitivity"] + dict_radiotelescope[key]["Sensitivity"] + dict_instrument[key]["Sensitivity"] + dict_telescopetype[key]["Sensitivity"] + dict_misctelescope[key]["Sensitivity"]
+
+        type_key += [key]*len(list_key)
+
+    dict_data = {"TEXT_ID": [text_id] * len(tel_sur_obs), "Telescope": tel_sur_obs, "LabelType": type_key, "URI": uri_list, "Sensitivity": sens_list, "Total Sensitivity": [compute_sensitivity_int(sens_list)] * len(tel_sur_obs)}
+
+    df_data = pd.DataFrame(dict_data)
+    df_data.drop_duplicates(subset=['URI'], inplace=True)
+
+    return df_data