Mercurial > repos > astroteam > analyse_short_astro_text_astro_tool
view pipeline_telescope.py @ 0:a35056104c2c draft default tip
planemo upload for repository https://github.com/esg-epfl-apc/tools-astro/tree/main/tools commit da42ae0d18f550dec7f6d7e29d297e7cf1909df2
author | astroteam |
---|---|
date | Fri, 13 Jun 2025 13:26:36 +0000 |
parents | |
children |
line wrap: on
line source
import re

import pandas as pd
from rdflib import Graph, Namespace
from rdflib.namespace import RDF, RDFS, SKOS

from aux_functions import compute_sensitivity, compute_sensitivity_int, list_tel

ODA = Namespace("https://odahub.io/ontology#")
# Length of the rdfs:label IRI; not used by the two functions defined here.
g_label_site = len("http://www.w3.org/2000/01/rdf-schema#label")


def find_entity(g, class_, text_id_text, text_id_text_upper):
    """Collect the entities of one ontology class that are mentioned in a text.

    Every subject typed ``class_`` in ``g`` is tested against the text, trying
    its ``rdfs:label`` values first, then ``skos:altLabel``, then
    ``skos:hiddenLabel``. A later label kind is only tried when the earlier
    kinds produced no match for that subject.

    NOTE(review): this file reached review with its line structure collapsed;
    the label -> altLabel -> hiddenLabel cascade is the reconstructed nesting
    and should be confirmed against the repository copy.

    NOTE(review): label text is interpolated into the regular expressions
    without escaping, so regex metacharacters in a label are interpreted as
    regex syntax. Confirm whether the ontology relies on that.

    Parameters
    ----------
    g
        Graph-like object exposing ``triples((s, p, o))``.
    class_
        Ontology class whose instances are looked up.
    text_id_text
        Text searched with the lower-cased ``label`` and ``hiddenLabel``
        values (the caller in this file passes the lower-cased text).
    text_id_text_upper
        Text searched with the ``altLabel`` values exactly as stored (the
        caller in this file passes the text in its original case).

    Returns
    -------
    dict
        Keys ``"label"``, ``"altLabel"`` and ``"hiddenLabel"``, each mapping
        to parallel lists under ``"val"``, ``"URI"`` and ``"Sensitivity"``.
        ``"val"`` holds the ontology label for ``label`` and ``hiddenLabel``
        hits and the matched text for ``altLabel`` hits.
    """
    matches = {
        kind: {"val": [], "URI": [], "Sensitivity": []}
        for kind in ("label", "altLabel", "hiddenLabel")
    }

    def record(kind, value, uri):
        # Keep the three parallel lists of one label kind in step.
        bucket = matches[kind]
        bucket["val"].append(value)
        bucket["URI"].append(uri)
        bucket["Sensitivity"].append(compute_sensitivity(list_tel(uri, g)))

    for entity_uri, _, _ in g.triples((None, RDF.type, class_)):
        # 1) Preferred label; an optional trailing "1" or "2" is accepted.
        label_hit = False
        for _, _, label in g.triples((entity_uri, RDFS.label, None)):
            pattern = "\\b(" + label.lower() + ")([1-2]{0,1})\\b"
            if re.search(pattern, text_id_text):
                label_hit = True
                record("label", label, entity_uri)
        if label_hit:
            continue

        # 2) Alternative labels, matched case-sensitively.
        alt_hit = False
        for _, _, alt_label in g.triples((entity_uri, SKOS.altLabel, None)):
            found = re.search("\\b(" + alt_label + ")\\b", text_id_text_upper)
            if found:
                alt_hit = True
                record("altLabel", found.group(0), entity_uri)
        if alt_hit:
            continue

        # 3) Hidden labels as the last resort.
        for _, _, hidden_label in g.triples((entity_uri, SKOS.hiddenLabel, None)):
            if re.search("\\b" + hidden_label.lower() + "\\b", text_id_text):
                record("hiddenLabel", hidden_label, entity_uri)

    return matches


def rule_based_telescope_detector(text_id, text_id_text, telescope_ontology):
    """Build a table of ontology entities detected in one text.

    The ontology is parsed in ``n3`` format, ``find_entity`` is run for each
    of nine ``ODA`` classes, and the hits are merged into one DataFrame with
    all ``label`` hits first, then ``altLabel``, then ``hiddenLabel``.

    Parameters
    ----------
    text_id
        Identifier copied into the ``TEXT_ID`` column of every row.
    text_id_text
        Text to scan; a lower-cased copy is used for label/hiddenLabel
        matching and the original for altLabel matching.
    telescope_ontology
        Source handed to ``Graph.parse``.

    Returns
    -------
    pandas.DataFrame
        Columns ``TEXT_ID``, ``Telescope``, ``LabelType``, ``URI``,
        ``Sensitivity`` and ``Total Sensitivity``, keeping the first row per
        ``URI``. ``Total Sensitivity`` is computed from all hits, before the
        duplicate URIs are dropped.
    """
    graph = Graph()
    graph.parse(telescope_ontology, format="n3")
    lowered_text = text_id_text.lower()

    # Order in which the ontology classes are queried.
    lookup_order = (
        "observatory",
        "survey",
        "telescope",
        "misctelescope",
        "telescopetype",
        "spacetelescope",
        "instrument",
        "institution",
        "radiotelescope",
    )
    found = {}
    for class_name in lookup_order:
        found[class_name] = find_entity(
            graph, getattr(ODA, class_name), lowered_text, text_id_text
        )

    # Order in which the per-class hits are laid out in the result; since
    # duplicates keep their first row, this order decides which row survives.
    output_order = (
        "institution",
        "spacetelescope",
        "telescope",
        "survey",
        "observatory",
        "radiotelescope",
        "instrument",
        "telescopetype",
        "misctelescope",
    )

    tel_sur_obs = []
    type_key = []
    uri_list = []
    sens_list = []
    for label_kind in ("label", "altLabel", "hiddenLabel"):
        kind_values = []
        for class_name in output_order:
            hits = found[class_name][label_kind]
            kind_values.extend(hits["val"])
            uri_list.extend(hits["URI"])
            sens_list.extend(hits["Sensitivity"])
        tel_sur_obs.extend(kind_values)
        type_key.extend([label_kind] * len(kind_values))

    row_count = len(tel_sur_obs)
    total_sensitivity = compute_sensitivity_int(sens_list)
    df_data = pd.DataFrame(
        {
            "TEXT_ID": [text_id] * row_count,
            "Telescope": tel_sur_obs,
            "LabelType": type_key,
            "URI": uri_list,
            "Sensitivity": sens_list,
            "Total Sensitivity": [total_sensitivity] * row_count,
        }
    )
    return df_data.drop_duplicates(subset=["URI"])