Mercurial > repos > bimib > cobraxy
changeset 427:4a385fdb9e58 draft
Uploaded
author | francesco_lapi |
---|---|
date | Wed, 10 Sep 2025 11:38:08 +0000 |
parents | 00a78da611ba |
children | 8cd0c70b0084 |
files | COBRAxy/custom_data_generator_beta.xml COBRAxy/marea_cluster.xml COBRAxy/ras_generator_beta.py COBRAxy/utils/general_utils.py COBRAxy/utils/model_utils.py COBRAxy/utils/reaction_parsing.py |
diffstat | 6 files changed, 88 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/COBRAxy/custom_data_generator_beta.xml Wed Sep 10 09:25:32 2025 +0000 +++ b/COBRAxy/custom_data_generator_beta.xml Wed Sep 10 11:38:08 2025 +0000 @@ -89,7 +89,7 @@ <outputs> <data name="log" format="txt" label="CustomDataGenerator - Log" /> - <data name="out_tabular" format="tabular" label="${cond_model.model_selector}_data_tabular" optional="true"/> + <data name="out_tabular" format="tabular" label="${cond_model.model_selector}_model_tabular" optional="true"/> </outputs> <help>
--- a/COBRAxy/marea_cluster.xml Wed Sep 10 09:25:32 2025 +0000 +++ b/COBRAxy/marea_cluster.xml Wed Sep 10 11:38:08 2025 +0000 @@ -19,6 +19,7 @@ --out_log $log --best_cluster $best_cluster --cluster_type ${data.clust_type} + --scaling $scaling #if $data.clust_type == 'kmeans': --k_min ${data.k_min} --k_max ${data.k_max} @@ -40,7 +41,8 @@ </command> <inputs> <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" /> - + <param name="scaling" argument="--scaling" type="boolean" value="true" label="Apply scaling to the dataset before clustering" /> + <conditional name="data"> <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:"> <option value="kmeans" selected="true">KMeans</option>
--- a/COBRAxy/ras_generator_beta.py Wed Sep 10 09:25:32 2025 +0000 +++ b/COBRAxy/ras_generator_beta.py Wed Sep 10 11:38:08 2025 +0000 @@ -649,30 +649,50 @@ dict_rule = {} try: + rows = utils.readCsv(datFilePath, delimiter = "\t", skipHeader=False) + if len(rows) <= 1: + raise ValueError("Model tabular with 1 column is not supported.") + + if not rows: + raise ValueError("Model tabular is file is empty.") + + id_idx, idx_gpr = utils.findIdxByName(rows[0], "GPR") + # Proviamo prima con delimitatore tab - for line in utils.readCsv(datFilePath, delimiter = "\t"): - if len(line) < 3: # Controlliamo che ci siano almeno 3 colonne + for line in rows[1:]: + if len(line) <= idx_gpr: utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log) continue - if line[2] == "": - dict_rule[line[0]] = ruleUtils.OpList([""]) + if line[idx_gpr] == "": + dict_rule[line[id_idx]] = ruleUtils.OpList([""]) else: - dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2]) + dict_rule[line[id_idx]] = ruleUtils.parseRuleToNestedList(line[idx_gpr]) except Exception as e: # Se fallisce con tab, proviamo con virgola try: - dict_rule = {} - for line in utils.readCsv(datFilePath, delimiter = ","): - if len(line) < 3: + rows = utils.readCsv(datFilePath, delimiter = ",", skipHeader=False) + + if len(rows) <= 1: + raise ValueError("Model tabular with 1 column is not supported.") + + if not rows: + raise ValueError("Model tabular is file is empty.") + + id_idx, idx_gpr = utils.findIdxByName(rows[0], "GPR") + + # Proviamo prima con delimitatore tab + for line in rows[1:]: + if len(line) <= idx_gpr: utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log) continue - if line[2] == "": - dict_rule[line[0]] = ruleUtils.OpList([""]) + if line[idx_gpr] == "": + dict_rule[line[id_idx]] = ruleUtils.OpList([""]) else: - dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2]) + dict_rule[line[id_idx]] = ruleUtils.parseRuleToNestedList(line[idx_gpr]) + except Exception as e2: raise 
ValueError(f"Unable to parse rules file. Tried both tab and comma delimiters. Original errors: Tab: {e}, Comma: {e2}")
def findIdxByName(header: List[str], name: str, colName: str = "name") -> "Tuple[int, int]":
    """
    Locate the 'ReactionID' column and a caller-specified column in the header row
    of a tabular file.

    Column names are matched exactly first. Only when no exact match exists is a
    second lookup attempted that ignores a leading byte-order mark (U+FEFF) and
    surrounding whitespace, so headers exported by spreadsheet tools still resolve.

    Args:
        header (List[str]): The header row, as a list of column names.
        name (str): The name of the column to look for (e.g. 'GPR').
        colName (str, optional): Currently unused; kept in the signature so existing
            callers that pass it keep working. Defaults to "name".

    Returns:
        Tuple[int, int]: A tuple containing:
            - The index of the 'ReactionID' column.
            - The index of the requested column `name`.
        If a column name occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If 'ReactionID' or the requested column `name` is not found in the header.
    """
    # NOTE: the return annotation is quoted so this function does not require
    # `Tuple` to be imported at module level in order to be defined.

    # Exact names take priority so well-formed headers resolve exactly as before.
    exact = {col: idx for idx, col in enumerate(header)}
    # Fallback table: same names with a leading BOM and surrounding blanks removed.
    cleaned = {col.lstrip("\ufeff").strip(): idx for idx, col in enumerate(header)}

    def locate(column: str) -> Optional[int]:
        return exact.get(column, cleaned.get(column))

    id_idx = locate("ReactionID")
    idx_col = locate(name)

    missing = [
        column
        for column, idx in (("ReactionID", id_idx), (name, idx_col))
        if idx is None
    ]
    if missing:
        # Both columns are mandatory downstream; name the absent one(s) to ease debugging.
        raise ValueError(
            f"Tabular file must contain 'ReactionID' and {name} columns. "
            f"Missing: {', '.join(missing)}."
        )

    return id_idx, idx_col
--- a/COBRAxy/utils/model_utils.py Wed Sep 10 09:25:32 2025 +0000 +++ b/COBRAxy/utils/model_utils.py Wed Sep 10 11:38:08 2025 +0000 @@ -221,7 +221,7 @@ for idx, row in df.iterrows(): reaction_id = str(row['ReactionID']).strip() - reaction_formula = str(row['Reaction']).strip() + reaction_formula = str(row['Formula']).strip() # Salta reazioni senza formula if not reaction_formula or reaction_formula == 'nan': @@ -236,8 +236,8 @@ reaction.upper_bound = float(row['upper_bound']) if pd.notna(row['upper_bound']) else 1000.0 # Aggiungi gene rule se presente - if pd.notna(row['Rule']) and str(row['Rule']).strip(): - reaction.gene_reaction_rule = str(row['Rule']).strip() + if pd.notna(row['GPR']) and str(row['GPR']).strip(): + reaction.gene_reaction_rule = str(row['GPR']).strip() # Parse della formula della reazione try:
--- a/COBRAxy/utils/reaction_parsing.py Wed Sep 10 09:25:32 2025 +0000 +++ b/COBRAxy/utils/reaction_parsing.py Wed Sep 10 11:38:08 2025 +0000 @@ -124,6 +124,22 @@ Returns: ReactionsDict : dictionary encoding custom reactions information. """ - reactionsData :Dict[str, str] = {row[0]: row[1] for row in utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t")} + try: + rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False) + if len(rows) <= 1: + raise ValueError("The custom reactions file must contain at least one reaction.") + + id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula") + + except Exception as e: + + rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False) + if len(rows) <= 1: + raise ValueError("The custom reactions file must contain at least one reaction.") + + id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula") + + reactionsData = {row[id_idx] : row[idx_formula] for row in rows[1:]} + return create_reaction_dict(reactionsData)