changeset 427:4a385fdb9e58 draft

Uploaded
author francesco_lapi
date Wed, 10 Sep 2025 11:38:08 +0000
parents 00a78da611ba
children 8cd0c70b0084
files COBRAxy/custom_data_generator_beta.xml COBRAxy/marea_cluster.xml COBRAxy/ras_generator_beta.py COBRAxy/utils/general_utils.py COBRAxy/utils/model_utils.py COBRAxy/utils/reaction_parsing.py
diffstat 6 files changed, 88 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/COBRAxy/custom_data_generator_beta.xml	Wed Sep 10 09:25:32 2025 +0000
+++ b/COBRAxy/custom_data_generator_beta.xml	Wed Sep 10 11:38:08 2025 +0000
@@ -89,7 +89,7 @@
 
     <outputs>
         <data name="log" format="txt" label="CustomDataGenerator - Log" />
-        <data name="out_tabular" format="tabular" label="${cond_model.model_selector}_data_tabular" optional="true"/>
+        <data name="out_tabular" format="tabular" label="${cond_model.model_selector}_model_tabular" optional="true"/>
     </outputs>
 
     <help>
--- a/COBRAxy/marea_cluster.xml	Wed Sep 10 09:25:32 2025 +0000
+++ b/COBRAxy/marea_cluster.xml	Wed Sep 10 11:38:08 2025 +0000
@@ -19,6 +19,7 @@
         --out_log $log
         --best_cluster $best_cluster
         --cluster_type ${data.clust_type}
+		--scaling $scaling
         #if $data.clust_type == 'kmeans':
         	--k_min ${data.k_min}
         	--k_max ${data.k_max}
@@ -40,7 +41,8 @@
     </command>
     <inputs>
         <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" />
-        
+		<param name="scaling" argument="--scaling" type="boolean" value="true" label="Apply scaling to the dataset before clustering" />
+    
         <conditional name="data">
 			<param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
                 	<option value="kmeans" selected="true">KMeans</option>
--- a/COBRAxy/ras_generator_beta.py	Wed Sep 10 09:25:32 2025 +0000
+++ b/COBRAxy/ras_generator_beta.py	Wed Sep 10 11:38:08 2025 +0000
@@ -649,30 +649,50 @@
     dict_rule = {}
 
     try:
+        rows = utils.readCsv(datFilePath, delimiter = "\t", skipHeader=False)
+        if len(rows) <= 1:
+            raise ValueError("Model tabular with 1 column is not supported.")
+
+        if not rows:
+            raise ValueError("Model tabular is file is empty.")
+        
+        id_idx, idx_gpr = utils.findIdxByName(rows[0], "GPR")
+        
         # Proviamo prima con delimitatore tab
-        for line in utils.readCsv(datFilePath, delimiter = "\t"):
-            if len(line) < 3:  # Controlliamo che ci siano almeno 3 colonne
+        for line in rows[1:]:
+            if len(line) <= idx_gpr:
                 utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log)
                 continue
             
-            if line[2] == "":
-                dict_rule[line[0]] = ruleUtils.OpList([""])
+            if line[idx_gpr] == "":
+                dict_rule[line[id_idx]] = ruleUtils.OpList([""])
             else:
-                dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2])
+                dict_rule[line[id_idx]] = ruleUtils.parseRuleToNestedList(line[idx_gpr])
                 
     except Exception as e:
         # Se fallisce con tab, proviamo con virgola
         try:
-            dict_rule = {}
-            for line in utils.readCsv(datFilePath, delimiter = ","):
-                if len(line) < 3:
+            rows = utils.readCsv(datFilePath, delimiter = ",", skipHeader=False)
+            
+            if len(rows) <= 1:
+                raise ValueError("Model tabular with 1 column is not supported.")
+
+            if not rows:
+                raise ValueError("Model tabular is file is empty.")
+            
+            id_idx, idx_gpr = utils.findIdxByName(rows[0], "GPR")
+            
+            # Fallback: parse the data rows using the comma-delimited header indices
+            for line in rows[1:]:
+                if len(line) <= idx_gpr:
                     utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log)
                     continue
                 
-                if line[2] == "":
-                    dict_rule[line[0]] = ruleUtils.OpList([""])
+                if line[idx_gpr] == "":
+                    dict_rule[line[id_idx]] = ruleUtils.OpList([""])
                 else:
-                    dict_rule[line[0]] = ruleUtils.parseRuleToNestedList(line[2])
+                    dict_rule[line[id_idx]] = ruleUtils.parseRuleToNestedList(line[idx_gpr])
+                    
         except Exception as e2:
             raise ValueError(f"Unable to parse rules file. Tried both tab and comma delimiters. Original errors: Tab: {e}, Comma: {e2}")
 
--- a/COBRAxy/utils/general_utils.py	Wed Sep 10 09:25:32 2025 +0000
+++ b/COBRAxy/utils/general_utils.py	Wed Sep 10 11:38:08 2025 +0000
@@ -505,6 +505,39 @@
     """
     with open(path.show(), "r", newline = "") as fd: return list(csv.reader(fd, delimiter = delimiter))[skipHeader:]
 
+def findIdxByName(header: List[str], name: str, colName="name") -> "Tuple[int, int]":
+    """
+    Locate the 'ReactionID' column and a caller-specified column in a header row.
+
+    Args:
+        header (List[str]): The header row, as a list of column names.
+        name (str): The exact name of the column to look for (e.g. 'GPR').
+        colName (str, optional): Kept for backward compatibility; currently unused. Defaults to "name".
+
+    Returns:
+        Tuple[int, int]: A tuple containing:
+            - The index of the 'ReactionID' column.
+            - The index of the requested column `name`.
+
+    Raises:
+        ValueError: If 'ReactionID' or the requested column `name` is not found in the header.
+
+    Notes:
+        Matching is exact; if a column name occurs more than once, its last occurrence wins.
+    """
+    # Later duplicates overwrite earlier ones, so each name maps to its last position.
+    col_index = {col_name: idx for idx, col_name in enumerate(header)}
+
+    missing = [col for col in ("ReactionID", name) if col not in col_index]
+    if missing:
+        raise ValueError(
+            f"Tabular file must contain 'ReactionID' and {name} columns "
+            f"(missing: {', '.join(missing)})."
+        )
+
+    return col_index["ReactionID"], col_index[name]
+
+
 def readSvg(path :FilePath, customErr :Optional[Exception] = None) -> ET.ElementTree:
     """
     Reads the contents of a .svg file, which needs to exist at the given path.
--- a/COBRAxy/utils/model_utils.py	Wed Sep 10 09:25:32 2025 +0000
+++ b/COBRAxy/utils/model_utils.py	Wed Sep 10 11:38:08 2025 +0000
@@ -221,7 +221,7 @@
     for idx, row in df.iterrows():
 
         reaction_id = str(row['ReactionID']).strip()
-        reaction_formula = str(row['Reaction']).strip()
+        reaction_formula = str(row['Formula']).strip()
         
         # Salta reazioni senza formula
         if not reaction_formula or reaction_formula == 'nan':
@@ -236,8 +236,8 @@
         reaction.upper_bound = float(row['upper_bound']) if pd.notna(row['upper_bound']) else 1000.0
         
         # Aggiungi gene rule se presente
-        if pd.notna(row['Rule']) and str(row['Rule']).strip():
-            reaction.gene_reaction_rule = str(row['Rule']).strip()
+        if pd.notna(row['GPR']) and str(row['GPR']).strip():
+            reaction.gene_reaction_rule = str(row['GPR']).strip()
         
         # Parse della formula della reazione
         try:
--- a/COBRAxy/utils/reaction_parsing.py	Wed Sep 10 09:25:32 2025 +0000
+++ b/COBRAxy/utils/reaction_parsing.py	Wed Sep 10 11:38:08 2025 +0000
@@ -124,6 +124,22 @@
   Returns:
     ReactionsDict : dictionary encoding custom reactions information.
   """
-  reactionsData :Dict[str, str] = {row[0]: row[1] for row in utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t")} 
+  try:
+    rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False)
+    if len(rows) <= 1:
+      raise ValueError("The custom reactions file must contain at least one reaction.")
+
+    id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula")
+
+  except Exception as e:
+        
+    rows = utils.readCsv(utils.FilePath.fromStrPath(customReactionsPath), delimiter = "\t", skipHeader=False)
+    if len(rows) <= 1:
+      raise ValueError("The custom reactions file must contain at least one reaction.")
+    
+    id_idx, idx_formula = utils.findIdxByName(rows[0], "Formula")
+  
+  reactionsData = {row[id_idx] : row[idx_formula] for row in rows[1:]}
+  
   return create_reaction_dict(reactionsData)