comparison COBRAxy/ras_to_bounds_beta.py @ 411:6b015d3184ab draft

Uploaded
author francesco_lapi
date Mon, 08 Sep 2025 21:07:34 +0000
parents f413b78d61bf
children 5086145cfb96
comparison
equal deleted inserted replaced
410:d660c5b03c14 411:6b015d3184ab
28 description = 'process some value\'s') 28 description = 'process some value\'s')
29 29
30 30
31 parser.add_argument("-mo", "--model_upload", type = str, 31 parser.add_argument("-mo", "--model_upload", type = str,
32 help = "path to input file with custom rules, if provided") 32 help = "path to input file with custom rules, if provided")
33
34 parser.add_argument("-meo", "--medium", type = str,
35 help = "path to input file with custom medium, if provided")
36 33
37 parser.add_argument('-ol', '--out_log', 34 parser.add_argument('-ol', '--out_log',
38 help = "Output log") 35 help = "Output log")
39 36
40 parser.add_argument('-td', '--tool_dir', 37 parser.add_argument('-td', '--tool_dir',
63 '-idop', '--output_path', 60 '-idop', '--output_path',
64 type = str, 61 type = str,
65 default='ras_to_bounds/', 62 default='ras_to_bounds/',
66 help = 'output path for maps') 63 help = 'output path for maps')
67 64
65 parser.add_argument('-sm', '--save_models',
66 type=utils.Bool("save_models"),
67 default=False,
68 help = 'whether to save models with applied bounds')
69
70 parser.add_argument('-smp', '--save_models_path',
71 type = str,
72 default='saved_models/',
73 help = 'output path for saved models')
74
75 parser.add_argument('-smf', '--save_models_format',
76 type = str,
77 default='csv',
78 help = 'format for saved models (csv, xml, json, mat, yaml, tabular)')
79
68 80
69 ARGS = parser.parse_args(args) 81 ARGS = parser.parse_args(args)
70 return ARGS 82 return ARGS
71 83
72 ########################### warning ########################################### 84 ########################### warning ###########################################
78 s (str): The warning message to be logged and printed. 90 s (str): The warning message to be logged and printed.
79 91
80 Returns: 92 Returns:
81 None 93 None
82 """ 94 """
83 with open(ARGS.out_log, 'a') as log: 95 if ARGS.out_log:
84 log.write(s + "\n\n") 96 with open(ARGS.out_log, 'a') as log:
97 log.write(s + "\n\n")
85 print(s) 98 print(s)
86 99
87 ############################ dataset input #################################### 100 ############################ dataset input ####################################
88 def read_dataset(data :str, name :str) -> pd.DataFrame: 101 def read_dataset(data :str, name :str) -> pd.DataFrame:
89 """ 102 """
134 if upper_bound!=0 and lower_bound!=0: 147 if upper_bound!=0 and lower_bound!=0:
135 new_bounds.loc[reaction, "lower_bound"] = valMin 148 new_bounds.loc[reaction, "lower_bound"] = valMin
136 new_bounds.loc[reaction, "upper_bound"] = valMax 149 new_bounds.loc[reaction, "upper_bound"] = valMax
137 return new_bounds 150 return new_bounds
138 151
def _save_model_tabular(model, filepath):
    """
    Write a COBRA model as one tab-separated table with a row per reaction.

    The table is assembled from the project's ``utils`` helpers (rules,
    reaction formulas, bounds, medium and, optionally, compartments) and is
    sorted so that reactions belonging to the medium come first.

    Args:
        model (cobra.Model): The model to export.
        filepath (str): Destination file path.

    Returns:
        None
    """
    rules = utils.generate_rules(model, asParsed = False)
    reactions = utils.generate_reactions(model, asParsed = False)
    bounds = utils.generate_bounds(model)
    medium = utils.get_medium(model)

    # Compartments are optional: any failure while extracting them simply
    # disables the column. Only Exception is caught so that Ctrl-C and
    # interpreter shutdown are not swallowed.
    try:
        compartments = utils.generate_compartments(model)
    except Exception:
        compartments = None

    df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "Rule"])
    df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Reaction"])
    df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"})
    df_medium = medium.rename(columns = {"reaction": "ReactionID"})
    df_medium["InMedium"] = True  # flag marking that the reaction is part of the medium

    merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer")
    merged = merged.merge(df_bounds, on = "ReactionID", how = "outer")

    # Add compartments only if they exist and the model name is ENGRO2.
    # ARGS is the module-level namespace of parsed command-line arguments.
    if compartments is not None and getattr(ARGS, 'name', None) == "ENGRO2":
        merged = merged.merge(compartments, on = "ReactionID", how = "outer")

    # Left merge: reactions outside the medium get NaN, turned into False.
    merged = merged.merge(df_medium, on = "ReactionID", how = "left")
    merged["InMedium"] = merged["InMedium"].fillna(False)
    merged = merged.sort_values(by = "InMedium", ascending = False)

    merged.to_csv(filepath, sep="\t", index=False)


def save_model(model, filename, output_folder, file_format='csv'):
    """
    Save a COBRA model to file in the specified format.

    Saving is best-effort: any error raised while building or writing the
    file is reported through ``warning`` instead of being propagated.
    Failure to create ``output_folder`` is NOT caught and propagates.

    Args:
        model (cobra.Model): The model to save.
        filename (str): Base filename (without extension).
        output_folder (str): Output directory (created if missing).
        file_format (str): File format ('xml', 'json', 'mat', 'yaml', 'tabular', 'csv').
            'tabular' and 'csv' both produce a tab-separated ``<filename>.csv``.

    Returns:
        None
    """
    # exist_ok avoids the check-then-create race when several parallel
    # workers try to create the same folder at the same time.
    os.makedirs(output_folder, exist_ok=True)

    # cobra.io writer function for each standard format; resolved lazily so
    # that a writer missing from the installed cobra only affects its format.
    cobra_writers = {
        'xml': 'write_sbml_model',
        'json': 'save_json_model',
        'mat': 'save_matlab_model',
        'yaml': 'save_yaml_model',
    }

    try:
        if file_format in ('tabular', 'csv'):
            # Special handling for tabular format using utils functions
            filepath = os.path.join(output_folder, f"{filename}.csv")
            _save_model_tabular(model, filepath)
        else:
            # Standard COBRA formats
            filepath = os.path.join(output_folder, f"{filename}.{file_format}")
            if file_format not in cobra_writers:
                raise ValueError(f"Unsupported format: {file_format}")
            getattr(cobra.io, cobra_writers[file_format])(model, filepath)

        print(f"Model saved: {filepath}")

    except Exception as e:
        warning(f"Error saving model {filename}: {str(e)}")
221
def apply_bounds_to_model(model, bounds):
    """
    Apply bounds from a DataFrame to a copy of a COBRA model.

    The input model is left untouched; all changes are made on ``model.copy()``.
    Reaction ids present in ``bounds`` but absent from the model are skipped.

    Args:
        model (cobra.Model): The metabolic model to modify.
        bounds (pd.DataFrame): DataFrame indexed by reaction id with the
            columns "lower_bound" and "upper_bound".

    Returns:
        cobra.Model: Modified copy of the model with the new bounds.

    Raises:
        KeyError: If ``bounds`` lacks the "lower_bound" or "upper_bound" column.
    """
    model_copy = model.copy()

    # Columns are read up-front so that a malformed DataFrame fails loudly
    # instead of being mistaken for "reaction not found" below.
    lower_values = bounds["lower_bound"]
    upper_values = bounds["upper_bound"]

    for reaction_id, lower, upper in zip(bounds.index, lower_values, upper_values):
        try:
            reaction = model_copy.reactions.get_by_id(reaction_id)
        except KeyError:
            # Reaction not found in model, skip
            continue
        # Assign both bounds at once: setting them one after the other can
        # transiently make lower > upper (e.g. new lower above the old upper),
        # which the reaction's bound setters may reject.
        reaction.bounds = (lower, upper)
    return model_copy
243
def process_ras_cell(cellName, ras_row, model, rxns_ids, output_folder, save_models=False, save_models_path='saved_models/', save_models_format='csv'):
    """
    Process a single RAS cell, apply bounds, and save the bounds to a CSV file.

    The bounds currently set on ``model`` are scaled through ``apply_ras_bounds``
    using ``ras_row`` and written, tab-separated, to
    ``<output_folder>/<cellName>.csv``. Optionally a copy of the model carrying
    the new bounds is saved as well.

    Args:
        cellName (str): The name of the RAS cell (used for naming the output file).
        ras_row (pd.Series): A row from a RAS DataFrame containing scaling factors for reaction bounds.
        model (cobra.Model): The metabolic model whose bounds are the starting point.
        rxns_ids (list of str): Reaction IDs, in the same order as ``model.reactions``;
            used as the index of the bounds table.
        output_folder (str): Folder path where the output CSV file will be saved.
        save_models (bool): Whether to save models with applied bounds.
        save_models_path (str): Path where to save models.
        save_models_format (str): Format for saved models.

    Returns:
        None
    """
    current_bounds = pd.DataFrame(
        [(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions],
        index=rxns_ids,
        columns=["lower_bound", "upper_bound"],
    )
    new_bounds = apply_ras_bounds(current_bounds, ras_row)

    # The cell name is a DataFrame index label and may not be a string;
    # os.path.join also keeps the path correct when output_folder has no
    # trailing separator.
    cell_label = str(cellName)
    new_bounds.to_csv(os.path.join(output_folder, cell_label + ".csv"), sep='\t', index=True)

    # Save model if requested
    if save_models:
        modified_model = apply_bounds_to_model(model, new_bounds)
        save_model(modified_model, cell_label, save_models_path, save_models_format)
157 271
158 def generate_bounds(model: cobra.Model, ras=None, output_folder='output/') -> pd.DataFrame: 272 def generate_bounds(model: cobra.Model, ras=None, output_folder='output/', save_models=False, save_models_path='saved_models/', save_models_format='csv') -> pd.DataFrame:
159 """ 273 """
160 Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments. 274 Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments.
161 275
162 Args: 276 Args:
163 model (cobra.Model): The metabolic model for which bounds will be generated. 277 model (cobra.Model): The metabolic model for which bounds will be generated.
164 medium (dict): A dictionary where keys are reaction IDs and values are the medium conditions.
165 ras (pd.DataFrame, optional): RAS pandas dataframe. Defaults to None. 278 ras (pd.DataFrame, optional): RAS pandas dataframe. Defaults to None.
166 output_folder (str, optional): Folder path where output CSV files will be saved. Defaults to 'output/'. 279 output_folder (str, optional): Folder path where output CSV files will be saved. Defaults to 'output/'.
280 save_models (bool): Whether to save models with applied bounds.
281 save_models_path (str): Path where to save models.
282 save_models_format (str): Format for saved models.
167 283
168 Returns: 284 Returns:
169 pd.DataFrame: DataFrame containing the bounds of reactions in the model. 285 pd.DataFrame: DataFrame containing the bounds of reactions in the model.
170 """ 286 """
171 rxns_ids = [rxn.id for rxn in model.reactions] 287 rxns_ids = [rxn.id for rxn in model.reactions]
177 for reaction in rxns_ids: 293 for reaction in rxns_ids:
178 model.reactions.get_by_id(reaction).lower_bound = float(df_FVA.loc[reaction, "minimum"]) 294 model.reactions.get_by_id(reaction).lower_bound = float(df_FVA.loc[reaction, "minimum"])
179 model.reactions.get_by_id(reaction).upper_bound = float(df_FVA.loc[reaction, "maximum"]) 295 model.reactions.get_by_id(reaction).upper_bound = float(df_FVA.loc[reaction, "maximum"])
180 296
181 if ras is not None: 297 if ras is not None:
182 Parallel(n_jobs=cpu_count())(delayed(process_ras_cell)(cellName, ras_row, model, rxns_ids, output_folder) for cellName, ras_row in ras.iterrows()) 298 Parallel(n_jobs=cpu_count())(delayed(process_ras_cell)(
299 cellName, ras_row, model, rxns_ids, output_folder,
300 save_models, save_models_path, save_models_format
301 ) for cellName, ras_row in ras.iterrows())
183 else: 302 else:
184 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) 303 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"])
185 newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids)) 304 newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids))
186 newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True) 305 newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True)
306
307 # Save model if requested
308 if save_models:
309 modified_model = apply_bounds_to_model(model, newBounds)
310 save_model(modified_model, "model_with_bounds", save_models_path, save_models_format)
311
187 pass 312 pass
188 313
189 ############################# main ########################################### 314 ############################# main ###########################################
190 def main(args:List[str] = None) -> None: 315 def main(args:List[str] = None) -> None:
191 """ 316 """
194 Returns: 319 Returns:
195 None 320 None
196 """ 321 """
197 if not os.path.exists('ras_to_bounds'): 322 if not os.path.exists('ras_to_bounds'):
198 os.makedirs('ras_to_bounds') 323 os.makedirs('ras_to_bounds')
199
200 324
201 global ARGS 325 global ARGS
202 ARGS = process_args(args) 326 ARGS = process_args(args)
203 327
204 if(ARGS.ras_selector == True): 328 if(ARGS.ras_selector == True):
234 ras_combined = pd.concat(ras_list, axis=0) 358 ras_combined = pd.concat(ras_list, axis=0)
235 # Normalize the RAS values by max RAS 359 # Normalize the RAS values by max RAS
236 ras_combined = ras_combined.div(ras_combined.max(axis=0)) 360 ras_combined = ras_combined.div(ras_combined.max(axis=0))
237 ras_combined.dropna(axis=1, how='all', inplace=True) 361 ras_combined.dropna(axis=1, how='all', inplace=True)
238 362
239
240
241 #model_type :utils.Model = ARGS.model_selector
242 #if model_type is utils.Model.Custom:
243 # model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext)
244 #else:
245 # model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir)
246
247 # TODO LOAD MODEL FROM UPLOAD
248
249 model = utils.build_cobra_model_from_csv(ARGS.model_upload) 363 model = utils.build_cobra_model_from_csv(ARGS.model_upload)
250 364
251 validation = utils.validate_model(model) 365 validation = utils.validate_model(model)
252 366
253 print("\n=== VALIDAZIONE MODELLO ===") 367 print("\n=== VALIDAZIONE MODELLO ===")
254 for key, value in validation.items(): 368 for key, value in validation.items():
255 print(f"{key}: {value}") 369 print(f"{key}: {value}")
256 370
257 #if(ARGS.medium_selector == "Custom"):
258 # medium = read_dataset(ARGS.medium, "medium dataset")
259 # medium.set_index(medium.columns[0], inplace=True)
260 # medium = medium.astype(float)
261 # medium = medium[medium.columns[0]].to_dict()
262 #else:
263 # df_mediums = pd.read_csv(ARGS.tool_dir + "/local/medium/medium.csv", index_col = 0)
264 # ARGS.medium_selector = ARGS.medium_selector.replace("_", " ")
265 # medium = df_mediums[[ARGS.medium_selector]]
266 # medium = medium[ARGS.medium_selector].to_dict()
267
268 if(ARGS.ras_selector == True): 371 if(ARGS.ras_selector == True):
269 generate_bounds(model, ras = ras_combined, output_folder=ARGS.output_path) 372 generate_bounds(model, ras=ras_combined, output_folder=ARGS.output_path,
270 class_assignments.to_csv(ARGS.cell_class, sep = '\t', index = False) 373 save_models=ARGS.save_models, save_models_path=ARGS.save_models_path,
374 save_models_format=ARGS.save_models_format)
375 class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False)
271 else: 376 else:
272 generate_bounds(model, output_folder=ARGS.output_path) 377 generate_bounds(model, output_folder=ARGS.output_path,
378 save_models=ARGS.save_models, save_models_path=ARGS.save_models_path,
379 save_models_format=ARGS.save_models_format)
273 380
274 pass 381 pass
275 382
276 ############################################################################## 383 ##############################################################################
277 if __name__ == "__main__": 384 if __name__ == "__main__":