Mercurial > repos > bimib > cobraxy
comparison COBRAxy/ras_to_bounds_beta.py @ 411:6b015d3184ab draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Mon, 08 Sep 2025 21:07:34 +0000 |
| parents | f413b78d61bf |
| children | 5086145cfb96 |
comparison
equal
deleted
inserted
replaced
| 410:d660c5b03c14 | 411:6b015d3184ab |
|---|---|
| 28 description = 'process some value\'s') | 28 description = 'process some value\'s') |
| 29 | 29 |
| 30 | 30 |
| 31 parser.add_argument("-mo", "--model_upload", type = str, | 31 parser.add_argument("-mo", "--model_upload", type = str, |
| 32 help = "path to input file with custom rules, if provided") | 32 help = "path to input file with custom rules, if provided") |
| 33 | |
| 34 parser.add_argument("-meo", "--medium", type = str, | |
| 35 help = "path to input file with custom medium, if provided") | |
| 36 | 33 |
| 37 parser.add_argument('-ol', '--out_log', | 34 parser.add_argument('-ol', '--out_log', |
| 38 help = "Output log") | 35 help = "Output log") |
| 39 | 36 |
| 40 parser.add_argument('-td', '--tool_dir', | 37 parser.add_argument('-td', '--tool_dir', |
| 63 '-idop', '--output_path', | 60 '-idop', '--output_path', |
| 64 type = str, | 61 type = str, |
| 65 default='ras_to_bounds/', | 62 default='ras_to_bounds/', |
| 66 help = 'output path for maps') | 63 help = 'output path for maps') |
| 67 | 64 |
| | 65 parser.add_argument('-sm', '--save_models', |
| | 66 type=utils.Bool("save_models"), |
| | 67 default=False, |
| | 68 help = 'whether to save models with applied bounds') |
| | 69 |
| | 70 parser.add_argument('-smp', '--save_models_path', |
| | 71 type = str, |
| | 72 default='saved_models/', |
| | 73 help = 'output path for saved models') |
| | 74 |
| | 75 parser.add_argument('-smf', '--save_models_format', |
| | 76 type = str, |
| | 77 default='csv', |
| | 78 help = 'format for saved models (csv, xml, json, mat, yaml, tabular)') |
| | 79 |
| 68 | 80 |
| 69 ARGS = parser.parse_args(args) | 81 ARGS = parser.parse_args(args) |
| 70 return ARGS | 82 return ARGS |
| 71 | 83 |
| 72 ########################### warning ########################################### | 84 ########################### warning ########################################### |
| 78 s (str): The warning message to be logged and printed. | 90 s (str): The warning message to be logged and printed. |
| 79 | 91 |
| 80 Returns: | 92 Returns: |
| 81 None | 93 None |
| 82 """ | 94 """ |
| 83 with open(ARGS.out_log, 'a') as log: | 95 if ARGS.out_log: |
| 84 log.write(s + "\n\n") | 96 with open(ARGS.out_log, 'a') as log: |
| | 97 log.write(s + "\n\n") |
| 85 print(s) | 98 print(s) |
| 86 | 99 |
| 87 ############################ dataset input #################################### | 100 ############################ dataset input #################################### |
| 88 def read_dataset(data :str, name :str) -> pd.DataFrame: | 101 def read_dataset(data :str, name :str) -> pd.DataFrame: |
| 89 """ | 102 """ |
| 134 if upper_bound!=0 and lower_bound!=0: | 147 if upper_bound!=0 and lower_bound!=0: |
| 135 new_bounds.loc[reaction, "lower_bound"] = valMin | 148 new_bounds.loc[reaction, "lower_bound"] = valMin |
| 136 new_bounds.loc[reaction, "upper_bound"] = valMax | 149 new_bounds.loc[reaction, "upper_bound"] = valMax |
| 137 return new_bounds | 150 return new_bounds |
| 138 | 151 |
| 139 def process_ras_cell(cellName, ras_row, model, rxns_ids, output_folder): | 152 def save_model(model, filename, output_folder, file_format='csv'): |
| | 153 """ |
| | 154 Save a COBRA model to file in the specified format. |
| | 155 |
| | 156 Args: |
| | 157 model (cobra.Model): The model to save. |
| | 158 filename (str): Base filename (without extension). |
| | 159 output_folder (str): Output directory. |
| | 160 file_format (str): File format ('xml', 'json', 'mat', 'yaml', 'tabular', 'csv'). |
| | 161 |
| | 162 Returns: |
| | 163 None |
| | 164 """ |
| | 165 if not os.path.exists(output_folder): |
| | 166 os.makedirs(output_folder) |
| | 167 |
| | 168 try: |
| | 169 if file_format == 'tabular' or file_format == 'csv': |
| | 170 # Special handling for tabular format using utils functions |
| | 171 filepath = os.path.join(output_folder, f"{filename}.csv") |
| | 172 |
| | 173 rules = utils.generate_rules(model, asParsed = False) |
| | 174 reactions = utils.generate_reactions(model, asParsed = False) |
| | 175 bounds = utils.generate_bounds(model) |
| | 176 medium = utils.get_medium(model) |
| | 177 |
| | 178 try: |
| | 179 compartments = utils.generate_compartments(model) |
| | 180 except: |
| | 181 compartments = None |
| | 182 |
| | 183 df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "Rule"]) |
| | 184 df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Reaction"]) |
| | 185 df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"}) |
| | 186 df_medium = medium.rename(columns = {"reaction": "ReactionID"}) |
| | 187 df_medium["InMedium"] = True # flag per indicare la presenza nel medium |
| | 188 |
| | 189 merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer") |
| | 190 merged = merged.merge(df_bounds, on = "ReactionID", how = "outer") |
| | 191 |
| | 192 # Add compartments only if they exist and model name is ENGRO2 |
| | 193 if compartments is not None and hasattr(ARGS, 'name') and ARGS.name == "ENGRO2": |
| | 194 merged = merged.merge(compartments, on = "ReactionID", how = "outer") |
| | 195 |
| | 196 merged = merged.merge(df_medium, on = "ReactionID", how = "left") |
| | 197 merged["InMedium"] = merged["InMedium"].fillna(False) |
| | 198 merged = merged.sort_values(by = "InMedium", ascending = False) |
| | 199 |
| | 200 merged.to_csv(filepath, sep="\t", index=False) |
| | 201 |
| | 202 else: |
| | 203 # Standard COBRA formats |
| | 204 filepath = os.path.join(output_folder, f"{filename}.{file_format}") |
| | 205 |
| | 206 if file_format == 'xml': |
| | 207 cobra.io.write_sbml_model(model, filepath) |
| | 208 elif file_format == 'json': |
| | 209 cobra.io.save_json_model(model, filepath) |
| | 210 elif file_format == 'mat': |
| | 211 cobra.io.save_matlab_model(model, filepath) |
| | 212 elif file_format == 'yaml': |
| | 213 cobra.io.save_yaml_model(model, filepath) |
| | 214 else: |
| | 215 raise ValueError(f"Unsupported format: {file_format}") |
| | 216 |
| | 217 print(f"Model saved: {filepath}") |
| | 218 |
| | 219 except Exception as e: |
| | 220 warning(f"Error saving model {filename}: {str(e)}") |
| | 221 |
| | 222 def apply_bounds_to_model(model, bounds): |
| | 223 """ |
| | 224 Apply bounds from a DataFrame to a COBRA model. |
| | 225 |
| | 226 Args: |
| | 227 model (cobra.Model): The metabolic model to modify. |
| | 228 bounds (pd.DataFrame): DataFrame with reaction bounds. |
| | 229 |
| | 230 Returns: |
| | 231 cobra.Model: Modified model with new bounds. |
| | 232 """ |
| | 233 model_copy = model.copy() |
| | 234 for reaction_id in bounds.index: |
| | 235 try: |
| | 236 reaction = model_copy.reactions.get_by_id(reaction_id) |
| | 237 reaction.lower_bound = bounds.loc[reaction_id, "lower_bound"] |
| | 238 reaction.upper_bound = bounds.loc[reaction_id, "upper_bound"] |
| | 239 except KeyError: |
| | 240 # Reaction not found in model, skip |
| | 241 continue |
| | 242 return model_copy |
| | 243 |
| | 244 def process_ras_cell(cellName, ras_row, model, rxns_ids, output_folder, save_models=False, save_models_path='saved_models/', save_models_format='csv'): |
| 140 """ | 245 """ |
| 141 Process a single RAS cell, apply bounds, and save the bounds to a CSV file. | 246 Process a single RAS cell, apply bounds, and save the bounds to a CSV file. |
| 142 | 247 |
| 143 Args: | 248 Args: |
| 144 cellName (str): The name of the RAS cell (used for naming the output file). | 249 cellName (str): The name of the RAS cell (used for naming the output file). |
| 145 ras_row (pd.Series): A row from a RAS DataFrame containing scaling factors for reaction bounds. | 250 ras_row (pd.Series): A row from a RAS DataFrame containing scaling factors for reaction bounds. |
| 146 model (cobra.Model): The metabolic model to be modified. | 251 model (cobra.Model): The metabolic model to be modified. |
| 147 rxns_ids (list of str): List of reaction IDs to which the scaling factors will be applied. | 252 rxns_ids (list of str): List of reaction IDs to which the scaling factors will be applied. |
| 148 output_folder (str): Folder path where the output CSV file will be saved. | 253 output_folder (str): Folder path where the output CSV file will be saved. |
| | 254 save_models (bool): Whether to save models with applied bounds. |
| | 255 save_models_path (str): Path where to save models. |
| | 256 save_models_format (str): Format for saved models. |
| 149 | 257 |
| 150 Returns: | 258 Returns: |
| 151 None | 259 None |
| 152 """ | 260 """ |
| 153 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) | 261 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) |
| 154 new_bounds = apply_ras_bounds(bounds, ras_row) | 262 new_bounds = apply_ras_bounds(bounds, ras_row) |
| 155 new_bounds.to_csv(output_folder + cellName + ".csv", sep='\t', index=True) | 263 new_bounds.to_csv(output_folder + cellName + ".csv", sep='\t', index=True) |
| 264 | |
| 265 # Save model if requested | |
| 266 if save_models: | |
| 267 modified_model = apply_bounds_to_model(model, new_bounds) | |
| 268 save_model(modified_model, cellName, save_models_path, save_models_format) | |
| 269 | |
| 156 pass | 270 pass |
| 157 | 271 |
| 158 def generate_bounds(model: cobra.Model, ras=None, output_folder='output/') -> pd.DataFrame: | 272 def generate_bounds(model: cobra.Model, ras=None, output_folder='output/', save_models=False, save_models_path='saved_models/', save_models_format='csv') -> pd.DataFrame: |
| 159 """ | 273 """ |
| 160 Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments. | 274 Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments. |
| 161 | 275 |
| 162 Args: | 276 Args: |
| 163 model (cobra.Model): The metabolic model for which bounds will be generated. | 277 model (cobra.Model): The metabolic model for which bounds will be generated. |
| 164 medium (dict): A dictionary where keys are reaction IDs and values are the medium conditions. | |
| 165 ras (pd.DataFrame, optional): RAS pandas dataframe. Defaults to None. | 278 ras (pd.DataFrame, optional): RAS pandas dataframe. Defaults to None. |
| 166 output_folder (str, optional): Folder path where output CSV files will be saved. Defaults to 'output/'. | 279 output_folder (str, optional): Folder path where output CSV files will be saved. Defaults to 'output/'. |
| | 280 save_models (bool): Whether to save models with applied bounds. |
| | 281 save_models_path (str): Path where to save models. |
| | 282 save_models_format (str): Format for saved models. |
| 167 | 283 |
| 168 Returns: | 284 Returns: |
| 169 pd.DataFrame: DataFrame containing the bounds of reactions in the model. | 285 pd.DataFrame: DataFrame containing the bounds of reactions in the model. |
| 170 """ | 286 """ |
| 171 rxns_ids = [rxn.id for rxn in model.reactions] | 287 rxns_ids = [rxn.id for rxn in model.reactions] |
| 177 for reaction in rxns_ids: | 293 for reaction in rxns_ids: |
| 178 model.reactions.get_by_id(reaction).lower_bound = float(df_FVA.loc[reaction, "minimum"]) | 294 model.reactions.get_by_id(reaction).lower_bound = float(df_FVA.loc[reaction, "minimum"]) |
| 179 model.reactions.get_by_id(reaction).upper_bound = float(df_FVA.loc[reaction, "maximum"]) | 295 model.reactions.get_by_id(reaction).upper_bound = float(df_FVA.loc[reaction, "maximum"]) |
| 180 | 296 |
| 181 if ras is not None: | 297 if ras is not None: |
| 182 Parallel(n_jobs=cpu_count())(delayed(process_ras_cell)(cellName, ras_row, model, rxns_ids, output_folder) for cellName, ras_row in ras.iterrows()) | 298 Parallel(n_jobs=cpu_count())(delayed(process_ras_cell)( |
| | 299 cellName, ras_row, model, rxns_ids, output_folder, |
| | 300 save_models, save_models_path, save_models_format |
| | 301 ) for cellName, ras_row in ras.iterrows()) |
| 183 else: | 302 else: |
| 184 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) | 303 bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) |
| 185 newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids)) | 304 newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids)) |
| 186 newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True) | 305 newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True) |
| | 306 |
| | 307 # Save model if requested |
| | 308 if save_models: |
| | 309 modified_model = apply_bounds_to_model(model, newBounds) |
| | 310 save_model(modified_model, "model_with_bounds", save_models_path, save_models_format) |
| | 311 |
| 187 pass | 312 pass |
| 188 | 313 |
| 189 ############################# main ########################################### | 314 ############################# main ########################################### |
| 190 def main(args:List[str] = None) -> None: | 315 def main(args:List[str] = None) -> None: |
| 191 """ | 316 """ |
| 194 Returns: | 319 Returns: |
| 195 None | 320 None |
| 196 """ | 321 """ |
| 197 if not os.path.exists('ras_to_bounds'): | 322 if not os.path.exists('ras_to_bounds'): |
| 198 os.makedirs('ras_to_bounds') | 323 os.makedirs('ras_to_bounds') |
| 199 | |
| 200 | 324 |
| 201 global ARGS | 325 global ARGS |
| 202 ARGS = process_args(args) | 326 ARGS = process_args(args) |
| 203 | 327 |
| 204 if(ARGS.ras_selector == True): | 328 if(ARGS.ras_selector == True): |
| 234 ras_combined = pd.concat(ras_list, axis=0) | 358 ras_combined = pd.concat(ras_list, axis=0) |
| 235 # Normalize the RAS values by max RAS | 359 # Normalize the RAS values by max RAS |
| 236 ras_combined = ras_combined.div(ras_combined.max(axis=0)) | 360 ras_combined = ras_combined.div(ras_combined.max(axis=0)) |
| 237 ras_combined.dropna(axis=1, how='all', inplace=True) | 361 ras_combined.dropna(axis=1, how='all', inplace=True) |
| 238 | 362 |
| 239 | |
| 240 | |
| 241 #model_type :utils.Model = ARGS.model_selector | |
| 242 #if model_type is utils.Model.Custom: | |
| 243 # model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext) | |
| 244 #else: | |
| 245 # model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir) | |
| 246 | |
| 247 # TODO LOAD MODEL FROM UPLOAD | |
| 248 | |
| 249 model = utils.build_cobra_model_from_csv(ARGS.model_upload) | 363 model = utils.build_cobra_model_from_csv(ARGS.model_upload) |
| 250 | 364 |
| 251 validation = utils.validate_model(model) | 365 validation = utils.validate_model(model) |
| 252 | 366 |
| 253 print("\n=== VALIDAZIONE MODELLO ===") | 367 print("\n=== VALIDAZIONE MODELLO ===") |
| 254 for key, value in validation.items(): | 368 for key, value in validation.items(): |
| 255 print(f"{key}: {value}") | 369 print(f"{key}: {value}") |
| 256 | 370 |
| 257 #if(ARGS.medium_selector == "Custom"): | |
| 258 # medium = read_dataset(ARGS.medium, "medium dataset") | |
| 259 # medium.set_index(medium.columns[0], inplace=True) | |
| 260 # medium = medium.astype(float) | |
| 261 # medium = medium[medium.columns[0]].to_dict() | |
| 262 #else: | |
| 263 # df_mediums = pd.read_csv(ARGS.tool_dir + "/local/medium/medium.csv", index_col = 0) | |
| 264 # ARGS.medium_selector = ARGS.medium_selector.replace("_", " ") | |
| 265 # medium = df_mediums[[ARGS.medium_selector]] | |
| 266 # medium = medium[ARGS.medium_selector].to_dict() | |
| 267 | |
| 268 if(ARGS.ras_selector == True): | 371 if(ARGS.ras_selector == True): |
| 269 generate_bounds(model, ras = ras_combined, output_folder=ARGS.output_path) | 372 generate_bounds(model, ras=ras_combined, output_folder=ARGS.output_path, |
| 270 class_assignments.to_csv(ARGS.cell_class, sep = '\t', index = False) | 373 save_models=ARGS.save_models, save_models_path=ARGS.save_models_path, |
| | 374 save_models_format=ARGS.save_models_format) |
| | 375 class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False) |
| 271 else: | 376 else: |
| 272 generate_bounds(model, output_folder=ARGS.output_path) | 377 generate_bounds(model, output_folder=ARGS.output_path, |
| | 378 save_models=ARGS.save_models, save_models_path=ARGS.save_models_path, |
| | 379 save_models_format=ARGS.save_models_format) |
| 273 | 380 |
| 274 pass | 381 pass |
| 275 | 382 |
| 276 ############################################################################## | 383 ############################################################################## |
| 277 if __name__ == "__main__": | 384 if __name__ == "__main__": |
