diff COBRAxy/flux_simulation_beta.py @ 419:ed2c1f9e20ba draft

Uploaded
author francesco_lapi
date Tue, 09 Sep 2025 09:08:17 +0000
parents 6b015d3184ab
children f9fe44c65772
line wrap: on
line diff
--- a/COBRAxy/flux_simulation_beta.py	Tue Sep 09 07:36:30 2025 +0000
+++ b/COBRAxy/flux_simulation_beta.py	Tue Sep 09 09:08:17 2025 +0000
@@ -9,7 +9,7 @@
 from joblib import Parallel, delayed, cpu_count
 from cobra.sampling import OptGPSampler
 import sys
-import utils.general_utils as utils
+import utils.model_utils as model_utils
 
 
 ################################# process args ###############################
@@ -29,6 +29,12 @@
     parser.add_argument("-mo", "--model_upload", type = str,
         help = "path to input file with custom rules, if provided")
 
+    parser.add_argument("-mab", "--model_and_bounds", type = str,
+        choices = ['True', 'False'],
+        required = True,
+        help = "upload mode: True for model+bounds, False for complete models")
+
+
     parser.add_argument('-ol', '--out_log', 
                         help = "Output log")
     
@@ -38,11 +44,11 @@
                         help = 'your tool directory')
     
     parser.add_argument('-in', '--input',
-                        required = True,
-                        type=str,
-                        help = 'inputs bounds')
+                    required = True,
+                    type=str,
+                    help = 'input bounds files or complete model files')
     
-    parser.add_argument('-ni', '--names',
+    parser.add_argument('-ni', '--name',
                         required = True,
                         type=str,
                         help = 'cell names')
@@ -215,9 +221,10 @@
     pass
 
 
-def model_sampler(model_input_original:cobra.Model, bounds_path:str, cell_name:str)-> List[pd.DataFrame]:
+
+def model_sampler_with_bounds(model_input_original: cobra.Model, bounds_path: str, cell_name: str) -> List[pd.DataFrame]:
     """
-    Prepares the model with bounds from the dataset and performs sampling and analysis based on the selected algorithm.
+    MODE 1: Prepares the model with bounds from separate bounds file and performs sampling.
 
     Args:
         model_input_original (cobra.Model): The original COBRA model.
@@ -230,26 +237,41 @@
 
     model_input = model_input_original.copy()
     bounds_df = read_dataset(bounds_path, "bounds dataset")
+    
+    # Apply bounds to model
     for rxn_index, row in bounds_df.iterrows():
-        model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound
-        model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound
+        try:
+            model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound
+            model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound
+        except KeyError:
+            warning(f"Warning: Reaction {rxn_index} not found in model. Skipping.")
     
+    return perform_sampling_and_analysis(model_input, cell_name)
+
+
+def perform_sampling_and_analysis(model_input: cobra.Model, cell_name: str) -> List[pd.DataFrame]:
+    """
+    Common function to perform sampling and analysis on a prepared model.
+
+    Args:
+        model_input (cobra.Model): The prepared COBRA model with bounds applied.
+        cell_name (str): Name of the cell, used to generate filenames for output.
+
+    Returns:
+        List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results.
+    """
     
     if ARGS.algorithm == 'OPTGP':
         OPTGP_sampler(model_input, cell_name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed)
-
     elif ARGS.algorithm == 'CBS':
-        CBS_sampler(model_input,  cell_name, ARGS.n_samples, ARGS.n_batches, ARGS.seed)
+        CBS_sampler(model_input, cell_name, ARGS.n_samples, ARGS.n_batches, ARGS.seed)
 
     df_mean, df_median, df_quantiles = fluxes_statistics(cell_name, ARGS.output_types)
 
     if("fluxes" not in ARGS.output_types):
-        os.remove(ARGS.output_path + "/"  +  cell_name + '.csv')
+        os.remove(ARGS.output_path + "/" + cell_name + '.csv')
 
-    returnList = []
-    returnList.append(df_mean)
-    returnList.append(df_median)
-    returnList.append(df_quantiles)
+    returnList = [df_mean, df_median, df_quantiles]
 
     df_pFBA, df_FVA, df_sensitivity = fluxes_analysis(model_input, cell_name, ARGS.output_type_analysis)
 
@@ -332,7 +354,7 @@
             model.objective = "Biomass"
             solution = cobra.flux_analysis.pfba(model)
             fluxes = solution.fluxes
-            df_pFBA.loc[0,[rxn._id for rxn in model.reactions]] = fluxes.tolist()
+            df_pFBA.loc[0,[rxn.id for rxn in model.reactions]] = fluxes.tolist()
             df_pFBA = df_pFBA.reset_index(drop=True)
             df_pFBA.index = [model_name]
             df_pFBA = df_pFBA.astype(float).round(6)
@@ -371,38 +393,63 @@
         None
     """
 
-    num_processors = cpu_count()
+    num_processors = max(1, cpu_count() - 1)
 
     global ARGS
     ARGS = process_args(args)
 
     if not os.path.exists(ARGS.output_path):
         os.makedirs(ARGS.output_path)
+
+    #ARGS.bounds = ARGS.input.split(",")
+    #ARGS.bounds_name = ARGS.name.split(",")
+    #ARGS.output_types = ARGS.output_type.split(",")
+    #ARGS.output_type_analysis = ARGS.output_type_analysis.split(",")
+
+    # --- Normalize inputs (the tool may pass comma-separated --input and either --name or --names) ---
+    ARGS.input_files = ARGS.input.split(",") if getattr(ARGS, "input", None) else []
+    ARGS.file_names = ARGS.name.split(",")
+    # output types (required) -> list
+    ARGS.output_types = ARGS.output_type.split(",") if getattr(ARGS, "output_type", None) else []
+    # optional analysis output types -> list or empty
+    ARGS.output_type_analysis = ARGS.output_type_analysis.split(",") if getattr(ARGS, "output_type_analysis", None) else []
+
     
-    #model_type :utils.Model = ARGS.model_selector
-    #if model_type is utils.Model.Custom:
-    #    model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext)
-    #else:
-    #    model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir)
+    if ARGS.model_and_bounds == "True":
+        # MODE 1: Model + bounds (separate files)
+        print("=== MODE 1: Model + Bounds (separate files) ===")
+        
+        # Load base model
+        if not ARGS.model_upload:
+            sys.exit("Error: model_upload is required for Mode 1")
 
-    model = utils.build_cobra_model_from_csv(ARGS.model_upload)
-
-    validation = utils.validate_model(model)
+        base_model = model_utils.build_cobra_model_from_csv(ARGS.model_upload)
 
-    print("\n=== VALIDAZIONE MODELLO ===")
-    for key, value in validation.items():
-        print(f"{key}: {value}")
+        validation = model_utils.validate_model(base_model)
+
+        print("\n=== VALIDAZIONE MODELLO ===")
+        for key, value in validation.items():
+            print(f"{key}: {value}")
+
+        # Set solver verbosity to 1 to see warning and error messages only.
+        base_model.solver.configuration.verbosity = 1
 
-    #Set solver verbosity to 1 to see warning and error messages only.
-    model.solver.configuration.verbosity = 1
-    
-    ARGS.bounds = ARGS.input.split(",")
-    ARGS.bounds_name = ARGS.names.split(",")
-    ARGS.output_types = ARGS.output_type.split(",")
-    ARGS.output_type_analysis = ARGS.output_type_analysis.split(",")
+        # Process each bounds file with the base model
+        results = Parallel(n_jobs=num_processors)(
+            delayed(model_sampler_with_bounds)(base_model, bounds_file, cell_name) 
+            for bounds_file, cell_name in zip(ARGS.input_files, ARGS.file_names)
+        )
 
+    else:
+        # MODE 2: Multiple complete models
+        print("=== MODE 2: Multiple complete models ===")
+        
+        # Process each complete model file
+        results = Parallel(n_jobs=num_processors)(
+            delayed(perform_sampling_and_analysis)(model_utils.build_cobra_model_from_csv(model_file), cell_name) 
+            for model_file, cell_name in zip(ARGS.input_files, ARGS.file_names)
+        )
 
-    results = Parallel(n_jobs=num_processors)(delayed(model_sampler)(model, bounds_path, cell_name) for bounds_path, cell_name in zip(ARGS.bounds, ARGS.bounds_name))
 
     all_mean = pd.concat([result[0] for result in results], ignore_index=False)
     all_median = pd.concat([result[1] for result in results], ignore_index=False)