Mercurial > repos > bimib > cobraxy
changeset 489:97eea560a10f draft
Uploaded
| author | francesco_lapi | 
|---|---|
| date | Mon, 29 Sep 2025 10:33:26 +0000 | 
| parents | e0bcc61b2feb | 
| children | c6ea189ea7e9 | 
| files | COBRAxy/custom_data_generator.py COBRAxy/custom_data_generator.xml COBRAxy/flux_simulation.py COBRAxy/flux_simulation.xml COBRAxy/flux_simulation_beta.py COBRAxy/flux_simulation_beta.xml COBRAxy/fromCSVtoCOBRA.py COBRAxy/fromCSVtoCOBRA.xml COBRAxy/fromCSVtoCOBRA_beta.py COBRAxy/fromCSVtoCOBRA_beta.xml COBRAxy/local/medium/medium.csv COBRAxy/local/pickle files/HMRcore_genes.p COBRAxy/local/pickle files/HMRcore_rules.p COBRAxy/local/pickle files/Recon_genes.p COBRAxy/local/pickle files/Recon_rules.p COBRAxy/marea_cluster.py COBRAxy/marea_macros.xml COBRAxy/ras_generator.py COBRAxy/ras_generator.xml COBRAxy/ras_generator_beta.py COBRAxy/ras_generator_beta.xml COBRAxy/ras_to_bounds.py COBRAxy/ras_to_bounds.xml COBRAxy/ras_to_bounds_beta.py COBRAxy/ras_to_bounds_beta.xml COBRAxy/rps_generator.py COBRAxy/rps_generator.xml COBRAxy/rps_generator_beta.py COBRAxy/rps_generator_beta.xml | 
| diffstat | 24 files changed, 1276 insertions(+), 3694 deletions(-) [+] | 
line wrap: on
 line diff
--- a/COBRAxy/custom_data_generator.py Tue Sep 23 13:48:24 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,217 +0,0 @@ -import os -import csv -import cobra -import pickle -import argparse -import pandas as pd -import utils.general_utils as utils -import utils.rule_parsing as rulesUtils -from typing import Optional, Tuple, Union, List, Dict -import utils.reaction_parsing as reactionUtils - -ARGS : argparse.Namespace -def process_args(args:List[str] = None) -> argparse.Namespace: - """ - Interfaces the script of a module with its frontend, making the user's choices for - various parameters available as values in code. - - Args: - args : Always obtained (in file) from sys.argv - - Returns: - Namespace : An object containing the parsed arguments - """ - parser = argparse.ArgumentParser( - usage = "%(prog)s [options]", - description = "generate custom data from a given model") - - parser.add_argument("-ol", "--out_log", type = str, required = True, help = "Output log") - - parser.add_argument("-orules", "--out_rules", type = str, required = True, help = "Output rules") - parser.add_argument("-orxns", "--out_reactions", type = str, required = True, help = "Output reactions") - parser.add_argument("-omedium", "--out_medium", type = str, required = True, help = "Output medium") - parser.add_argument("-obnds", "--out_bounds", type = str, required = True, help = "Output bounds") - - parser.add_argument("-id", "--input", type = str, required = True, help = "Input model") - parser.add_argument("-mn", "--name", type = str, required = True, help = "Input model name") - # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in - parser.add_argument('-idop', '--output_path', type = str, default='result', help = 'output path for maps') - argsNamespace = parser.parse_args(args) - # ^ can't get this one to work from xml, there doesn't seem to be a way to get the directory attribute from the collection - - return argsNamespace - -################################- INPUT DATA LOADING -################################ -def load_custom_model(file_path :utils.FilePath, ext :Optional[utils.FileFormat] = None) -> cobra.Model: - """ - Loads a custom model from a file, either in JSON or XML format. - - Args: - file_path : The path to the file containing the custom model. - ext : explicit file extension. Necessary for standard use in galaxy because of its weird behaviour. - - Raises: - DataErr : if the file is in an invalid format or cannot be opened for whatever reason. - - Returns: - cobra.Model : the model, if successfully opened. - """ - ext = ext if ext else file_path.ext - try: - if ext is utils.FileFormat.XML: - return cobra.io.read_sbml_model(file_path.show()) - - if ext is utils.FileFormat.JSON: - return cobra.io.load_json_model(file_path.show()) - - except Exception as e: raise utils.DataErr(file_path, e.__str__()) - raise utils.DataErr(file_path, - f"Formato \"{file_path.ext}\" non riconosciuto, sono supportati solo file JSON e XML") - -################################- DATA GENERATION -################################ -ReactionId = str -def generate_rules(model: cobra.Model, *, asParsed = True) -> Union[Dict[ReactionId, rulesUtils.OpList], Dict[ReactionId, str]]: - """ - Generates a dictionary mapping reaction ids to rules from the model. - - Args: - model : the model to derive data from. - asParsed : if True parses the rules to an optimized runtime format, otherwise leaves them as strings. - - Returns: - Dict[ReactionId, rulesUtils.OpList] : the generated dictionary of parsed rules. - Dict[ReactionId, str] : the generated dictionary of raw rules. - """ - # Is the below approach convoluted? yes - # Ok but is it inefficient? probably - # Ok but at least I don't have to repeat the check at every rule (I'm clinically insane) - _ruleGetter = lambda reaction : reaction.gene_reaction_rule - ruleExtractor = (lambda reaction : - rulesUtils.parseRuleToNestedList(_ruleGetter(reaction))) if asParsed else _ruleGetter - - return { - reaction.id : ruleExtractor(reaction) - for reaction in model.reactions - if reaction.gene_reaction_rule } - -def generate_reactions(model :cobra.Model, *, asParsed = True) -> Dict[ReactionId, str]: - """ - Generates a dictionary mapping reaction ids to reaction formulas from the model. - - Args: - model : the model to derive data from. - asParsed : if True parses the reactions to an optimized runtime format, otherwise leaves them as they are. - - Returns: - Dict[ReactionId, str] : the generated dictionary. - """ - - unparsedReactions = { - reaction.id : reaction.reaction - for reaction in model.reactions - if reaction.reaction - } - - if not asParsed: return unparsedReactions - - return reactionUtils.create_reaction_dict(unparsedReactions) - -def get_medium(model:cobra.Model) -> pd.DataFrame: - trueMedium=[] - for r in model.reactions: - positiveCoeff=0 - for m in r.metabolites: - if r.get_coefficient(m.id)>0: - positiveCoeff=1; - if (positiveCoeff==0 and r.lower_bound<0): - trueMedium.append(r.id) - - df_medium = pd.DataFrame() - df_medium["reaction"] = trueMedium - return df_medium - -def generate_bounds(model:cobra.Model) -> pd.DataFrame: - - rxns = [] - for reaction in model.reactions: - rxns.append(reaction.id) - - bounds = pd.DataFrame(columns = ["lower_bound", "upper_bound"], index=rxns) - - for reaction in model.reactions: - bounds.loc[reaction.id] = [reaction.lower_bound, reaction.upper_bound] - return bounds - - -###############################- FILE SAVING -################################ -def save_as_csv_filePath(data :dict, file_path :utils.FilePath, fieldNames :Tuple[str, str]) -> None: - """ - Saves any dictionary-shaped data in a .csv file created at the given file_path as FilePath. - - Args: - data : the data to be written to the file. - file_path : the path to the .csv file. - fieldNames : the names of the fields (columns) in the .csv file. - - Returns: - None - """ - with open(file_path.show(), 'w', newline='') as csvfile: - writer = csv.DictWriter(csvfile, fieldnames = fieldNames, dialect="excel-tab") - writer.writeheader() - - for key, value in data.items(): - writer.writerow({ fieldNames[0] : key, fieldNames[1] : value }) - -def save_as_csv(data :dict, file_path :str, fieldNames :Tuple[str, str]) -> None: - """ - Saves any dictionary-shaped data in a .csv file created at the given file_path as string. - - Args: - data : the data to be written to the file. - file_path : the path to the .csv file. - fieldNames : the names of the fields (columns) in the .csv file. - - Returns: - None - """ - with open(file_path, 'w', newline='') as csvfile: - writer = csv.DictWriter(csvfile, fieldnames = fieldNames, dialect="excel-tab") - writer.writeheader() - - for key, value in data.items(): - writer.writerow({ fieldNames[0] : key, fieldNames[1] : value }) - -###############################- ENTRY POINT -################################ -def main(args:List[str] = None) -> None: - """ - Initializes everything and sets the program in motion based on the fronted input arguments. - - Returns: - None - """ - # get args from frontend (related xml) - global ARGS - ARGS = process_args(args) - - # this is the worst thing I've seen so far, congrats to the former MaREA devs for suggesting this! - if os.path.isdir(ARGS.output_path) == False: os.makedirs(ARGS.output_path) - - # load custom model - model = load_custom_model( - utils.FilePath.fromStrPath(ARGS.input), utils.FilePath.fromStrPath(ARGS.name).ext) - - # generate data - rules = generate_rules(model, asParsed = False) - reactions = generate_reactions(model, asParsed = False) - bounds = generate_bounds(model) - medium = get_medium(model) - - # save files out of collection: path coming from xml - save_as_csv(rules, ARGS.out_rules, ("ReactionID", "Rule")) - save_as_csv(reactions, ARGS.out_reactions, ("ReactionID", "Reaction")) - bounds.to_csv(ARGS.out_bounds, sep = '\t') - medium.to_csv(ARGS.out_medium, sep = '\t') - -if __name__ == '__main__': - main() \ No newline at end of file
--- a/COBRAxy/custom_data_generator.xml Tue Sep 23 13:48:24 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -<tool id="CustomDataGenerator" name="Custom Data Generator" version="2.0.0"> - - <macros> - <import>marea_macros.xml</import> - </macros> - - <requirements> - <requirement type="package" version="1.24.4">numpy</requirement> - <requirement type="package" version="2.0.3">pandas</requirement> - <requirement type="package" version="0.29.0">cobra</requirement> - <requirement type="package" version="5.2.2">lxml</requirement> - </requirements> - - <command detect_errors="exit_code"> - <![CDATA[ - python $__tool_directory__/custom_data_generator.py - --input $input - --name $input.element_identifier - --out_log $log - --out_rules $rules - --out_reactions $reactions - --out_bounds $bounds - --out_medium $medium - ]]> - </command> - <inputs> - <param name="input" argument="--input" type="data" format="xml, json" label="Custom model:" /> - <param name="name" argument="--name" type="text" label="Model's name:" value="Model" help="Default: Model" /> - </inputs> - - <outputs> - <data format="txt" name="log" label="${tool.name} - Log" /> - <data format="tabular" name="rules" label="${name}_Rules" /> - <data format="tabular" name="reactions" label="${name}_Reactions" /> - <data format="tabular" name="bounds" label="${name}_Bounds" /> - <data format="tabular" name="medium" label="${name}_Medium" /> - </outputs> - - <help> - <![CDATA[ -What it does -------------- - -This tool generates four files containing reactions, rules, reaction bounds and medium composition respectively, starting from a custom model in JSON or XML format. -Reactions and rules can be used as inputs for the RAS and RPS generator tools. - -Accepted files: - - A model: JSON or XML file reporting reactions and rules contained in the model. - - -Output: -------------- - -The tool generates: - - rules: reporting the rules for each reaction in the custom model given. Format: csv (tab separated). - - reactions: reporting the reactions in the custom model given. Format: csv (tab separated). - - reaction bounds: reporting the lower and upper bounds of each model reaction. Format: csv (tab separated). - - medium composition: reporting the list of exchange/transport reactions. Format: csv (tab separated). - - a log file (.txt). - ]]> - </help> - <expand macro="citations" /> -</tool> \ No newline at end of file
--- a/COBRAxy/flux_simulation.py Tue Sep 23 13:48:24 2025 +0000 +++ b/COBRAxy/flux_simulation.py Mon Sep 29 10:33:26 2025 +0000 @@ -1,101 +1,124 @@ +""" +Flux sampling and analysis utilities for COBRA models. + +This script supports two modes: +- Mode 1 (model_and_bounds=True): load a base model and apply bounds from + separate files before sampling. +- Mode 2 (model_and_bounds=False): load complete models and sample directly. + +Sampling algorithms supported: OPTGP and CBS. Outputs include flux samples +and optional analyses (pFBA, FVA, sensitivity), saved as tabular files. +""" + import argparse import utils.general_utils as utils -from typing import Optional, List +from typing import List import os +import pandas as pd import numpy as np -import pandas as pd import cobra import utils.CBS_backend as CBS_backend from joblib import Parallel, delayed, cpu_count from cobra.sampling import OptGPSampler import sys +import utils.model_utils as model_utils + ################################# process args ############################### -def process_args(args :List[str] = None) -> argparse.Namespace: +def process_args(args: List[str] = None) -> argparse.Namespace: """ Processes command-line arguments. - + Args: args (list): List of command-line arguments. - + Returns: Namespace: An object containing parsed arguments. """ - parser = argparse.ArgumentParser(usage = '%(prog)s [options]', - description = 'process some value\'s') - - parser.add_argument('-ol', '--out_log', - help = "Output log") + parser = argparse.ArgumentParser(usage='%(prog)s [options]', + description='process some value\'s') + + parser.add_argument("-mo", "--model_upload", type=str, + help="path to input file with custom rules, if provided") + + parser.add_argument("-mab", "--model_and_bounds", type=str, + choices=['True', 'False'], + required=True, + help="upload mode: True for model+bounds, False for complete models") + + parser.add_argument("-ens", "--sampling_enabled", type=str, + choices=['true', 'false'], + required=True, + help="enable sampling: 'true' for sampling, 'false' for no sampling") + + parser.add_argument('-ol', '--out_log', + help="Output log") parser.add_argument('-td', '--tool_dir', - type = str, - required = True, - help = 'your tool directory') + type=str, + required=True, + help='your tool directory') parser.add_argument('-in', '--input', - required = True, - type=str, - help = 'inputs bounds') - - parser.add_argument('-ni', '--names', - required = True, + required=True, type=str, - help = 'cell names') - - parser.add_argument( - '-ms', '--model_selector', - type = utils.Model, default = utils.Model.ENGRO2, choices = [utils.Model.ENGRO2, utils.Model.Custom], - help = 'chose which type of model you want use') + help='input bounds files or complete model files') - parser.add_argument("-mo", "--model", type = str) - - parser.add_argument("-mn", "--model_name", type = str, help = "custom mode name") + parser.add_argument('-ni', '--name', + required=True, + type=str, + help='cell names') parser.add_argument('-a', '--algorithm', - type = str, - choices = ['OPTGP', 'CBS'], - required = True, - help = 'choose sampling algorithm') + type=str, + choices=['OPTGP', 'CBS'], + required=True, + help='choose sampling algorithm') - parser.add_argument('-th', '--thinning', - type = int, - default= 100, - required=False, - help = 'choose thinning') + parser.add_argument('-th', '--thinning', + type=int, + default=100, + required=True, + help='choose thinning') - parser.add_argument('-ns', '--n_samples', - type = int, - required = True, - help = 'choose how many samples') + parser.add_argument('-ns', '--n_samples', + type=int, + required=True, + help='choose how many samples (set to 0 for optimization only)') + + parser.add_argument('-sd', '--seed', + type=int, + required=True, + help='seed for random number generation') - parser.add_argument('-sd', '--seed', - type = int, - required = True, - help = 'seed') + parser.add_argument('-nb', '--n_batches', + type=int, + required=True, + help='choose how many batches') - parser.add_argument('-nb', '--n_batches', - type = int, - required = True, - help = 'choose how many batches') + parser.add_argument('-opt', '--perc_opt', + type=float, + default=0.9, + required=False, + help='choose the fraction of optimality for FVA (0-1)') - parser.add_argument('-ot', '--output_type', - type = str, - required = True, - help = 'output type') + parser.add_argument('-ot', '--output_type', + type=str, + required=True, + help='output type for sampling results') - parser.add_argument('-ota', '--output_type_analysis', - type = str, - required = False, - help = 'output type analysis') + parser.add_argument('-ota', '--output_type_analysis', + type=str, + required=False, + help='output type analysis (optimization methods)') - parser.add_argument('-idop', '--output_path', - type = str, + parser.add_argument('-idop', '--output_path', + type=str, default='flux_simulation', - help = 'output path for maps') + help='output path for maps') ARGS = parser.parse_args(args) return ARGS - ########################### warning ########################################### def warning(s :str) -> None: """ @@ -113,6 +136,17 @@ def write_to_file(dataset: pd.DataFrame, name: str, keep_index:bool=False)->None: + """ + Write a DataFrame to a TSV file under ARGS.output_path with a given base name. + + Args: + dataset: The DataFrame to write. + name: Base file name (without extension). + keep_index: Whether to keep the DataFrame index in the file. + + Returns: + None + """ dataset.index.name = 'Reactions' dataset.to_csv(ARGS.output_path + "/" + name + ".csv", sep = '\t', index = keep_index) @@ -142,10 +176,10 @@ -def OPTGP_sampler(model:cobra.Model, model_name:str, n_samples:int=1000, thinning:int=100, n_batches:int=1, seed:int=0)-> None: +def OPTGP_sampler(model: cobra.Model, model_name: str, n_samples: int = 1000, thinning: int = 100, n_batches: int = 1, seed: int = 0) -> None: """ Samples from the OPTGP (Optimal Global Perturbation) algorithm and saves the results to CSV files. - + Args: model (cobra.Model): The COBRA model to sample from. model_name (str): The name of the model, used in naming output files. @@ -153,75 +187,131 @@ thinning (int, optional): Thinning parameter for the sampler. Default is 100. n_batches (int, optional): Number of batches to run. Default is 1. seed (int, optional): Random seed for reproducibility. Default is 0. - + Returns: None """ - - for i in range(0, n_batches): + import numpy as np + + # Get reaction IDs for consistent column ordering + reaction_ids = [rxn.id for rxn in model.reactions] + + # Sample and save each batch as numpy file + for i in range(n_batches): optgp = OptGPSampler(model, thinning, seed) samples = optgp.sample(n_samples) - samples.to_csv(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_OPTGP.csv', index=False) - seed+=1 - samplesTotal = pd.DataFrame() - for i in range(0, n_batches): - samples_batch = pd.read_csv(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_OPTGP.csv') - samplesTotal = pd.concat([samplesTotal, samples_batch], ignore_index = True) - + + # Save as numpy array (more memory efficient) + batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy" + np.save(batch_filename, samples.to_numpy()) + + seed += 1 + + # Merge all batches into a single DataFrame + all_samples = [] + + for i in range(n_batches): + batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy" + batch_data = np.load(batch_filename, allow_pickle=True) + all_samples.append(batch_data) + + # Concatenate all batches + samplesTotal_array = np.vstack(all_samples) + + # Convert back to DataFrame with proper column names + samplesTotal = pd.DataFrame(samplesTotal_array, columns=reaction_ids) + + # Save the final merged result as CSV write_to_file(samplesTotal.T, model_name, True) - - for i in range(0, n_batches): - os.remove(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_OPTGP.csv') - pass + + # Clean up temporary numpy files + for i in range(n_batches): + batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy" + if os.path.exists(batch_filename): + os.remove(batch_filename) -def CBS_sampler(model:cobra.Model, model_name:str, n_samples:int=1000, n_batches:int=1, seed:int=0)-> None: +def CBS_sampler(model: cobra.Model, model_name: str, n_samples: int = 1000, n_batches: int = 1, seed: int = 0) -> None: """ Samples using the CBS (Constraint-based Sampling) algorithm and saves the results to CSV files. - + Args: model (cobra.Model): The COBRA model to sample from. model_name (str): The name of the model, used in naming output files. n_samples (int, optional): Number of samples per batch. Default is 1000. n_batches (int, optional): Number of batches to run. Default is 1. seed (int, optional): Random seed for reproducibility. Default is 0. - + Returns: None """ - - df_FVA = cobra.flux_analysis.flux_variability_analysis(model,fraction_of_optimum=0).round(6) + import numpy as np + + # Get reaction IDs for consistent column ordering + reaction_ids = [reaction.id for reaction in model.reactions] + + # Perform FVA analysis once for all batches + df_FVA = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0).round(6) + + # Generate random objective functions for all samples across all batches + df_coefficients = CBS_backend.randomObjectiveFunction(model, n_samples * n_batches, df_FVA, seed=seed) - df_coefficients = CBS_backend.randomObjectiveFunction(model, n_samples*n_batches, df_FVA, seed=seed) - - for i in range(0, n_batches): - samples = pd.DataFrame(columns =[reaction.id for reaction in model.reactions], index = range(n_samples)) + # Sample and save each batch as numpy file + for i in range(n_batches): + samples = pd.DataFrame(columns=reaction_ids, index=range(n_samples)) + try: - CBS_backend.randomObjectiveFunctionSampling(model, n_samples, df_coefficients.iloc[:,i*n_samples:(i+1)*n_samples], samples) + CBS_backend.randomObjectiveFunctionSampling( + model, + n_samples, + df_coefficients.iloc[:, i * n_samples:(i + 1) * n_samples], + samples + ) except Exception as e: utils.logWarning( - "Warning: GLPK solver has failed for " + model_name + ". Trying with COBRA interface. Error:" + str(e), - ARGS.out_log) - CBS_backend.randomObjectiveFunctionSampling_cobrapy(model, n_samples, df_coefficients.iloc[:,i*n_samples:(i+1)*n_samples], - samples) - utils.logWarning(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_CBS.csv', ARGS.out_log) - samples.to_csv(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_CBS.csv', index=False) - - samplesTotal = pd.DataFrame() - for i in range(0, n_batches): - samples_batch = pd.read_csv(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_CBS.csv') - samplesTotal = pd.concat([samplesTotal, samples_batch], ignore_index = True) - + f"Warning: GLPK solver has failed for {model_name}. Trying with COBRA interface. Error: {str(e)}", + ARGS.out_log + ) + CBS_backend.randomObjectiveFunctionSampling_cobrapy( + model, + n_samples, + df_coefficients.iloc[:, i * n_samples:(i + 1) * n_samples], + samples + ) + + # Save as numpy array (more memory efficient) + batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy" + utils.logWarning(batch_filename, ARGS.out_log) + np.save(batch_filename, samples.to_numpy()) + + # Merge all batches into a single DataFrame + all_samples = [] + + for i in range(n_batches): + batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy" + batch_data = np.load(batch_filename, allow_pickle=True) + all_samples.append(batch_data) + + # Concatenate all batches + samplesTotal_array = np.vstack(all_samples) + + # Convert back to DataFrame with proper column namesq + samplesTotal = pd.DataFrame(samplesTotal_array, columns=reaction_ids) + + # Save the final merged result as CSV write_to_file(samplesTotal.T, model_name, True) - - for i in range(0, n_batches): - os.remove(ARGS.output_path + "/" + model_name + '_'+ str(i)+'_CBS.csv') - pass + + # Clean up temporary numpy files + for i in range(n_batches): + batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy" + if os.path.exists(batch_filename): + os.remove(batch_filename) -def model_sampler(model_input_original:cobra.Model, bounds_path:str, cell_name:str)-> List[pd.DataFrame]: + +def model_sampler_with_bounds(model_input_original: cobra.Model, bounds_path: str, cell_name: str) -> List[pd.DataFrame]: """ - Prepares the model with bounds from the dataset and performs sampling and analysis based on the selected algorithm. + MODE 1: Prepares the model with bounds from separate bounds file and performs sampling. Args: model_input_original (cobra.Model): The original COBRA model. @@ -234,26 +324,45 @@ model_input = model_input_original.copy() bounds_df = read_dataset(bounds_path, "bounds dataset") - for rxn_index, row in bounds_df.iterrows(): - model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound - model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound + # Apply bounds to model + for rxn_index, row in bounds_df.iterrows(): + try: + model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound + model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound + except KeyError: + warning(f"Warning: Reaction {rxn_index} not found in model. Skipping.") - if ARGS.algorithm == 'OPTGP': - OPTGP_sampler(model_input, cell_name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed) + return perform_sampling_and_analysis(model_input, cell_name) + - elif ARGS.algorithm == 'CBS': - CBS_sampler(model_input, cell_name, ARGS.n_samples, ARGS.n_batches, ARGS.seed) +def perform_sampling_and_analysis(model_input: cobra.Model, cell_name: str) -> List[pd.DataFrame]: + """ + Common function to perform sampling and analysis on a prepared model. - df_mean, df_median, df_quantiles = fluxes_statistics(cell_name, ARGS.output_types) + Args: + model_input (cobra.Model): The prepared COBRA model with bounds applied. + cell_name (str): Name of the cell, used to generate filenames for output. - if("fluxes" not in ARGS.output_types): - os.remove(ARGS.output_path + "/" + cell_name + '.csv') + Returns: + List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results. + """ returnList = [] - returnList.append(df_mean) - returnList.append(df_median) - returnList.append(df_quantiles) + + if ARGS.sampling_enabled == "true": + + if ARGS.algorithm == 'OPTGP': + OPTGP_sampler(model_input, cell_name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed) + elif ARGS.algorithm == 'CBS': + CBS_sampler(model_input, cell_name, ARGS.n_samples, ARGS.n_batches, ARGS.seed) + + df_mean, df_median, df_quantiles = fluxes_statistics(cell_name, ARGS.output_types) + + if("fluxes" not in ARGS.output_types): + os.remove(ARGS.output_path + "/" + cell_name + '.csv') + + returnList = [df_mean, df_median, df_quantiles] df_pFBA, df_FVA, df_sensitivity = fluxes_analysis(model_input, cell_name, ARGS.output_type_analysis) @@ -316,7 +425,8 @@ def fluxes_analysis(model:cobra.Model, model_name:str, output_types:List)-> List[pd.DataFrame]: """ - Performs flux analysis including pFBA, FVA, and sensitivity analysis. + Performs flux analysis including pFBA, FVA, and sensitivity analysis. The objective function + is assumed to be already set in the model. Args: model (cobra.Model): The COBRA model to analyze. @@ -333,15 +443,14 @@ for output_type in output_types: if(output_type == "pFBA"): - model.objective = "Biomass" solution = cobra.flux_analysis.pfba(model) fluxes = solution.fluxes - df_pFBA.loc[0,[rxn._id for rxn in model.reactions]] = fluxes.tolist() + df_pFBA.loc[0,[rxn.id for rxn in model.reactions]] = fluxes.tolist() df_pFBA = df_pFBA.reset_index(drop=True) df_pFBA.index = [model_name] df_pFBA = df_pFBA.astype(float).round(6) elif(output_type == "FVA"): - fva = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8) + fva = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=ARGS.perc_opt, processes=1).round(8) columns = [] for rxn in fva.index.to_list(): columns.append(rxn + "_min") @@ -354,7 +463,6 @@ df_FVA.index = [model_name] df_FVA = df_FVA.astype(float).round(6) elif(output_type == "sensitivity"): - model.objective = "Biomass" solution_original = model.optimize().objective_value reactions = model.reactions single = cobra.flux_analysis.single_reaction_deletion(model) @@ -367,75 +475,130 @@ return df_pFBA, df_FVA, df_sensitivity ############################# main ########################################### -def main(args :List[str] = None) -> None: +def main(args: List[str] = None) -> None: """ - Initializes everything and sets the program in motion based on the fronted input arguments. + Initialize and run sampling/analysis based on the frontend input arguments. Returns: None """ - num_processors = cpu_count() + num_processors = max(1, cpu_count() - 1) global ARGS ARGS = process_args(args) if not os.path.exists(ARGS.output_path): os.makedirs(ARGS.output_path) - - model_type :utils.Model = ARGS.model_selector - if model_type is utils.Model.Custom: - model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext) + + # --- Normalize inputs (the tool may pass comma-separated --input and either --name or --names) --- + ARGS.input_files = ARGS.input.split(",") if ARGS.input else [] + ARGS.file_names = ARGS.name.split(",") + # output types (required) -> list + ARGS.output_types = ARGS.output_type.split(",") if ARGS.output_type else [] + # optional analysis output types -> list or empty + ARGS.output_type_analysis = ARGS.output_type_analysis.split(",") if ARGS.output_type_analysis else [] + + # Determine if sampling should be performed + if ARGS.sampling_enabled == "true": + perform_sampling = True else: - model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir) + perform_sampling = False - #Set solver verbosity to 1 to see warning and error messages only. - model.solver.configuration.verbosity = 1 + print("=== INPUT FILES ===") + print(f"{ARGS.input_files}") + print(f"{ARGS.file_names}") + print(f"{ARGS.output_type}") + print(f"{ARGS.output_types}") + print(f"{ARGS.output_type_analysis}") + print(f"Sampling enabled: {perform_sampling} (n_samples: {ARGS.n_samples})") - ARGS.bounds = ARGS.input.split(",") - ARGS.bounds_name = ARGS.names.split(",") - ARGS.output_types = ARGS.output_type.split(",") - ARGS.output_type_analysis = ARGS.output_type_analysis.split(",") + if ARGS.model_and_bounds == "True": + # MODE 1: Model + bounds (separate files) + print("=== MODE 1: Model + Bounds (separate files) ===") + + # Load base model + if not ARGS.model_upload: + sys.exit("Error: model_upload is required for Mode 1") + base_model = model_utils.build_cobra_model_from_csv(ARGS.model_upload) - results = Parallel(n_jobs=num_processors)(delayed(model_sampler)(model, bounds_path, cell_name) for bounds_path, cell_name in zip(ARGS.bounds, ARGS.bounds_name)) + validation = model_utils.validate_model(base_model) + + print("\n=== MODEL VALIDATION ===") + for key, value in validation.items(): + print(f"{key}: {value}") + + # Set solver verbosity to 1 to see warning and error messages only. + base_model.solver.configuration.verbosity = 1 - all_mean = pd.concat([result[0] for result in results], ignore_index=False) - all_median = pd.concat([result[1] for result in results], ignore_index=False) - all_quantiles = pd.concat([result[2] for result in results], ignore_index=False) + # Process each bounds file with the base model + results = Parallel(n_jobs=num_processors)( + delayed(model_sampler_with_bounds)(base_model, bounds_file, cell_name) + for bounds_file, cell_name in zip(ARGS.input_files, ARGS.file_names) + ) - if("mean" in ARGS.output_types): - all_mean = all_mean.fillna(0.0) - all_mean = all_mean.sort_index() - write_to_file(all_mean.T, "mean", True) + else: + # MODE 2: Multiple complete models + print("=== MODE 2: Multiple complete models ===") + + # Process each complete model file + results = Parallel(n_jobs=num_processors)( + delayed(perform_sampling_and_analysis)(model_utils.build_cobra_model_from_csv(model_file), cell_name) + for model_file, cell_name in zip(ARGS.input_files, ARGS.file_names) + ) + + # Handle sampling outputs (only if sampling was performed) + if perform_sampling: + print("=== PROCESSING SAMPLING RESULTS ===") + + all_mean = pd.concat([result[0] for result in results], ignore_index=False) + all_median = pd.concat([result[1] for result in results], ignore_index=False) + all_quantiles = pd.concat([result[2] for result in results], ignore_index=False) - if("median" in ARGS.output_types): - all_median = all_median.fillna(0.0) - all_median = all_median.sort_index() - write_to_file(all_median.T, "median", True) + if "mean" in ARGS.output_types: + all_mean = all_mean.fillna(0.0) + all_mean = all_mean.sort_index() + write_to_file(all_mean.T, "mean", True) + + if "median" in ARGS.output_types: + all_median = all_median.fillna(0.0) + all_median = all_median.sort_index() + write_to_file(all_median.T, "median", True) + + if "quantiles" in ARGS.output_types: + all_quantiles = all_quantiles.fillna(0.0) + all_quantiles = all_quantiles.sort_index() + write_to_file(all_quantiles.T, "quantiles", True) + else: + print("=== SAMPLING SKIPPED (n_samples = 0 or sampling disabled) ===") + + # Handle optimization analysis outputs (always available) + print("=== PROCESSING OPTIMIZATION RESULTS ===") - if("quantiles" in ARGS.output_types): - all_quantiles = all_quantiles.fillna(0.0) - all_quantiles = all_quantiles.sort_index() - write_to_file(all_quantiles.T, "quantiles", True) - - index_result = 3 - if("pFBA" in ARGS.output_type_analysis): + # Determine the starting index for optimization results + # If sampling was performed, optimization results start at index 3 + # If no sampling, optimization results start at index 0 + index_result = 3 if perform_sampling else 0 + + if "pFBA" in ARGS.output_type_analysis: all_pFBA = pd.concat([result[index_result] for result in results], ignore_index=False) all_pFBA = all_pFBA.sort_index() write_to_file(all_pFBA.T, "pFBA", True) - index_result+=1 - if("FVA" in ARGS.output_type_analysis): - all_FVA= pd.concat([result[index_result] for result in results], ignore_index=False) + index_result += 1 + + if "FVA" in ARGS.output_type_analysis: + all_FVA = pd.concat([result[index_result] for result in results], ignore_index=False) all_FVA = all_FVA.sort_index() write_to_file(all_FVA.T, "FVA", True) - index_result+=1 - if("sensitivity" in ARGS.output_type_analysis): + index_result += 1 + + if "sensitivity" in ARGS.output_type_analysis: all_sensitivity = pd.concat([result[index_result] for result in results], ignore_index=False) all_sensitivity = all_sensitivity.sort_index() write_to_file(all_sensitivity.T, "sensitivity", True) - pass + return ############################################################################## if __name__ == "__main__":
--- a/COBRAxy/flux_simulation.xml Tue Sep 23 13:48:24 2025 +0000 +++ b/COBRAxy/flux_simulation.xml Mon Sep 29 10:33:26 2025 +0000 @@ -4,96 +4,153 @@ <import>marea_macros.xml</import> </macros> - <requirements> + <requirements> <requirement type="package" version="1.24.4">numpy</requirement> <requirement type="package" version="2.0.3">pandas</requirement> - <requirement type="package" version="0.29.0">cobra</requirement> + <requirement type="package" version="0.29.0">cobra</requirement> <requirement type="package" version="5.2.2">lxml</requirement> <requirement type="package" version="1.4.2">joblib</requirement> <requirement type="package" version="1.11">scipy</requirement> - </requirements> + </requirements> <command detect_errors="exit_code"> <![CDATA[ python $__tool_directory__/flux_simulation.py --tool_dir $__tool_directory__ - --model_selector $cond_model.model_selector - #if $cond_model.model_selector == 'Custom' - --model $model - --model_name $model.element_identifier + --model_and_bounds $model_and_bounds.model_and_bounds + + #if $model_and_bounds.model_and_bounds == 'True': + --model_upload $model_and_bounds.model_upload + --input "${",".join(map(str, $model_and_bounds.inputs))}" + #set $names = "" + #for $input_temp in $model_and_bounds.inputs: + #set $names = $names + $input_temp.element_identifier + "," + #end for + --name $names + #else: + --input "${",".join(map(str, $model_and_bounds.model_files))}" + #set $names = "" + #for $input_temp in $model_and_bounds.model_files: + #set $names = $names + $input_temp.element_identifier + "," + #end for + --name $names #end if - --input "${",".join(map(str, $inputs))}" - #set $names = "" - #for $input_temp in $inputs: - #set $names = $names + $input_temp.element_identifier + "," - #end for - --name $names - --thinning 0 - #if $algorithm_param.algorithm == 'OPTGP': - --thinning $algorithm_param.thinning + + --sampling_enabled $sampling_params.sampling_enabled + + #if $sampling_params.sampling_enabled == 'true': + --thinning 0 + #if $sampling_params.algorithm_param.algorithm == 'OPTGP': + --thinning $sampling_params.algorithm_param.thinning + #end if + --algorithm $sampling_params.algorithm_param.algorithm + --n_batches $sampling_params.n_batches + --n_samples $sampling_params.n_samples + --seed $sampling_params.seed + --output_type "${",".join(map(str, $sampling_params.output_types))}" + #else: + --thinning 0 + --algorithm 'CBS' + --n_batches 1 + --n_samples 1 + --seed 0 + --output_type 'mean' #end if - --algorithm $algorithm_param.algorithm - --n_batches $n_batches - --n_samples $n_samples - --seed $seed - --output_type "${",".join(map(str, $output_types))}" + --output_type_analysis "${",".join(map(str, $output_types_analysis))}" + + #if 'FVA' in str($output_types_analysis): + --perc_opt $fva_params.optimality_fraction + #end if + --out_log $log ]]> </command> + <inputs> + <conditional name="model_and_bounds"> + <param name="model_and_bounds" argument="--model_and_bounds" type="select" label="Upload mode:" help="Choose whether to upload the model and bounds in separate files or to upload multiple complete model files."> + <option value="True" selected="true">Model + bounds (separate files)</option> + <option value="False">Multiple complete models</option> + </param> - <conditional name="cond_model"> - <expand macro="options_ras_to_bounds_model"/> - <when value="Custom"> - <param name="model" argument="--model" type="data" format="json, xml" label="Custom model" /> + <when value="True"> + <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" + label="Model (rules) file:" + help="Upload a CSV/TSV file that contains the model reaction rules. Recommended columns: ReactionID, Reaction (formula), Rule (GPR). Optional columns: name, lower_bound, upper_bound, InMedium. If bounds are present here they may be overridden by separate bound files." /> + + <param name="inputs" argument="--inputs" multiple="true" type="data" format="tabular,csv,tsv" + label="Bound file(s):" + help="Upload one or more CSV/TSV files containing reaction bounds. Each file must include at least: ReactionID, lower_bound, upper_bound. Files are applied in the order provided; later files override earlier ones for the same ReactionID." /> </when> - </conditional> + + <when value="False"> + <param name="model_files" argument="--model_files" multiple="true" type="data" format="csv,tsv,tabular" + label="Complete model files:" + help="Upload one or more CSV/TSV files, each containing both model rules and reaction bounds for different contexts/cells. Required columns: ReactionID, Reaction, Rule, lower_bound, upper_bound." /> + </when> + </conditional> - <param name="inputs" argument="--inputs" multiple="true" type="data" format="tabular, csv, tsv" label="Bound(s):" /> - - - <conditional name="algorithm_param"> - <param name="algorithm" argument="--algorithm" type="select" label="Choose sampling algorithm:"> - <option value="CBS" selected="true">CBS</option> - <option value="OPTGP">OPTGP</option> - </param> - <when value="OPTGP"> - <param name="thinning" argument="--thinning" type="integer" label="Thinning:" value="100" help="Number of iterations to wait before taking a sample."/> - </when> + <conditional name="sampling_params"> + <param name="sampling_enabled" argument="--sampling_enabled" type="boolean" display="checkboxes" checked="false" label="Enable sampling" help="Enable flux sampling"/> + + <when value="true"> + <conditional name="algorithm_param"> + <param name="algorithm" argument="--algorithm" type="select" label="Choose sampling algorithm:"> + <option value="CBS" selected="true">CBS</option> + <option value="OPTGP">OPTGP</option> + </param> + <when value="OPTGP"> + <param name="thinning" argument="--thinning" type="integer" label="Thinning:" value="100" help="Number of iterations to wait before taking a sample."/> + </when> + </conditional> + + <param name="n_samples" argument="--n_samples" type="integer" label="Samples:" value="1000" min="1" max="1000"/> + <param name="n_batches" argument="--n_batches" type="integer" label="Batches:" value="1" help="This is useful for computational performances."/> + <param name="seed" argument="--seed" type="integer" label="Seed:" value="0" help="Random seed."/> - </conditional> - - - <param name="n_samples" argument="--n_samples" type="integer" label="Samples:" value="1000"/> - - <param name="n_batches" argument="--n_batches" type="integer" label="Batches:" value="1" help="This is useful for computational perfomances."/> + <param type="select" argument="--output_types" multiple="true" name="output_types" label="Choose outputs from sampling"> + <option value="mean" selected="true">Mean</option> + <option value="median" selected="true">Median</option> + <option value="quantiles" selected="true">Quantiles</option> + <option value="fluxes" selected="false">All fluxes</option> + </param> + </when> + + <when value="false"> + <!-- Hidden parameters when sampling is disabled --> + <param name="algorithm" type="hidden" value="CBS"/> + <param name="n_samples" type="hidden" value="1000"/> + <param name="n_batches" type="hidden" value="1"/> + <param name="seed" type="hidden" value="0"/> + <param name="output_types" type="hidden" value="mean"/> + </when> + </conditional> - <param name="seed" argument="--seed" type="integer" label="Seed:" value="0" helph="Random seed."/> - - <param type="select" argument="--output_types" multiple="true" name="output_types" label="Desired outputs from sampling"> - <option value="mean" selected="true">Mean</option> - <option value="median" selected="true">Median</option> - <option value="quantiles" selected="true">Quantiles</option> - <option value="fluxes" selected="false">All fluxes</option> + <param type="select" argument="--output_types_analysis" multiple="true" name="output_types_analysis" label="Choose outputs from optimization"> + <option value="FVA" selected="true">FVA</option> + <option value="pFBA" selected="false">pFBA</option> + <option value="sensitivity" selected="false">Sensitivity reaction knock-out (Biomass)</option> </param> - <param type="select" argument="--output_types_analysis" multiple="true" name="output_types_analysis" label="Desired outputs from flux analysis"> - <option value="pFBA" selected="false">pFBA</option> - <option value="FVA" selected="false">FVA</option> - <option value="sensitivity" selected="false">Sensitivity reaction knock-out (Biomass)</option> - </param> + <conditional name="fva_params"> + <param name="show_fva_options" type="boolean" display="checkboxes" checked="false" label="Configure FVA parameters" help="Show additional FVA configuration options"/> + <when value="true"> + <param name="optimality_fraction" argument="--fva_optimality" type="float" label="FVA Optimality (fraction):" value="0.90" min="0.0" max="1.0" + help="Fraction of optimality for FVA analysis. 1.0 means the flux must be optimal, lower values allow suboptimal solutions."/> + </when> + <when value="false"> + <param name="optimality_fraction" argument="--fva_optimality" type="hidden" value="1.0"/> + </when> + </conditional> + </inputs> - <outputs> <data format="txt" name="log" label="Flux Simulation - Log" /> - <data name="output" format="tabular" label="Flux Simulation - Output"> - <discover_datasets pattern="__name_and_ext__" - directory="flux_simulation" visible="true" /> + <discover_datasets pattern="__name_and_ext__" directory="flux_simulation" visible="true" /> </data> - </outputs> <help> @@ -101,21 +158,21 @@ What it does ------------- -This tool generates flux samples starting from a model in JSON or XML format by using CBS (Corner-based sampling) or OPTGP (Improved Artificial Centering Hit-and-Run sampler) sampling algorithms. +This tool generates flux samples starting from metabolic models using CBS (Corner-based sampling) or OPTGP (Improved Artificial Centering Hit-and-Run sampler) algorithms. -It can return sampled fluxes by appliying summary statistics: +Two upload modes are supported: +1. **Model + bounds**: Upload one base model and multiple bound files (one per context/cell type) +2. **Multiple complete models**: Upload multiple complete model files, each with integrated bounds + +It can return sampled fluxes by applying summary statistics: - mean - median - - quantiles (0.25, 0.50, 0.75). + - quantiles (0.25, 0.50, 0.75) -Flux analysis can be perfomed over the metabolic model: - - parsimoniuos-FBA (optimized by Biomass) - - FVA - - Biomass sensitivity analysis (single reaction knock-out). It is the ratio between the optimal of the Biomass reaction computed by FBA after knocking-out a reaction and the same over the complete model. - -Accepted files: - - A model: JSON, XML, MAT or YAML (.yml) file reporting reactions and rules contained in the model. Supported compressed formats: .zip, .gz and .bz2. Filename must follow the pattern: {model_name}.{extension}.[zip|gz|bz2] - - Context-specific bounds: generated by RAS to Bounds tool. This can be a collection of bounds too (one bounds file per context). +Flux analysis can be performed over the metabolic model by using the objective function already set in the model. The following analyses are supported: + - parsimonious-FBA (optimized by Biomass) + - FVA (with configurable optimality percentage) + - Biomass sensitivity analysis (single reaction knock-out) Output: ------------- @@ -124,11 +181,14 @@ - Samples: reporting the sampled fluxes for each reaction (reaction names on the rows and sample names on the columns). Format: tab-separated. - a log file (.txt). -**TIP**: The Batches parameter is useful to mantain in memory just a batch of samples at time. For example, if you wish to sample 10.000 points, than it is suggested to select n_samples = 1.000 and n_batches=10. -**TIP**: The Thinning parameter of the OPTGP algorithm is useful to converge to a stationary distribution (see cited articles by Galuzzi, Milazzo and Damiani). +**TIP**: Bounds generated by RAStoBound are grouped in a collection. You can select collections by clicking on "Dataset Collection" option in the "Bound file(s):" input parameter. + +**TIP**: The Batches parameter helps maintain memory efficiency. For 10,000 samples, use n_samples=1,000 and n_batches=10. +**TIP**: The Thinning parameter for OPTGP helps converge to stationary distribution. + +**TIP**: FVA optimality percentage allows you to explore suboptimal flux ranges. 100% restricts to optimal solutions, while lower values (e.g., 90%) allow broader flux ranges. ]]> </help> <expand macro="citations_fluxes" /> - </tool> \ No newline at end of file
--- a/COBRAxy/flux_simulation_beta.py Tue Sep 23 13:48:24 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,605 +0,0 @@ -""" -Flux sampling and analysis utilities for COBRA models. - -This script supports two modes: -- Mode 1 (model_and_bounds=True): load a base model and apply bounds from - separate files before sampling. -- Mode 2 (model_and_bounds=False): load complete models and sample directly. - -Sampling algorithms supported: OPTGP and CBS. Outputs include flux samples -and optional analyses (pFBA, FVA, sensitivity), saved as tabular files. -""" - -import argparse -import utils.general_utils as utils -from typing import List -import os -import pandas as pd -import numpy as np -import cobra -import utils.CBS_backend as CBS_backend -from joblib import Parallel, delayed, cpu_count -from cobra.sampling import OptGPSampler -import sys -import utils.model_utils as model_utils - - -################################# process args ############################### -def process_args(args: List[str] = None) -> argparse.Namespace: - """ - Processes command-line arguments. - - Args: - args (list): List of command-line arguments. - - Returns: - Namespace: An object containing parsed arguments. - """ - parser = argparse.ArgumentParser(usage='%(prog)s [options]', - description='process some value\'s') - - parser.add_argument("-mo", "--model_upload", type=str, - help="path to input file with custom rules, if provided") - - parser.add_argument("-mab", "--model_and_bounds", type=str, - choices=['True', 'False'], - required=True, - help="upload mode: True for model+bounds, False for complete models") - - parser.add_argument("-ens", "--sampling_enabled", type=str, - choices=['true', 'false'], - required=True, - help="enable sampling: 'true' for sampling, 'false' for no sampling") - - parser.add_argument('-ol', '--out_log', - help="Output log") - - parser.add_argument('-td', '--tool_dir', - type=str, - required=True, - help='your tool directory') - - parser.add_argument('-in', '--input', - required=True, - type=str, - help='input bounds files or complete model files') - - parser.add_argument('-ni', '--name', - required=True, - type=str, - help='cell names') - - parser.add_argument('-a', '--algorithm', - type=str, - choices=['OPTGP', 'CBS'], - required=True, - help='choose sampling algorithm') - - parser.add_argument('-th', '--thinning', - type=int, - default=100, - required=True, - help='choose thinning') - - parser.add_argument('-ns', '--n_samples', - type=int, - required=True, - help='choose how many samples (set to 0 for optimization only)') - - parser.add_argument('-sd', '--seed', - type=int, - required=True, - help='seed for random number generation') - - parser.add_argument('-nb', '--n_batches', - type=int, - required=True, - help='choose how many batches') - - parser.add_argument('-opt', '--perc_opt', - type=float, - default=0.9, - required=False, - help='choose the fraction of optimality for FVA (0-1)') - - parser.add_argument('-ot', '--output_type', - type=str, - required=True, - help='output type for sampling results') - - parser.add_argument('-ota', '--output_type_analysis', - type=str, - required=False, - help='output type analysis (optimization methods)') - - parser.add_argument('-idop', '--output_path', - type=str, - default='flux_simulation', - help='output path for maps') - - ARGS = parser.parse_args(args) - return ARGS -########################### warning ########################################### -def warning(s :str) -> None: - """ - Log a warning message to an output log file and print it to the console. - - Args: - s (str): The warning message to be logged and printed. - - Returns: - None - """ - with open(ARGS.out_log, 'a') as log: - log.write(s + "\n\n") - print(s) - - -def write_to_file(dataset: pd.DataFrame, name: str, keep_index:bool=False)->None: - """ - Write a DataFrame to a TSV file under ARGS.output_path with a given base name. - - Args: - dataset: The DataFrame to write. - name: Base file name (without extension). - keep_index: Whether to keep the DataFrame index in the file. - - Returns: - None - """ - dataset.index.name = 'Reactions' - dataset.to_csv(ARGS.output_path + "/" + name + ".csv", sep = '\t', index = keep_index) - -############################ dataset input #################################### -def read_dataset(data :str, name :str) -> pd.DataFrame: - """ - Read a dataset from a CSV file and return it as a pandas DataFrame. - - Args: - data (str): Path to the CSV file containing the dataset. - name (str): Name of the dataset, used in error messages. - - Returns: - pandas.DataFrame: DataFrame containing the dataset. - - Raises: - pd.errors.EmptyDataError: If the CSV file is empty. - sys.exit: If the CSV file has the wrong format, the execution is aborted. - """ - try: - dataset = pd.read_csv(data, sep = '\t', header = 0, index_col=0, engine='python') - except pd.errors.EmptyDataError: - sys.exit('Execution aborted: wrong format of ' + name + '\n') - if len(dataset.columns) < 2: - sys.exit('Execution aborted: wrong format of ' + name + '\n') - return dataset - - - -def OPTGP_sampler(model: cobra.Model, model_name: str, n_samples: int = 1000, thinning: int = 100, n_batches: int = 1, seed: int = 0) -> None: - """ - Samples from the OPTGP (Optimal Global Perturbation) algorithm and saves the results to CSV files. - - Args: - model (cobra.Model): The COBRA model to sample from. - model_name (str): The name of the model, used in naming output files. - n_samples (int, optional): Number of samples per batch. Default is 1000. - thinning (int, optional): Thinning parameter for the sampler. Default is 100. - n_batches (int, optional): Number of batches to run. Default is 1. - seed (int, optional): Random seed for reproducibility. Default is 0. - - Returns: - None - """ - import numpy as np - - # Get reaction IDs for consistent column ordering - reaction_ids = [rxn.id for rxn in model.reactions] - - # Sample and save each batch as numpy file - for i in range(n_batches): - optgp = OptGPSampler(model, thinning, seed) - samples = optgp.sample(n_samples) - - # Save as numpy array (more memory efficient) - batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy" - np.save(batch_filename, samples.to_numpy()) - - seed += 1 - - # Merge all batches into a single DataFrame - all_samples = [] - - for i in range(n_batches): - batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy" - batch_data = np.load(batch_filename, allow_pickle=True) - all_samples.append(batch_data) - - # Concatenate all batches - samplesTotal_array = np.vstack(all_samples) - - # Convert back to DataFrame with proper column names - samplesTotal = pd.DataFrame(samplesTotal_array, columns=reaction_ids) - - # Save the final merged result as CSV - write_to_file(samplesTotal.T, model_name, True) - - # Clean up temporary numpy files - for i in range(n_batches): - batch_filename = f"{ARGS.output_path}/{model_name}_{i}_OPTGP.npy" - if os.path.exists(batch_filename): - os.remove(batch_filename) - - -def CBS_sampler(model: cobra.Model, model_name: str, n_samples: int = 1000, n_batches: int = 1, seed: int = 0) -> None: - """ - Samples using the CBS (Constraint-based Sampling) algorithm and saves the results to CSV files. - - Args: - model (cobra.Model): The COBRA model to sample from. - model_name (str): The name of the model, used in naming output files. - n_samples (int, optional): Number of samples per batch. Default is 1000. - n_batches (int, optional): Number of batches to run. Default is 1. - seed (int, optional): Random seed for reproducibility. Default is 0. - - Returns: - None - """ - import numpy as np - - # Get reaction IDs for consistent column ordering - reaction_ids = [reaction.id for reaction in model.reactions] - - # Perform FVA analysis once for all batches - df_FVA = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0).round(6) - - # Generate random objective functions for all samples across all batches - df_coefficients = CBS_backend.randomObjectiveFunction(model, n_samples * n_batches, df_FVA, seed=seed) - - # Sample and save each batch as numpy file - for i in range(n_batches): - samples = pd.DataFrame(columns=reaction_ids, index=range(n_samples)) - - try: - CBS_backend.randomObjectiveFunctionSampling( - model, - n_samples, - df_coefficients.iloc[:, i * n_samples:(i + 1) * n_samples], - samples - ) - except Exception as e: - utils.logWarning( - f"Warning: GLPK solver has failed for {model_name}. Trying with COBRA interface. Error: {str(e)}", - ARGS.out_log - ) - CBS_backend.randomObjectiveFunctionSampling_cobrapy( - model, - n_samples, - df_coefficients.iloc[:, i * n_samples:(i + 1) * n_samples], - samples - ) - - # Save as numpy array (more memory efficient) - batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy" - utils.logWarning(batch_filename, ARGS.out_log) - np.save(batch_filename, samples.to_numpy()) - - # Merge all batches into a single DataFrame - all_samples = [] - - for i in range(n_batches): - batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy" - batch_data = np.load(batch_filename, allow_pickle=True) - all_samples.append(batch_data) - - # Concatenate all batches - samplesTotal_array = np.vstack(all_samples) - - # Convert back to DataFrame with proper column namesq - samplesTotal = pd.DataFrame(samplesTotal_array, columns=reaction_ids) - - # Save the final merged result as CSV - write_to_file(samplesTotal.T, model_name, True) - - # Clean up temporary numpy files - for i in range(n_batches): - batch_filename = f"{ARGS.output_path}/{model_name}_{i}_CBS.npy" - if os.path.exists(batch_filename): - os.remove(batch_filename) - - - -def model_sampler_with_bounds(model_input_original: cobra.Model, bounds_path: str, cell_name: str) -> List[pd.DataFrame]: - """ - MODE 1: Prepares the model with bounds from separate bounds file and performs sampling. - - Args: - model_input_original (cobra.Model): The original COBRA model. - bounds_path (str): Path to the CSV file containing the bounds dataset. - cell_name (str): Name of the cell, used to generate filenames for output. - - Returns: - List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results. - """ - - model_input = model_input_original.copy() - bounds_df = read_dataset(bounds_path, "bounds dataset") - - # Apply bounds to model - for rxn_index, row in bounds_df.iterrows(): - try: - model_input.reactions.get_by_id(rxn_index).lower_bound = row.lower_bound - model_input.reactions.get_by_id(rxn_index).upper_bound = row.upper_bound - except KeyError: - warning(f"Warning: Reaction {rxn_index} not found in model. Skipping.") - - return perform_sampling_and_analysis(model_input, cell_name) - - -def perform_sampling_and_analysis(model_input: cobra.Model, cell_name: str) -> List[pd.DataFrame]: - """ - Common function to perform sampling and analysis on a prepared model. - - Args: - model_input (cobra.Model): The prepared COBRA model with bounds applied. - cell_name (str): Name of the cell, used to generate filenames for output. - - Returns: - List[pd.DataFrame]: A list of DataFrames containing statistics and analysis results. - """ - - returnList = [] - - if ARGS.sampling_enabled == "true": - - if ARGS.algorithm == 'OPTGP': - OPTGP_sampler(model_input, cell_name, ARGS.n_samples, ARGS.thinning, ARGS.n_batches, ARGS.seed) - elif ARGS.algorithm == 'CBS': - CBS_sampler(model_input, cell_name, ARGS.n_samples, ARGS.n_batches, ARGS.seed) - - df_mean, df_median, df_quantiles = fluxes_statistics(cell_name, ARGS.output_types) - - if("fluxes" not in ARGS.output_types): - os.remove(ARGS.output_path + "/" + cell_name + '.csv') - - returnList = [df_mean, df_median, df_quantiles] - - df_pFBA, df_FVA, df_sensitivity = fluxes_analysis(model_input, cell_name, ARGS.output_type_analysis) - - if("pFBA" in ARGS.output_type_analysis): - returnList.append(df_pFBA) - if("FVA" in ARGS.output_type_analysis): - returnList.append(df_FVA) - if("sensitivity" in ARGS.output_type_analysis): - returnList.append(df_sensitivity) - - return returnList - -def fluxes_statistics(model_name: str, output_types:List)-> List[pd.DataFrame]: - """ - Computes statistics (mean, median, quantiles) for the fluxes. - - Args: - model_name (str): Name of the model, used in filename for input. - output_types (List[str]): Types of statistics to compute (mean, median, quantiles). - - Returns: - List[pd.DataFrame]: List of DataFrames containing mean, median, and quantiles statistics. - """ - - df_mean = pd.DataFrame() - df_median= pd.DataFrame() - df_quantiles= pd.DataFrame() - - df_samples = pd.read_csv(ARGS.output_path + "/" + model_name + '.csv', sep = '\t', index_col = 0).T - df_samples = df_samples.round(8) - - for output_type in output_types: - if(output_type == "mean"): - df_mean = df_samples.mean() - df_mean = df_mean.to_frame().T - df_mean = df_mean.reset_index(drop=True) - df_mean.index = [model_name] - elif(output_type == "median"): - df_median = df_samples.median() - df_median = df_median.to_frame().T - df_median = df_median.reset_index(drop=True) - df_median.index = [model_name] - elif(output_type == "quantiles"): - newRow = [] - cols = [] - for rxn in df_samples.columns: - quantiles = df_samples[rxn].quantile([0.25, 0.50, 0.75]) - newRow.append(quantiles[0.25]) - cols.append(rxn + "_q1") - newRow.append(quantiles[0.5]) - cols.append(rxn + "_q2") - newRow.append(quantiles[0.75]) - cols.append(rxn + "_q3") - df_quantiles = pd.DataFrame(columns=cols) - df_quantiles.loc[0] = newRow - df_quantiles = df_quantiles.reset_index(drop=True) - df_quantiles.index = [model_name] - - return df_mean, df_median, df_quantiles - -def fluxes_analysis(model:cobra.Model, model_name:str, output_types:List)-> List[pd.DataFrame]: - """ - Performs flux analysis including pFBA, FVA, and sensitivity analysis. The objective function - is assumed to be already set in the model. - - Args: - model (cobra.Model): The COBRA model to analyze. - model_name (str): Name of the model, used in filenames for output. - output_types (List[str]): Types of analysis to perform (pFBA, FVA, sensitivity). - - Returns: - List[pd.DataFrame]: List of DataFrames containing pFBA, FVA, and sensitivity analysis results. - """ - - df_pFBA = pd.DataFrame() - df_FVA= pd.DataFrame() - df_sensitivity= pd.DataFrame() - - for output_type in output_types: - if(output_type == "pFBA"): - solution = cobra.flux_analysis.pfba(model) - fluxes = solution.fluxes - df_pFBA.loc[0,[rxn.id for rxn in model.reactions]] = fluxes.tolist() - df_pFBA = df_pFBA.reset_index(drop=True) - df_pFBA.index = [model_name] - df_pFBA = df_pFBA.astype(float).round(6) - elif(output_type == "FVA"): - fva = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=ARGS.perc_opt, processes=1).round(8) - columns = [] - for rxn in fva.index.to_list(): - columns.append(rxn + "_min") - columns.append(rxn + "_max") - df_FVA= pd.DataFrame(columns = columns) - for index_rxn, row in fva.iterrows(): - df_FVA.loc[0, index_rxn+ "_min"] = fva.loc[index_rxn, "minimum"] - df_FVA.loc[0, index_rxn+ "_max"] = fva.loc[index_rxn, "maximum"] - df_FVA = df_FVA.reset_index(drop=True) - df_FVA.index = [model_name] - df_FVA = df_FVA.astype(float).round(6) - elif(output_type == "sensitivity"): - solution_original = model.optimize().objective_value - reactions = model.reactions - single = cobra.flux_analysis.single_reaction_deletion(model) - newRow = [] - df_sensitivity = pd.DataFrame(columns = [rxn.id for rxn in reactions], index = [model_name]) - for rxn in reactions: - newRow.append(single.knockout[rxn.id].growth.values[0]/solution_original) - df_sensitivity.loc[model_name] = newRow - df_sensitivity = df_sensitivity.astype(float).round(6) - return df_pFBA, df_FVA, df_sensitivity - -############################# main ########################################### -def main(args: List[str] = None) -> None: - """ - Initialize and run sampling/analysis based on the frontend input arguments. - - Returns: - None - """ - - num_processors = max(1, cpu_count() - 1) - - global ARGS - ARGS = process_args(args) - - if not os.path.exists(ARGS.output_path): - os.makedirs(ARGS.output_path) - - # --- Normalize inputs (the tool may pass comma-separated --input and either --name or --names) --- - ARGS.input_files = ARGS.input.split(",") if ARGS.input else [] - ARGS.file_names = ARGS.name.split(",") - # output types (required) -> list - ARGS.output_types = ARGS.output_type.split(",") if ARGS.output_type else [] - # optional analysis output types -> list or empty - ARGS.output_type_analysis = ARGS.output_type_analysis.split(",") if ARGS.output_type_analysis else [] - - # Determine if sampling should be performed - if ARGS.sampling_enabled == "true": - perform_sampling = True - else: - perform_sampling = False - - print("=== INPUT FILES ===") - print(f"{ARGS.input_files}") - print(f"{ARGS.file_names}") - print(f"{ARGS.output_type}") - print(f"{ARGS.output_types}") - print(f"{ARGS.output_type_analysis}") - print(f"Sampling enabled: {perform_sampling} (n_samples: {ARGS.n_samples})") - - if ARGS.model_and_bounds == "True": - # MODE 1: Model + bounds (separate files) - print("=== MODE 1: Model + Bounds (separate files) ===") - - # Load base model - if not ARGS.model_upload: - sys.exit("Error: model_upload is required for Mode 1") - - base_model = model_utils.build_cobra_model_from_csv(ARGS.model_upload) - - validation = model_utils.validate_model(base_model) - - print("\n=== MODEL VALIDATION ===") - for key, value in validation.items(): - print(f"{key}: {value}") - - # Set solver verbosity to 1 to see warning and error messages only. - base_model.solver.configuration.verbosity = 1 - - # Process each bounds file with the base model - results = Parallel(n_jobs=num_processors)( - delayed(model_sampler_with_bounds)(base_model, bounds_file, cell_name) - for bounds_file, cell_name in zip(ARGS.input_files, ARGS.file_names) - ) - - else: - # MODE 2: Multiple complete models - print("=== MODE 2: Multiple complete models ===") - - # Process each complete model file - results = Parallel(n_jobs=num_processors)( - delayed(perform_sampling_and_analysis)(model_utils.build_cobra_model_from_csv(model_file), cell_name) - for model_file, cell_name in zip(ARGS.input_files, ARGS.file_names) - ) - - # Handle sampling outputs (only if sampling was performed) - if perform_sampling: - print("=== PROCESSING SAMPLING RESULTS ===") - - all_mean = pd.concat([result[0] for result in results], ignore_index=False) - all_median = pd.concat([result[1] for result in results], ignore_index=False) - all_quantiles = pd.concat([result[2] for result in results], ignore_index=False) - - if "mean" in ARGS.output_types: - all_mean = all_mean.fillna(0.0) - all_mean = all_mean.sort_index() - write_to_file(all_mean.T, "mean", True) - - if "median" in ARGS.output_types: - all_median = all_median.fillna(0.0) - all_median = all_median.sort_index() - write_to_file(all_median.T, "median", True) - - if "quantiles" in ARGS.output_types: - all_quantiles = all_quantiles.fillna(0.0) - all_quantiles = all_quantiles.sort_index() - write_to_file(all_quantiles.T, "quantiles", True) - else: - print("=== SAMPLING SKIPPED (n_samples = 0 or sampling disabled) ===") - - # Handle optimization analysis outputs (always available) - print("=== PROCESSING OPTIMIZATION RESULTS ===") - - # Determine the starting index for optimization results - # If sampling was performed, optimization results start at index 3 - # If no sampling, optimization results start at index 0 - index_result = 3 if perform_sampling else 0 - - if "pFBA" in ARGS.output_type_analysis: - all_pFBA = pd.concat([result[index_result] for result in results], ignore_index=False) - all_pFBA = all_pFBA.sort_index() - write_to_file(all_pFBA.T, "pFBA", True) - index_result += 1 - - if "FVA" in ARGS.output_type_analysis: - all_FVA = pd.concat([result[index_result] for result in results], ignore_index=False) - all_FVA = all_FVA.sort_index() - write_to_file(all_FVA.T, "FVA", True) - index_result += 1 - - if "sensitivity" in ARGS.output_type_analysis: - all_sensitivity = pd.concat([result[index_result] for result in results], ignore_index=False) - all_sensitivity = all_sensitivity.sort_index() - write_to_file(all_sensitivity.T, "sensitivity", True) - - return - -############################################################################## -if __name__ == "__main__": - main() \ No newline at end of file
--- a/COBRAxy/flux_simulation_beta.xml Tue Sep 23 13:48:24 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,194 +0,0 @@ -<tool id="fluxSimulation - Beta" name="Flux Simulation - BETA" version="2.0.0"> - - <macros> - <import>marea_macros.xml</import> - </macros> - - <requirements> - <requirement type="package" version="1.24.4">numpy</requirement> - <requirement type="package" version="2.0.3">pandas</requirement> - <requirement type="package" version="0.29.0">cobra</requirement> - <requirement type="package" version="5.2.2">lxml</requirement> - <requirement type="package" version="1.4.2">joblib</requirement> - <requirement type="package" version="1.11">scipy</requirement> - </requirements> - - <command detect_errors="exit_code"> - <![CDATA[ - python $__tool_directory__/flux_simulation_beta.py - --tool_dir $__tool_directory__ - --model_and_bounds $model_and_bounds.model_and_bounds - - #if $model_and_bounds.model_and_bounds == 'True': - --model_upload $model_and_bounds.model_upload - --input "${",".join(map(str, $model_and_bounds.inputs))}" - #set $names = "" - #for $input_temp in $model_and_bounds.inputs: - #set $names = $names + $input_temp.element_identifier + "," - #end for - --name $names - #else: - --input "${",".join(map(str, $model_and_bounds.model_files))}" - #set $names = "" - #for $input_temp in $model_and_bounds.model_files: - #set $names = $names + $input_temp.element_identifier + "," - #end for - --name $names - #end if - - --sampling_enabled $sampling_params.sampling_enabled - - #if $sampling_params.sampling_enabled == 'true': - --thinning 0 - #if $sampling_params.algorithm_param.algorithm == 'OPTGP': - --thinning $sampling_params.algorithm_param.thinning - #end if - --algorithm $sampling_params.algorithm_param.algorithm - --n_batches $sampling_params.n_batches - --n_samples $sampling_params.n_samples - --seed $sampling_params.seed - --output_type "${",".join(map(str, $sampling_params.output_types))}" - #else: - --thinning 0 - --algorithm 'CBS' - --n_batches 1 - --n_samples 1 - --seed 0 - --output_type 'mean' - #end if - - --output_type_analysis "${",".join(map(str, $output_types_analysis))}" - - #if 'FVA' in str($output_types_analysis): - --perc_opt $fva_params.optimality_fraction - #end if - - --out_log $log - ]]> - </command> - - <inputs> - <conditional name="model_and_bounds"> - <param name="model_and_bounds" argument="--model_and_bounds" type="select" label="Upload mode:" help="Choose whether to upload the model and bounds in separate files or to upload multiple complete model files."> - <option value="True" selected="true">Model + bounds (separate files)</option> - <option value="False">Multiple complete models</option> - </param> - - <when value="True"> - <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" - label="Model (rules) file:" - help="Upload a CSV/TSV file that contains the model reaction rules. Recommended columns: ReactionID, Reaction (formula), Rule (GPR). Optional columns: name, lower_bound, upper_bound, InMedium. If bounds are present here they may be overridden by separate bound files." /> - - <param name="inputs" argument="--inputs" multiple="true" type="data" format="tabular,csv,tsv" - label="Bound file(s):" - help="Upload one or more CSV/TSV files containing reaction bounds. Each file must include at least: ReactionID, lower_bound, upper_bound. Files are applied in the order provided; later files override earlier ones for the same ReactionID." /> - </when> - - <when value="False"> - <param name="model_files" argument="--model_files" multiple="true" type="data" format="csv,tsv,tabular" - label="Complete model files:" - help="Upload one or more CSV/TSV files, each containing both model rules and reaction bounds for different contexts/cells. Required columns: ReactionID, Reaction, Rule, lower_bound, upper_bound." /> - </when> - </conditional> - - <conditional name="sampling_params"> - <param name="sampling_enabled" argument="--sampling_enabled" type="boolean" display="checkboxes" checked="false" label="Enable sampling" help="Enable flux sampling"/> - - <when value="true"> - <conditional name="algorithm_param"> - <param name="algorithm" argument="--algorithm" type="select" label="Choose sampling algorithm:"> - <option value="CBS" selected="true">CBS</option> - <option value="OPTGP">OPTGP</option> - </param> - <when value="OPTGP"> - <param name="thinning" argument="--thinning" type="integer" label="Thinning:" value="100" help="Number of iterations to wait before taking a sample."/> - </when> - </conditional> - - <param name="n_samples" argument="--n_samples" type="integer" label="Samples:" value="1000" min="1" max="1000"/> - <param name="n_batches" argument="--n_batches" type="integer" label="Batches:" value="1" help="This is useful for computational performances."/> - <param name="seed" argument="--seed" type="integer" label="Seed:" value="0" help="Random seed."/> - - <param type="select" argument="--output_types" multiple="true" name="output_types" label="Choose outputs from sampling"> - <option value="mean" selected="true">Mean</option> - <option value="median" selected="true">Median</option> - <option value="quantiles" selected="true">Quantiles</option> - <option value="fluxes" selected="false">All fluxes</option> - </param> - </when> - - <when value="false"> - <!-- Hidden parameters when sampling is disabled --> - <param name="algorithm" type="hidden" value="CBS"/> - <param name="n_samples" type="hidden" value="1000"/> - <param name="n_batches" type="hidden" value="1"/> - <param name="seed" type="hidden" value="0"/> - <param name="output_types" type="hidden" value="mean"/> - </when> - </conditional> - - <param type="select" argument="--output_types_analysis" multiple="true" name="output_types_analysis" label="Choose outputs from optimization"> - <option value="FVA" selected="true">FVA</option> - <option value="pFBA" selected="false">pFBA</option> - <option value="sensitivity" selected="false">Sensitivity reaction knock-out (Biomass)</option> - </param> - - <conditional name="fva_params"> - <param name="show_fva_options" type="boolean" display="checkboxes" checked="false" label="Configure FVA parameters" help="Show additional FVA configuration options"/> - <when value="true"> - <param name="optimality_fraction" argument="--fva_optimality" type="float" label="FVA Optimality (fraction):" value="0.90" min="0.0" max="1.0" - help="Fraction of optimality for FVA analysis. 1.0 means the flux must be optimal, lower values allow suboptimal solutions."/> - </when> - <when value="false"> - <param name="optimality_fraction" argument="--fva_optimality" type="hidden" value="1.0"/> - </when> - </conditional> - - </inputs> - - <outputs> - <data format="txt" name="log" label="Flux Simulation - Log" /> - <data name="output" format="tabular" label="Flux Simulation - Output"> - <discover_datasets pattern="__name_and_ext__" directory="flux_simulation" visible="true" /> - </data> - </outputs> - - <help> - <![CDATA[ -What it does -------------- - -This tool generates flux samples starting from metabolic models using CBS (Corner-based sampling) or OPTGP (Improved Artificial Centering Hit-and-Run sampler) algorithms. - -Two upload modes are supported: -1. **Model + bounds**: Upload one base model and multiple bound files (one per context/cell type) -2. **Multiple complete models**: Upload multiple complete model files, each with integrated bounds - -It can return sampled fluxes by applying summary statistics: - - mean - - median - - quantiles (0.25, 0.50, 0.75) - -Flux analysis can be performed over the metabolic model by using the objective function already set in the model. The following analyses are supported: - - parsimonious-FBA (optimized by Biomass) - - FVA (with configurable optimality percentage) - - Biomass sensitivity analysis (single reaction knock-out) - -Output: -------------- - -The tool generates: - - Samples: reporting the sampled fluxes for each reaction (reaction names on the rows and sample names on the columns). Format: tab-separated. - - a log file (.txt). - -**TIP**: Bounds generated by RAStoBound are grouped in a collection. You can select collections by clicking on "Dataset Collection" option in the "Bound file(s):" input parameter. - -**TIP**: The Batches parameter helps maintain memory efficiency. For 10,000 samples, use n_samples=1,000 and n_batches=10. - -**TIP**: The Thinning parameter for OPTGP helps converge to stationary distribution. - -**TIP**: FVA optimality percentage allows you to explore suboptimal flux ranges. 100% restricts to optimal solutions, while lower values (e.g., 90%) allow broader flux ranges. -]]> - </help> - <expand macro="citations_fluxes" /> -</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/COBRAxy/fromCSVtoCOBRA.py Mon Sep 29 10:33:26 2025 +0000 @@ -0,0 +1,112 @@ +""" +Convert a tabular (CSV/TSV/Tabular) description of a COBRA model into a COBRA file. + +Supported output formats: SBML, JSON, MATLAB (.mat), YAML. +The script logs to a user-provided file for easier debugging in Galaxy. +""" + +import os +import cobra +import argparse +from typing import List +import logging +import utils.model_utils as modelUtils + +ARGS : argparse.Namespace +def process_args(args: List[str] = None) -> argparse.Namespace: + """ + Parse command-line arguments for the CSV-to-COBRA conversion tool. + + Returns: + argparse.Namespace: Parsed arguments. + """ + parser = argparse.ArgumentParser( + usage="%(prog)s [options]", + description="Convert a tabular/CSV file to a COBRA model" + ) + + + parser.add_argument("--out_log", type=str, required=True, + help="Output log file") + + + parser.add_argument("--input", type=str, required=True, + help="Input tabular file (CSV/TSV)") + + + parser.add_argument("--format", type=str, required=True, choices=["sbml", "json", "mat", "yaml"], + help="Model format (SBML, JSON, MATLAB, YAML)") + + + parser.add_argument("--output", type=str, required=True, + help="Output model file path") + + + parser.add_argument("--tool_dir", type=str, default=os.path.dirname(__file__), + help="Tool directory (passed from Galaxy as $__tool_directory__)") + + + return parser.parse_args(args) + + +###############################- ENTRY POINT -################################ + +def main(args: List[str] = None) -> None: + """ + Entry point: parse arguments, build the COBRA model from a CSV/TSV file, + and save it in the requested format. + + Returns: + None + """ + global ARGS + ARGS = process_args(args) + + # configure logging to the requested log file (overwrite each run) + logging.basicConfig(filename=ARGS.out_log, + level=logging.DEBUG, + format='%(asctime)s %(levelname)s: %(message)s', + filemode='w') + + logging.info('Starting fromCSVtoCOBRA tool') + logging.debug('Args: input=%s format=%s output=%s tool_dir=%s', ARGS.input, ARGS.format, ARGS.output, ARGS.tool_dir) + + try: + # Basic sanity checks + if not os.path.exists(ARGS.input): + logging.error('Input file not found: %s', ARGS.input) + + out_dir = os.path.dirname(os.path.abspath(ARGS.output)) + + if out_dir and not os.path.isdir(out_dir): + try: + os.makedirs(out_dir, exist_ok=True) + logging.info('Created missing output directory: %s', out_dir) + except Exception as e: + logging.exception('Cannot create output directory: %s', out_dir) + + model = modelUtils.build_cobra_model_from_csv(ARGS.input) + + # Save model in requested format + if ARGS.format == "sbml": + cobra.io.write_sbml_model(model, ARGS.output) + elif ARGS.format == "json": + cobra.io.save_json_model(model, ARGS.output) + elif ARGS.format == "mat": + cobra.io.save_matlab_model(model, ARGS.output) + elif ARGS.format == "yaml": + cobra.io.save_yaml_model(model, ARGS.output) + else: + logging.error('Unknown format requested: %s', ARGS.format) + print(f"ERROR: Unknown format: {ARGS.format}") + + + logging.info('Model successfully written to %s (format=%s)', ARGS.output, ARGS.format) + + except Exception: + # Log full traceback to the out_log so Galaxy users/admins can see what happened + logging.exception('Unhandled exception in fromCSVtoCOBRA') + + +if __name__ == '__main__': + main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/COBRAxy/fromCSVtoCOBRA.xml Mon Sep 29 10:33:26 2025 +0000 @@ -0,0 +1,69 @@ +<tool id="fromCSVtoCOBRA" name="fromCSVtoCOBRA" version="1.0.0"> + <description>Convert a tabular dataset to a COBRA model</description> + + <!-- Python dependencies required for COBRApy --> + <requirements> + <requirement type="package" version="0.29.0">cobra</requirement> + <requirement type="package" version="1.24.4">numpy</requirement> + <requirement type="package" version="2.0.3">pandas</requirement> + <requirement type="package" version="5.2.2">lxml</requirement> + </requirements> + + <!-- Import shared macros if available --> + <macros> + <import>marea_macros.xml</import> + </macros> + + <!-- Command to run the Python script --> + <command detect_errors="exit_code"><![CDATA[ + python $__tool_directory__/fromCSVtoCOBRA.py + --tool_dir $__tool_directory__ + --input $input + --format $format + --output $output + --out_log $log + ]]></command> + + <!-- Tool inputs --> + <inputs> + <param name="input" type="data" format="tabular,csv,tsv" label="Input table"/> + <param name="format" type="select" label="Output COBRA model format"> + <option value="sbml" selected="true">SBML (.xml)</option> + <option value="json">JSON (.json)</option> + <option value="mat">MATLAB (.mat)</option> + <option value="yaml">YAML (.yml)</option> + </param> + </inputs> + + <!-- Tool outputs --> + <outputs> + <data name="log" format="txt" label="fromcsvtocobra - Log" /> + <data name="output" format="xml" label="COBRA model"> + <change_format> + <when input="format" value="sbml" format="xml"/> + <when input="format" value="json" format="json"/> + <when input="format" value="mat" format="mat"/> + <when input="format" value="yaml" format="yaml"/> + </change_format> + </data> + </outputs> + + <!-- Help section --> + <help><![CDATA[ +This tool converts a tabular dataset into a COBRA model using COBRApy. + +**Input** +- A tabular/CSV/TSV file describing reactions, metabolites, or stoichiometry. + +**Output** +- A COBRA model in the chosen format: + - SBML (.xml) + - JSON (.json) + - MATLAB (.mat) + - YAML (.yml) + +**Notes** +- The exact table structure (columns required) depends on how you want to encode reactions and metabolites. +- You can extend the Python script to parse specific column formats. + ]]></help> +</tool>
--- a/COBRAxy/fromCSVtoCOBRA_beta.py Tue Sep 23 13:48:24 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -""" -Convert a tabular (CSV/TSV/Tabular) description of a COBRA model into a COBRA file. - -Supported output formats: SBML, JSON, MATLAB (.mat), YAML. -The script logs to a user-provided file for easier debugging in Galaxy. -""" - -import os -import cobra -import argparse -from typing import List -import logging -import utils.model_utils as modelUtils - -ARGS : argparse.Namespace -def process_args(args: List[str] = None) -> argparse.Namespace: - """ - Parse command-line arguments for the CSV-to-COBRA conversion tool. - - Returns: - argparse.Namespace: Parsed arguments. - """ - parser = argparse.ArgumentParser( - usage="%(prog)s [options]", - description="Convert a tabular/CSV file to a COBRA model" - ) - - - parser.add_argument("--out_log", type=str, required=True, - help="Output log file") - - - parser.add_argument("--input", type=str, required=True, - help="Input tabular file (CSV/TSV)") - - - parser.add_argument("--format", type=str, required=True, choices=["sbml", "json", "mat", "yaml"], - help="Model format (SBML, JSON, MATLAB, YAML)") - - - parser.add_argument("--output", type=str, required=True, - help="Output model file path") - - - parser.add_argument("--tool_dir", type=str, default=os.path.dirname(__file__), - help="Tool directory (passed from Galaxy as $__tool_directory__)") - - - return parser.parse_args(args) - - -###############################- ENTRY POINT -################################ - -def main(args: List[str] = None) -> None: - """ - Entry point: parse arguments, build the COBRA model from a CSV/TSV file, - and save it in the requested format. - - Returns: - None - """ - global ARGS - ARGS = process_args(args) - - # configure logging to the requested log file (overwrite each run) - logging.basicConfig(filename=ARGS.out_log, - level=logging.DEBUG, - format='%(asctime)s %(levelname)s: %(message)s', - filemode='w') - - logging.info('Starting fromCSVtoCOBRA tool') - logging.debug('Args: input=%s format=%s output=%s tool_dir=%s', ARGS.input, ARGS.format, ARGS.output, ARGS.tool_dir) - - try: - # Basic sanity checks - if not os.path.exists(ARGS.input): - logging.error('Input file not found: %s', ARGS.input) - - out_dir = os.path.dirname(os.path.abspath(ARGS.output)) - - if out_dir and not os.path.isdir(out_dir): - try: - os.makedirs(out_dir, exist_ok=True) - logging.info('Created missing output directory: %s', out_dir) - except Exception as e: - logging.exception('Cannot create output directory: %s', out_dir) - - model = modelUtils.build_cobra_model_from_csv(ARGS.input) - - # Save model in requested format - if ARGS.format == "sbml": - cobra.io.write_sbml_model(model, ARGS.output) - elif ARGS.format == "json": - cobra.io.save_json_model(model, ARGS.output) - elif ARGS.format == "mat": - cobra.io.save_matlab_model(model, ARGS.output) - elif ARGS.format == "yaml": - cobra.io.save_yaml_model(model, ARGS.output) - else: - logging.error('Unknown format requested: %s', ARGS.format) - print(f"ERROR: Unknown format: {ARGS.format}") - - - logging.info('Model successfully written to %s (format=%s)', ARGS.output, ARGS.format) - - except Exception: - # Log full traceback to the out_log so Galaxy users/admins can see what happened - logging.exception('Unhandled exception in fromCSVtoCOBRA') - - -if __name__ == '__main__': - main()
--- a/COBRAxy/fromCSVtoCOBRA_beta.xml Tue Sep 23 13:48:24 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,69 +0,0 @@ -<tool id="fromCSVtoCOBRA_beta" name="fromCSVtoCOBRA - Beta" version="1.0.0"> - <description>Convert a tabular dataset to a COBRA model</description> - - <!-- Python dependencies required for COBRApy --> - <requirements> - <requirement type="package" version="0.29.0">cobra</requirement> - <requirement type="package" version="1.24.4">numpy</requirement> - <requirement type="package" version="2.0.3">pandas</requirement> - <requirement type="package" version="5.2.2">lxml</requirement> - </requirements> - - <!-- Import shared macros if available --> - <macros> - <import>marea_macros.xml</import> - </macros> - - <!-- Command to run the Python script --> - <command detect_errors="exit_code"><![CDATA[ - python $__tool_directory__/fromCSVtoCOBRA_beta.py - --tool_dir $__tool_directory__ - --input $input - --format $format - --output $output - --out_log $log - ]]></command> - - <!-- Tool inputs --> - <inputs> - <param name="input" type="data" format="tabular,csv,tsv" label="Input table"/> - <param name="format" type="select" label="Output COBRA model format"> - <option value="sbml" selected="true">SBML (.xml)</option> - <option value="json">JSON (.json)</option> - <option value="mat">MATLAB (.mat)</option> - <option value="yaml">YAML (.yml)</option> - </param> - </inputs> - - <!-- Tool outputs --> - <outputs> - <data name="log" format="txt" label="fromcsvtocobra - Log" /> - <data name="output" format="xml" label="COBRA model"> - <change_format> - <when input="format" value="sbml" format="xml"/> - <when input="format" value="json" format="json"/> - <when input="format" value="mat" format="mat"/> - <when input="format" value="yaml" format="yaml"/> - </change_format> - </data> - </outputs> - - <!-- Help section --> - <help><![CDATA[ -This tool converts a tabular dataset into a COBRA model using COBRApy. - -**Input** -- A tabular/CSV/TSV file describing reactions, metabolites, or stoichiometry. - -**Output** -- A COBRA model in the chosen format: - - SBML (.xml) - - JSON (.json) - - MATLAB (.mat) - - YAML (.yml) - -**Notes** -- The exact table structure (columns required) depends on how you want to encode reactions and metabolites. -- You can extend the Python script to parse specific column formats. - ]]></help> -</tool>
--- a/COBRAxy/local/medium/medium.csv Tue Sep 23 13:48:24 2025 +0000 +++ b/COBRAxy/local/medium/medium.csv Mon Sep 29 10:33:26 2025 +0000 @@ -1,37 +1,37 @@ -engro2_name,RPMI 1640,DMEM,EMEM,adv DMEM:F12 = 1:1,DMEM:F12 = 1:1,McCoy's 5A,IMDM,MEM,GMEM,Leibovitz's L-15,F12,F10,AMEM,Waymouth MB 7521 medium,F12K,William's E Medium,Medium 199,MCDB 105,NEAA,mIVC1:adv DMEM:F12 = 1:1,E8 ess,RPMI:F12 = 1:1,RPMI:MEM = 1:1,RPMI:EMEM = 1:1,EMEM:F12 = 1:1,DMEM:RPMI = 2:1,DMEM:IMDM = 1:1,MCDB 105:Medium 199 = 1:1,allOpen,mIVC1:adv DMEM:F12 = 1:1 open,E8 ess open,open 10,E8 open 10 -EX_Lcystin_e,0.20766774,0.20127796,0.09904154,0.09996805,0.09996805,0.0,0.29201278,0.09904154,0.09904154,0.0,0.0,0.0,0.09904154,0.0625,0.0,0.08329073,0.108333334,0.0,0.0,0.09996805,0.09996805,0.10383387,0.15335464,0.15335464,0.04952077,0.2034078866666666,0.24664537,0.054166667,1000,1000,1000,10.0,10.0 -EX_ala__L_e,0.0,0.0,0.0,0.049999997,0.049999997,0.15617977,0.28089887,0.0,0.0,2.52809,0.099999994,0.101123594,0.28089887,0.0,0.20224719,1.011236,0.28089887,0.030337078,10.0,0.049999997,0.049999997,0.049999997,0.0,0.0,0.049999997,0.0,0.140449435,0.155617974,1000,1000,1000,10.0,10.0 -EX_arg__L_e,1.1494253,0.39810428,0.5971564,0.69905216,0.69905216,0.19952606,0.39810428,0.5971564,0.19905214,2.8735633,1.0,1.0,0.49763033,0.35545024,2.0,0.28735632,0.33175355,0.29952607,0.0,0.69905216,0.69905216,1.07471265,0.8732908500000001,0.8732908500000001,0.7985782,0.64854462,0.39810428,0.3156398099999999,1000,1000,1000,10.0,10.0 -EX_asn__L_e,0.37878788,0.0,0.0,0.05,0.05,0.3409091,0.18939394,0.0,0.0,1.8939394,0.10006667,0.1,0.33333334,0.0,0.2,0.13333334,0.0,0.1,10.0,0.05,0.05,0.239427275,0.18939394,0.18939394,0.050033335,0.1262626266666666,0.09469697,0.05,1000,1000,1000,10.0,10.0 -EX_asp__L_e,0.15037593,0.0,0.0,0.05,0.05,0.15015037,0.22556391,0.0,0.0,0.0,0.1,0.09774436,0.22556391,0.45112783,0.2,0.22556391,0.22556391,0.1,10.0,0.05,0.05,0.125187965,0.075187965,0.075187965,0.05,0.05012531,0.112781955,0.162781955,1000,1000,1000,10.0,10.0 -EX_chsterol_e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00051679584,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00025839792,1000,0,0,10.0,0.0 -EX_cys__L_e,0.0,0.0,0.0,0.09977272,0.09977272,0.2603306,0.0,0.0,0.0,0.9917355,0.19954544,0.20661157,0.5681818,0.5041322,0.39772728,0.3305785,0.0005681818,0.0,0.0,0.12477272,0.09977272,0.09977272,0.0,0.0,0.09977272,0.0,0.0,0.0002840909,1000,1000,1000,10.0,10.0 -EX_fol_e,0.0022675737,0.009070295,0.0022675737,0.0060090707,0.0060090707,0.022675738,0.009070295,0.0022675737,0.0045351475,0.0022675737,0.0029478457,0.0029478457,0.0022675737,0.0011337869,0.0029478457,0.0022675737,2.2675737e-05,0.001171875,0.0,0.0060090707,0.0060090707,0.0026077097,0.0022675737,0.0022675737,0.0026077097,0.0068027212333333,0.009070295,0.0005972753685,1000,1000,1000,10.0,10.0 -EX_gal_e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1000,0,0,10.0,0.0 -EX_glc__D_e,11.111111,25.0,5.5555553,17.505556,17.505556,16.666666,25.0,5.5555553,25.0,0.0,10.011111,6.111111,5.5555553,27.777779,7.0,11.111111,5.5555553,5.5555553,0.0,17.505556,17.505556,10.561111,8.33333315,8.33333315,7.78333315,20.370370333333334,25.0,5.5555553,1000,1000,1000,10.0,10.0 -EX_gln__L_e,2.0547945,3.9726028,2.0,0.0,2.5,1.5013698,4.0,2.0,2.0,2.0547945,1.0,1.0,2.0,2.3972602,2.0,0.0,0.6849315,0.0,0.0,2.0,2.5,1.52739725,2.02739725,2.02739725,1.5,3.333333366666667,3.9863014,0.34246575,1000,1000,1000,10.0,10.0 -EX_glu__L_e,0.13605443,0.0,0.0,0.05,0.05,0.15034014,0.5102041,0.0,0.0,0.0,0.1,0.1,0.5102041,1.0204082,0.19727892,0.34013605,0.5102041,0.029931974,10.0,0.05,0.05,0.118027215,0.068027215,0.068027215,0.05,0.0453514766666666,0.25510205,0.270068037,1000,1000,1000,10.0,10.0 -EX_gly_e,0.13333334,0.4,0.0,0.25,0.25,0.1,0.4,0.0,0.0,2.6666667,0.1,0.1,0.6666667,0.6666667,0.2,0.6666667,0.6666667,0.030666666,10.0,0.25,0.25,0.11666667,0.06666667,0.06666667,0.05,0.3111111133333333,0.4,0.348666683,1000,1000,1000,10.0,10.0 -EX_gthrd_e,0.0032573289,0.0,0.0,0.0,0.0,0.0016286644,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.048859935,0.0,0.00016286645,0.00016286645,0.0,0.0,0.0,0.0,0.00162866445,0.00162866445,0.00162866445,0.0,0.0010857763,0.0,8.1433225e-05,1000,0,0,10.0,0.0 -EX_hdca_e,0.0,0.0,0.0,0.00014999999,0.00014999999,0.0,0.0,0.0,0.0,0.0,0.00029999999,0.0,0.0,0.0,0.0,0.00010169491,0.0,0.0,0.0,0.00014999999,0.00014999999,0.000149999995,0.0,0.0,0.000149999995,0.0,0.0,0.0,1000,1000,1000,10.0,10.0 -EX_his__L_e,0.09677419,0.2,0.2,0.14990476,0.14990476,0.09980952,0.2,0.2,0.1,1.6129032,0.1,0.10952381,0.2,0.82580644,0.21809523,0.09677419,0.10419047,0.2,0.0,0.14990476,0.14990476,0.098387095,0.148387095,0.148387095,0.15,0.1655913966666666,0.2,0.152095235,1000,1000,1000,10.0,10.0 -EX_ile__L_e,0.3816794,0.8015267,0.39694658,0.41580153,0.41580153,0.300458,0.8015267,0.39694658,0.39694658,1.908397,0.030534351,0.019847328,0.4,0.1908397,0.060152672,0.3816794,0.3053435,0.5038168,0.0,0.41580153,0.41580153,0.2061068755,0.3893129899999999,0.3893129899999999,0.2137404655,0.6615776000000001,0.8015267,0.4045801499999999,1000,1000,1000,10.0,10.0 -EX_lac__L_e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00033,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1000,1000,0,10.0,0.0 -EX_leu__L_e,0.3816794,0.8015267,0.39694658,0.45076334,0.45076334,0.300458,0.8015267,0.39694658,0.39694658,0.9541985,0.1,0.099236645,0.39694658,0.3816794,0.2,0.57251906,0.45801526,1.0,0.0,0.45076334,0.45076334,0.2408397,0.3893129899999999,0.3893129899999999,0.24847329,0.6615776000000001,0.8015267,0.72900763,1000,1000,1000,10.0,10.0 -EX_lys__L_e,0.2739726,0.7978142,0.3989071,0.4986339,0.4986339,0.19945355,0.7978142,0.3989071,0.3989071,0.51369864,0.19945355,0.15846995,0.3989071,1.3114754,0.3989071,0.47792348,0.38251367,0.9945355,0.0,0.4986339,0.4986339,0.236713075,0.33643985,0.33643985,0.299180325,0.6232003333333334,0.7978142,0.688524585,1000,1000,1000,10.0,10.0 -EX_met__L_e,0.10067114,0.20134228,0.10067114,0.11570469,0.11570469,0.099999994,0.20134228,0.10067114,0.10067114,0.5033557,0.030201342,0.030201342,0.10067114,0.33557048,0.06013423,0.10067114,0.10067114,0.10067114,0.0,0.11570469,0.11570469,0.065436241,0.10067114,0.10067114,0.065436241,0.1677852333333333,0.20134228,0.10067114,1000,1000,1000,10.0,10.0 -EX_phe__L_e,0.09090909,0.4,0.19393939,0.2150303,0.2150303,0.1,0.4,0.19393939,0.2,0.75757575,0.030303031,0.030303031,0.19393939,0.3030303,0.060121212,0.15151516,0.15151516,0.2,0.0,0.2150303,0.2150303,0.0606060605,0.1424242399999999,0.1424242399999999,0.1121212105,0.2969696966666666,0.4,0.17575758,1000,1000,1000,10.0,10.0 -EX_pi_e,5.633803,0.91558444,1.0144928,0.95303941,0.95303941,4.2028985,0.9057971,1.0144928,0.89855075,1.77920467,1.0,1.6926885,1.0144928,2.7001757,1.23784074,1.0144928,1.0144928,0.5,0.0,0.95303941,1.18303941,3.3169015,3.3241479000000003,3.3241479000000003,1.0072464,2.48832396,0.91069077,0.7572464,1000,1000,1000,10.0,10.0 -EX_pro__L_e,0.17391305,0.0,0.0,0.15,0.15,0.15043478,0.3478261,0.0,0.0,0.0,0.3,0.1,0.3478261,0.4347826,0.6,0.26086956,0.3478261,0.1,10.0,0.15,0.15,0.236956525,0.086956525,0.086956525,0.15,0.0579710166666666,0.17391305,0.22391305,1000,1000,1000,10.0,10.0 -EX_ptrc_e,0.0,0.0,0.0,0.0005031056,0.0005031056,0.0,0.0,0.0,0.0,0.0,0.001,0.0,0.0,0.0,0.0019875776,0.0,0.0,1.242236e-06,0.0,0.0005031056,0.0005031056,0.0005,0.0,0.0,0.0005,0.0,0.0,6.21118e-07,1000,1000,1000,10.0,10.0 -EX_pyr_e,0.0,0.0,0.0,1.0,0.5,0.0,1.0,0.0,0.0,5.0,1.0,1.0,1.0,0.0,2.0,0.22727273,0.0,1.0,0.0,2.0,0.5,0.5,0.0,0.0,0.5,0.0,0.5,0.5,1000,1000,1000,10.0,10.0 -EX_ser__L_e,0.2857143,0.4,0.0,0.25,0.25,0.25047618,0.4,0.0,0.0,1.9047619,0.1,0.1,0.23809524,0.0,0.2,0.0952381,0.23809524,0.30476192,10.0,0.25,0.25,0.1928571499999999,0.14285715,0.14285715,0.05,0.3619047666666666,0.4,0.27142858,1000,1000,1000,10.0,10.0 -EX_thr__L_e,0.16806723,0.79831934,0.40336135,0.44915968,0.44915968,0.15042016,0.79831934,0.40336135,0.39999998,2.5210085,0.099999994,0.0302521,0.40336135,0.6302521,0.19327731,0.33613446,0.25210086,0.10084034,0.0,0.44915968,0.44915968,0.134033612,0.28571429,0.28571429,0.251680672,0.5882353033333333,0.79831934,0.1764705999999999,1000,1000,1000,10.0,10.0 -EX_thymd_e,0.0,0.0,0.0,0.0015082645,0.0015082645,0.0,0.0,0.0,0.0,0.0,0.002892562,0.002892562,0.041322313,0.0,0.002892562,0.0,0.0,9.917356e-05,0.0,0.0015082645,0.0015082645,0.001446281,0.0,0.0,0.001446281,0.0,0.0,4.958678e-05,1000,1000,1000,10.0,10.0 -EX_trp__L_e,0.024509804,0.078431375,0.04901961,0.04421569,0.04421569,0.0151960775,0.078431375,0.04901961,0.039215688,0.09803922,0.01,0.0029411765,0.04901961,0.19607843,0.020098038,0.04901961,0.04901961,0.020098038,0.0,0.04421569,0.04421569,0.017254902,0.036764707,0.036764707,0.029509805,0.0604575179999999,0.078431375,0.034558824,1000,1000,1000,10.0,10.0 -EX_tyr__L_e,0.11111111,0.39779004,0.19923371,0.21375479,0.21375479,0.1,0.46222222,0.19923371,0.19923371,1.6574585,0.02980916,0.010038313,0.23111111,0.22099447,0.051526718,0.19406131,0.22222222,0.1,0.0,0.21375479,0.21375479,0.070460135,0.15517241,0.15517241,0.114521435,0.3022303966666667,0.43000613,0.16111111,1000,1000,1000,10.0,10.0 -EX_val__L_e,0.17094018,0.8034188,0.3931624,0.4517094,0.4517094,0.15042736,0.8034188,0.3931624,0.4,0.85470086,0.1,0.02991453,0.3931624,0.5555556,0.1965812,0.42735043,0.21367522,1.0,0.0,0.4517094,0.4517094,0.13547009,0.28205129,0.28205129,0.2465812,0.5925925933333334,0.8034188,0.60683761,1000,1000,1000,10.0,10.0 -EX_o2_e,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,0.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000,1000,1000,1000.0,1000.0 -EX_h_e,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,0.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000,1000,1000,1000.0,1000.0 -EX_h2o_e,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,0.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000,1000,1000,1000.0,1000.0 -EX_thbpt_e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +engro2_name,RPMI 1640,DMEM,EMEM,adv DMEM:F12 = 1:1,DMEM:F12 = 1:1,McCoy's 5A,IMDM,MEM,GMEM,Leibovitz's L-15,F12,F10,AMEM,Waymouth MB 7521 medium,F12K,William's E Medium,Medium 199,MCDB 105,NEAA,mIVC1:adv DMEM:F12 = 1:1,E8 ess,RPMI:F12 = 1:1,RPMI:MEM = 1:1,RPMI:EMEM = 1:1,EMEM:F12 = 1:1,DMEM:RPMI = 2:1,DMEM:IMDM = 1:1,MCDB 105:Medium 199 = 1:1,allOpen,mIVC1:adv DMEM:F12 = 1:1 open,E8 ess open,open 10,E8 open 10 +EX_Lcystin_e,0.20766774,0.20127796,0.09904154,0.09996805,0.09996805,0.0,0.29201278,0.09904154,0.09904154,0.0,0.0,0.0,0.09904154,0.0625,0.0,0.08329073,0.108333334,0.0,0.0,0.09996805,0.09996805,0.10383387,0.15335464,0.15335464,0.04952077,0.2034078866666666,0.24664537,0.054166667,1000,1000,1000,10.0,10.0 +EX_ala__L_e,0.0,0.0,0.0,0.049999997,0.049999997,0.15617977,0.28089887,0.0,0.0,2.52809,0.099999994,0.101123594,0.28089887,0.0,0.20224719,1.011236,0.28089887,0.030337078,10.0,0.049999997,0.049999997,0.049999997,0.0,0.0,0.049999997,0.0,0.140449435,0.155617974,1000,1000,1000,10.0,10.0 +EX_arg__L_e,1.1494253,0.39810428,0.5971564,0.69905216,0.69905216,0.19952606,0.39810428,0.5971564,0.19905214,2.8735633,1.0,1.0,0.49763033,0.35545024,2.0,0.28735632,0.33175355,0.29952607,0.0,0.69905216,0.69905216,1.07471265,0.8732908500000001,0.8732908500000001,0.7985782,0.64854462,0.39810428,0.3156398099999999,1000,1000,1000,10.0,10.0 +EX_asn__L_e,0.37878788,0.0,0.0,0.05,0.05,0.3409091,0.18939394,0.0,0.0,1.8939394,0.10006667,0.1,0.33333334,0.0,0.2,0.13333334,0.0,0.1,10.0,0.05,0.05,0.239427275,0.18939394,0.18939394,0.050033335,0.1262626266666666,0.09469697,0.05,1000,1000,1000,10.0,10.0 +EX_asp__L_e,0.15037593,0.0,0.0,0.05,0.05,0.15015037,0.22556391,0.0,0.0,0.0,0.1,0.09774436,0.22556391,0.45112783,0.2,0.22556391,0.22556391,0.1,10.0,0.05,0.05,0.125187965,0.075187965,0.075187965,0.05,0.05012531,0.112781955,0.162781955,1000,1000,1000,10.0,10.0 +EX_chsterol_e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00051679584,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00025839792,1000,0,0,10.0,0.0 +EX_cys__L_e,0.0,0.0,0.0,0.09977272,0.09977272,0.2603306,0.0,0.0,0.0,0.9917355,0.19954544,0.20661157,0.5681818,0.5041322,0.39772728,0.3305785,0.0005681818,0.0,0.0,0.12477272,0.09977272,0.09977272,0.0,0.0,0.09977272,0.0,0.0,0.0002840909,1000,1000,1000,10.0,10.0 +EX_fol_e,0.0022675737,0.009070295,0.0022675737,0.0060090707,0.0060090707,0.022675738,0.009070295,0.0022675737,0.0045351475,0.0022675737,0.0029478457,0.0029478457,0.0022675737,0.0011337869,0.0029478457,0.0022675737,2.2675737e-05,0.001171875,0.0,0.0060090707,0.0060090707,0.0026077097,0.0022675737,0.0022675737,0.0026077097,0.0068027212333333,0.009070295,0.0005972753685,1000,1000,1000,10.0,10.0 +EX_gal_e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1000,0,0,10.0,0.0 +EX_glc__D_e,11.111111,25.0,5.5555553,17.505556,17.505556,16.666666,25.0,5.5555553,25.0,0.0,10.011111,6.111111,5.5555553,27.777779,7.0,11.111111,5.5555553,5.5555553,0.0,17.505556,17.505556,10.561111,8.33333315,8.33333315,7.78333315,20.370370333333334,25.0,5.5555553,1000,1000,1000,10.0,10.0 +EX_gln__L_e,2.0547945,3.9726028,2.0,0.0,2.5,1.5013698,4.0,2.0,2.0,2.0547945,1.0,1.0,2.0,2.3972602,2.0,0.0,0.6849315,0.0,0.0,2.0,2.5,1.52739725,2.02739725,2.02739725,1.5,3.333333366666667,3.9863014,0.34246575,1000,1000,1000,10.0,10.0 +EX_glu__L_e,0.13605443,0.0,0.0,0.05,0.05,0.15034014,0.5102041,0.0,0.0,0.0,0.1,0.1,0.5102041,1.0204082,0.19727892,0.34013605,0.5102041,0.029931974,10.0,0.05,0.05,0.118027215,0.068027215,0.068027215,0.05,0.0453514766666666,0.25510205,0.270068037,1000,1000,1000,10.0,10.0 +EX_gly_e,0.13333334,0.4,0.0,0.25,0.25,0.1,0.4,0.0,0.0,2.6666667,0.1,0.1,0.6666667,0.6666667,0.2,0.6666667,0.6666667,0.030666666,10.0,0.25,0.25,0.11666667,0.06666667,0.06666667,0.05,0.3111111133333333,0.4,0.348666683,1000,1000,1000,10.0,10.0 +EX_gthrd_e,0.0032573289,0.0,0.0,0.0,0.0,0.0016286644,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.048859935,0.0,0.00016286645,0.00016286645,0.0,0.0,0.0,0.0,0.00162866445,0.00162866445,0.00162866445,0.0,0.0010857763,0.0,8.1433225e-05,1000,0,0,10.0,0.0 +EX_hdca_e,0.0,0.0,0.0,0.00014999999,0.00014999999,0.0,0.0,0.0,0.0,0.0,0.00029999999,0.0,0.0,0.0,0.0,0.00010169491,0.0,0.0,0.0,0.00014999999,0.00014999999,0.000149999995,0.0,0.0,0.000149999995,0.0,0.0,0.0,1000,1000,1000,10.0,10.0 +EX_his__L_e,0.09677419,0.2,0.2,0.14990476,0.14990476,0.09980952,0.2,0.2,0.1,1.6129032,0.1,0.10952381,0.2,0.82580644,0.21809523,0.09677419,0.10419047,0.2,0.0,0.14990476,0.14990476,0.098387095,0.148387095,0.148387095,0.15,0.1655913966666666,0.2,0.152095235,1000,1000,1000,10.0,10.0 +EX_ile__L_e,0.3816794,0.8015267,0.39694658,0.41580153,0.41580153,0.300458,0.8015267,0.39694658,0.39694658,1.908397,0.030534351,0.019847328,0.4,0.1908397,0.060152672,0.3816794,0.3053435,0.5038168,0.0,0.41580153,0.41580153,0.2061068755,0.3893129899999999,0.3893129899999999,0.2137404655,0.6615776000000001,0.8015267,0.4045801499999999,1000,1000,1000,10.0,10.0 +EX_lac__L_e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00033,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1000,1000,0,10.0,0.0 +EX_leu__L_e,0.3816794,0.8015267,0.39694658,0.45076334,0.45076334,0.300458,0.8015267,0.39694658,0.39694658,0.9541985,0.1,0.099236645,0.39694658,0.3816794,0.2,0.57251906,0.45801526,1.0,0.0,0.45076334,0.45076334,0.2408397,0.3893129899999999,0.3893129899999999,0.24847329,0.6615776000000001,0.8015267,0.72900763,1000,1000,1000,10.0,10.0 +EX_lys__L_e,0.2739726,0.7978142,0.3989071,0.4986339,0.4986339,0.19945355,0.7978142,0.3989071,0.3989071,0.51369864,0.19945355,0.15846995,0.3989071,1.3114754,0.3989071,0.47792348,0.38251367,0.9945355,0.0,0.4986339,0.4986339,0.236713075,0.33643985,0.33643985,0.299180325,0.6232003333333334,0.7978142,0.688524585,1000,1000,1000,10.0,10.0 +EX_met__L_e,0.10067114,0.20134228,0.10067114,0.11570469,0.11570469,0.099999994,0.20134228,0.10067114,0.10067114,0.5033557,0.030201342,0.030201342,0.10067114,0.33557048,0.06013423,0.10067114,0.10067114,0.10067114,0.0,0.11570469,0.11570469,0.065436241,0.10067114,0.10067114,0.065436241,0.1677852333333333,0.20134228,0.10067114,1000,1000,1000,10.0,10.0 +EX_phe__L_e,0.09090909,0.4,0.19393939,0.2150303,0.2150303,0.1,0.4,0.19393939,0.2,0.75757575,0.030303031,0.030303031,0.19393939,0.3030303,0.060121212,0.15151516,0.15151516,0.2,0.0,0.2150303,0.2150303,0.0606060605,0.1424242399999999,0.1424242399999999,0.1121212105,0.2969696966666666,0.4,0.17575758,1000,1000,1000,10.0,10.0 +EX_pi_e,5.633803,0.91558444,1.0144928,0.95303941,0.95303941,4.2028985,0.9057971,1.0144928,0.89855075,1.77920467,1.0,1.6926885,1.0144928,2.7001757,1.23784074,1.0144928,1.0144928,0.5,0.0,0.95303941,1.18303941,3.3169015,3.3241479000000003,3.3241479000000003,1.0072464,2.48832396,0.91069077,0.7572464,1000,1000,1000,10.0,10.0 +EX_pro__L_e,0.17391305,0.0,0.0,0.15,0.15,0.15043478,0.3478261,0.0,0.0,0.0,0.3,0.1,0.3478261,0.4347826,0.6,0.26086956,0.3478261,0.1,10.0,0.15,0.15,0.236956525,0.086956525,0.086956525,0.15,0.0579710166666666,0.17391305,0.22391305,1000,1000,1000,10.0,10.0 +EX_ptrc_e,0.0,0.0,0.0,0.0005031056,0.0005031056,0.0,0.0,0.0,0.0,0.0,0.001,0.0,0.0,0.0,0.0019875776,0.0,0.0,1.242236e-06,0.0,0.0005031056,0.0005031056,0.0005,0.0,0.0,0.0005,0.0,0.0,6.21118e-07,1000,1000,1000,10.0,10.0 +EX_pyr_e,0.0,0.0,0.0,1.0,0.5,0.0,1.0,0.0,0.0,5.0,1.0,1.0,1.0,0.0,2.0,0.22727273,0.0,1.0,0.0,2.0,0.5,0.5,0.0,0.0,0.5,0.0,0.5,0.5,1000,1000,1000,10.0,10.0 +EX_ser__L_e,0.2857143,0.4,0.0,0.25,0.25,0.25047618,0.4,0.0,0.0,1.9047619,0.1,0.1,0.23809524,0.0,0.2,0.0952381,0.23809524,0.30476192,10.0,0.25,0.25,0.1928571499999999,0.14285715,0.14285715,0.05,0.3619047666666666,0.4,0.27142858,1000,1000,1000,10.0,10.0 +EX_thr__L_e,0.16806723,0.79831934,0.40336135,0.44915968,0.44915968,0.15042016,0.79831934,0.40336135,0.39999998,2.5210085,0.099999994,0.0302521,0.40336135,0.6302521,0.19327731,0.33613446,0.25210086,0.10084034,0.0,0.44915968,0.44915968,0.134033612,0.28571429,0.28571429,0.251680672,0.5882353033333333,0.79831934,0.1764705999999999,1000,1000,1000,10.0,10.0 +EX_thymd_e,0.0,0.0,0.0,0.0015082645,0.0015082645,0.0,0.0,0.0,0.0,0.0,0.002892562,0.002892562,0.041322313,0.0,0.002892562,0.0,0.0,9.917356e-05,0.0,0.0015082645,0.0015082645,0.001446281,0.0,0.0,0.001446281,0.0,0.0,4.958678e-05,1000,1000,1000,10.0,10.0 +EX_trp__L_e,0.024509804,0.078431375,0.04901961,0.04421569,0.04421569,0.0151960775,0.078431375,0.04901961,0.039215688,0.09803922,0.01,0.0029411765,0.04901961,0.19607843,0.020098038,0.04901961,0.04901961,0.020098038,0.0,0.04421569,0.04421569,0.017254902,0.036764707,0.036764707,0.029509805,0.0604575179999999,0.078431375,0.034558824,1000,1000,1000,10.0,10.0 +EX_tyr__L_e,0.11111111,0.39779004,0.19923371,0.21375479,0.21375479,0.1,0.46222222,0.19923371,0.19923371,1.6574585,0.02980916,0.010038313,0.23111111,0.22099447,0.051526718,0.19406131,0.22222222,0.1,0.0,0.21375479,0.21375479,0.070460135,0.15517241,0.15517241,0.114521435,0.3022303966666667,0.43000613,0.16111111,1000,1000,1000,10.0,10.0 +EX_val__L_e,0.17094018,0.8034188,0.3931624,0.4517094,0.4517094,0.15042736,0.8034188,0.3931624,0.4,0.85470086,0.1,0.02991453,0.3931624,0.5555556,0.1965812,0.42735043,0.21367522,1.0,0.0,0.4517094,0.4517094,0.13547009,0.28205129,0.28205129,0.2465812,0.5925925933333334,0.8034188,0.60683761,1000,1000,1000,10.0,10.0 +EX_o2_e,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,0.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000,1000,1000,1000.0,1000.0 +EX_h_e,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,0.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000,1000,1000,1000.0,1000.0 +EX_h2o_e,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,0.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000,1000,1000,1000.0,1000.0 +EX_thbpt_e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
--- a/COBRAxy/marea_macros.xml Tue Sep 23 13:48:24 2025 +0000 +++ b/COBRAxy/marea_macros.xml Mon Sep 29 10:33:26 2025 +0000 @@ -1,210 +1,210 @@ -<macros> - - <xml name="options"> - <param name="rules_selector" argument="--rules_selector" type="select" label="Gene-Protein-Reaction rules:"> - <option value="HMRcore">HMRcore tabular</option> - <option value="Recon">Recon3D tabular</option> - <option value="ENGRO2" selected="true">ENGRO 2 tabular</option> - <option value="Custom">Custom rules</option> - </param> - </xml> - - <xml name="options_model"> - <param name="model_selector" argument="--model_selector" type="select" label="Model:"> - <option value="ENGRO2" selected="true">ENGRO 2 tabular</option> - <option value="Recon">Recon3D tabular</option> - <option value="Custom_model">Custom tabular</option> - </param> - </xml> - - <xml name="options_ras_to_bounds_model"> - <param name="model_selector" argument="--model_selector" type="select" label="Model:"> - <option value="ENGRO2" selected="true">ENGRO 2</option> - <option value="Custom">Custom model</option> - </param> - </xml> - - <xml name="options_ras_to_bounds_medium"> - <param name="medium_selector" argument="--medium_selector" type="select" label="Medium:"> - <option value="Default" selected="true">Default (ENGRO2 built-in medium)</option> - <option value="allOpen">Open</option> - <option value="RPMI_1640">RPMI 1640</option> - <option value="DMEM">DMEM</option> - <option value="EMEM">EMEM</option> - <option value="DMEM:F12_=_1:1">DMEM:F12 = 1:1</option> - <option value="adv_DMEM:F12_=_1:1">adv DMEM:F12 = 1:1</option> - <option value="McCoy's_5A">McCoy's 5A</option> - <option value="IMDM">IMDM</option> - <option value="MEM">MEM</option> - <option value="GMEM">GMEM</option> - <option value="Leibovitz's_L-15">Leibovitz's L-15</option> - <option value="F12">F12</option> - <option value="F10">F10</option> - <option value="AMEM">AMEM</option> - <option value="Waymouth_MB_7521_medium">Waymouth MB 7521 medium</option> - <option value="F12K">F12K</option> - <option value="William's_E_Medium">William's E Medium</option> - <option value="Medium_199">Medium 199</option> - <option value="MCDB_105">MCDB 105</option> - <option value="NEAA">NEAA</option> - <option value="mIVC1:adv_DMEM:F12_=_1:1">mIVC1:adv DMEM:F12 = 1:1</option> - <option value="E8_ess">E8 ess</option> - <option value="RPMI:F12_=_1:1">RPMI:F12 = 1:1</option> - <option value="RPMI:MEM_=_1:1">RPMI:MEM = 1:1</option> - <option value="RPMI:EMEM_=_1:1">RPMI:EMEM = 1:1</option> - <option value="EMEM:F12_=_1:1">EMEM:F12 = 1:1</option> - <option value="DMEM:RPMI_=_2:1">DMEM:RPMI = 2:1</option> - <option value="DMEM:IMDM_=_1:1">DMEM:IMDM = 1:1</option> - <option value="MCDB_105:Medium_199_=_1:1">MCDB 105:Medium 199 = 1:1</option> - <option value="mIVC1:adv_DMEM:F12_=_1:1_open">mIVC1:adv DMEM:F12 = 1:1 open</option> - <option value="E8_ess_open">E8 ess open</option> - <option value="open_10">Open 10</option> - <option value="E8_open_10">E8 open 10</option> - </param> - </xml> - - <token name="@CUSTOM_RULES_EXEMPLE@"> - -+--------------------+-------------------------------+ -| id | rule (with entrez-id) | -+====================+===============================+ -| SHMT1 | 155060 or 10357 | -+--------------------+-------------------------------+ -| NIT2 | 155060 or 100134869 | -+--------------------+-------------------------------+ -| GOT1_GOT2_GOT1L1_2 | 155060 and 100134869 or 10357 | -+--------------------+-------------------------------+ - -| - - </token> - - <token name="@DATASET_EXEMPLE1@"> - -+------------+------------+------------+------------+ -| Hugo_ID | TCGAA62670 | TCGAA62671 | TCGAA62672 | -+============+============+============+============+ -| HGNC:24086 | 0.523167 | 0.371355 | 0.925661 | -+------------+------------+------------+------------+ -| HGNC:24086 | 0.568765 | 0.765567 | 0.456789 | -+------------+------------+------------+------------+ -| HGNC:9876 | 0.876545 | 0.768933 | 0.987654 | -+------------+------------+------------+------------+ -| HGNC:9 | 0.456788 | 0.876543 | 0.876542 | -+------------+------------+------------+------------+ -| HGNC:23 | 0.876543 | 0.786543 | 0.897654 | -+------------+------------+------------+------------+ - -| - - </token> - - <token name="@DATASET_EXEMPLE2@"> - -+-------------+------------+------------+------------+ -| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 | -+=============+============+============+============+ -| A1BG | 0.523167 | 0.371355 | 0.925661 | -+-------------+------------+------------+------------+ -| A1CF | 0.568765 | 0.765567 | 0.456789 | -+-------------+------------+------------+------------+ -| A2M | 0.876545 | 0.768933 | 0.987654 | -+-------------+------------+------------+------------+ -| A4GALT | 0.456788 | 0.876543 | 0.876542 | -+-------------+------------+------------+------------+ -| M664Y65 | 0.876543 | 0.786543 | 0.897654 | -+-------------+------------+------------+------------+ - -| - - </token> - - <token name="@REFERENCE@"> - -This tool is developed by the `BIMIB`_ at the `Department of Informatics, Systems and Communications`_ of `University of Milan - Bicocca`_. - -.. _BIMIB: https://bimib.disco.unimib.it/index.php/Home -.. _Department of Informatics, Systems and Communications: https://www.disco.unimib.it/en -.. _University of Milan - Bicocca: https://en.unimib.it/ - - </token> - - <xml name="citations"> - <citations> <!--esempio di citazione--> - <citation type="bibtex"> - @article{graudenzi2018integration, - title={Integration of transcriptomic data and metabolic networks in cancer samples reveals highly significant prognostic power}, - author={Graudenzi, Alex and Maspero, Davide and Di Filippo, Marzia and Gnugnoli, Marco and Isella, Claudio and Mauri, Giancarlo and Medico, Enzo and Antoniotti, Marco and Damiani, Chiara}, - journal={Journal of biomedical informatics}, - volume={87}, - pages={37--49}, - year={2018}, - publisher={Elsevier}, - url = {https://doi.org/10.1016/j.jbi.2018.09.010}, - } - </citation> - <citation type="bibtex"> - @article{damiani2020marea4galaxy, - title={MaREA4Galaxy: Metabolic reaction enrichment analysis and visualization of RNA-seq data within Galaxy}, - author={Damiani, Chiara and Rovida, Lorenzo and Maspero, Davide and Sala, Irene and Rosato, Luca and Di Filippo, Marzia and Pescini, Dario and Graudenzi, Alex and Antoniotti, Marco and Mauri, Giancarlo}, - journal={Computational and Structural Biotechnology Journal}, - volume={18}, - pages={993}, - year={2020}, - publisher={Research Network of Computational and Structural Biotechnology}, - url = {https://doi.org/10.1016/j.csbj.2020.04.008}, - } - </citation> - <citation type="bibtex"> - @article{ebrahim2013cobrapy, - title={COBRApy: constraints-based reconstruction and analysis for python}, - author={Ebrahim, Ali and Lerman, Joshua A and Palsson, Bernhard O and Hyduke, Daniel R}, - journal={BMC systems biology}, - volume={7}, - pages={1--6}, - year={2013}, - publisher={Springer} - } - </citation> - </citations> - </xml> - - <xml name="citations_fluxes"> - <citations> - <citation type="bibtex"> - @article{galuzzi2024adjusting, - title={Adjusting for false discoveries in constraint-based differential metabolic flux analysis}, - author={Galuzzi, Bruno G and Milazzo, Luca and Damiani, Chiara}, - journal={Journal of Biomedical Informatics}, - volume={150}, - pages={104597}, - year={2024}, - publisher={Elsevier} - } - </citation> - <citation type="bibtex"> - @inproceedings{galuzzi2022best, - title={Best practices in flux sampling of constrained-based models}, - author={Galuzzi, Bruno G and Milazzo, Luca and Damiani, Chiara}, - booktitle={International Conference on Machine Learning, Optimization, and Data Science}, - pages={234--248}, - year={2022}, - organization={Springer} - } - </citation> - <citation type="bibtex"> - @article{ebrahim2013cobrapy, - title={COBRApy: constraints-based reconstruction and analysis for python}, - author={Ebrahim, Ali and Lerman, Joshua A and Palsson, Bernhard O and Hyduke, Daniel R}, - journal={BMC systems biology}, - volume={7}, - pages={1--6}, - year={2013}, - publisher={Springer} - } - </citation> - </citations> - </xml> - - -</macros> +<macros> + + <xml name="options"> + <param name="rules_selector" argument="--rules_selector" type="select" label="Gene-Protein-Reaction rules:"> + <option value="HMRcore">HMRcore tabular</option> + <option value="Recon">Recon3D tabular</option> + <option value="ENGRO2" selected="true">ENGRO 2 tabular</option> + <option value="Custom">Custom rules</option> + </param> + </xml> + + <xml name="options_model"> + <param name="model_selector" argument="--model_selector" type="select" label="Model:"> + <option value="ENGRO2" selected="true">ENGRO 2 tabular</option> + <option value="Recon">Recon3D tabular</option> + <option value="Custom_model">Custom tabular</option> + </param> + </xml> + + <xml name="options_ras_to_bounds_model"> + <param name="model_selector" argument="--model_selector" type="select" label="Model:"> + <option value="ENGRO2" selected="true">ENGRO 2</option> + <option value="Custom">Custom model</option> + </param> + </xml> + + <xml name="options_ras_to_bounds_medium"> + <param name="medium_selector" argument="--medium_selector" type="select" label="Medium:"> + <option value="Default" selected="true">Default (ENGRO2 built-in medium)</option> + <option value="allOpen">Open</option> + <option value="RPMI_1640">RPMI 1640</option> + <option value="DMEM">DMEM</option> + <option value="EMEM">EMEM</option> + <option value="DMEM:F12_=_1:1">DMEM:F12 = 1:1</option> + <option value="adv_DMEM:F12_=_1:1">adv DMEM:F12 = 1:1</option> + <option value="McCoy's_5A">McCoy's 5A</option> + <option value="IMDM">IMDM</option> + <option value="MEM">MEM</option> + <option value="GMEM">GMEM</option> + <option value="Leibovitz's_L-15">Leibovitz's L-15</option> + <option value="F12">F12</option> + <option value="F10">F10</option> + <option value="AMEM">AMEM</option> + <option value="Waymouth_MB_7521_medium">Waymouth MB 7521 medium</option> + <option value="F12K">F12K</option> + <option value="William's_E_Medium">William's E Medium</option> + <option value="Medium_199">Medium 199</option> + <option value="MCDB_105">MCDB 105</option> + <option value="NEAA">NEAA</option> + <option value="mIVC1:adv_DMEM:F12_=_1:1">mIVC1:adv DMEM:F12 = 1:1</option> + <option value="E8_ess">E8 ess</option> + <option value="RPMI:F12_=_1:1">RPMI:F12 = 1:1</option> + <option value="RPMI:MEM_=_1:1">RPMI:MEM = 1:1</option> + <option value="RPMI:EMEM_=_1:1">RPMI:EMEM = 1:1</option> + <option value="EMEM:F12_=_1:1">EMEM:F12 = 1:1</option> + <option value="DMEM:RPMI_=_2:1">DMEM:RPMI = 2:1</option> + <option value="DMEM:IMDM_=_1:1">DMEM:IMDM = 1:1</option> + <option value="MCDB_105:Medium_199_=_1:1">MCDB 105:Medium 199 = 1:1</option> + <option value="mIVC1:adv_DMEM:F12_=_1:1_open">mIVC1:adv DMEM:F12 = 1:1 open</option> + <option value="E8_ess_open">E8 ess open</option> + <option value="open_10">Open 10</option> + <option value="E8_open_10">E8 open 10</option> + </param> + </xml> + + <token name="@CUSTOM_RULES_EXEMPLE@"> + ++--------------------+-------------------------------+ +| id | rule (with entrez-id) | ++====================+===============================+ +| SHMT1 | 155060 or 10357 | ++--------------------+-------------------------------+ +| NIT2 | 155060 or 100134869 | ++--------------------+-------------------------------+ +| GOT1_GOT2_GOT1L1_2 | 155060 and 100134869 or 10357 | ++--------------------+-------------------------------+ + +| + + </token> + + <token name="@DATASET_EXEMPLE1@"> + ++------------+------------+------------+------------+ +| Hugo_ID | TCGAA62670 | TCGAA62671 | TCGAA62672 | ++============+============+============+============+ +| HGNC:24086 | 0.523167 | 0.371355 | 0.925661 | ++------------+------------+------------+------------+ +| HGNC:24086 | 0.568765 | 0.765567 | 0.456789 | ++------------+------------+------------+------------+ +| HGNC:9876 | 0.876545 | 0.768933 | 0.987654 | ++------------+------------+------------+------------+ +| HGNC:9 | 0.456788 | 0.876543 | 0.876542 | ++------------+------------+------------+------------+ +| HGNC:23 | 0.876543 | 0.786543 | 0.897654 | ++------------+------------+------------+------------+ + +| + + </token> + + <token name="@DATASET_EXEMPLE2@"> + ++-------------+------------+------------+------------+ +| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 | ++=============+============+============+============+ +| A1BG | 0.523167 | 0.371355 | 0.925661 | ++-------------+------------+------------+------------+ +| A1CF | 0.568765 | 0.765567 | 0.456789 | ++-------------+------------+------------+------------+ +| A2M | 0.876545 | 0.768933 | 0.987654 | ++-------------+------------+------------+------------+ +| A4GALT | 0.456788 | 0.876543 | 0.876542 | ++-------------+------------+------------+------------+ +| M664Y65 | 0.876543 | 0.786543 | 0.897654 | ++-------------+------------+------------+------------+ + +| + + </token> + + <token name="@REFERENCE@"> + +This tool is developed by the `BIMIB`_ at the `Department of Informatics, Systems and Communications`_ of `University of Milan - Bicocca`_. + +.. _BIMIB: https://bimib.disco.unimib.it/index.php/Home +.. _Department of Informatics, Systems and Communications: https://www.disco.unimib.it/en +.. _University of Milan - Bicocca: https://en.unimib.it/ + + </token> + + <xml name="citations"> + <citations> <!--esempio di citazione--> + <citation type="bibtex"> + @article{graudenzi2018integration, + title={Integration of transcriptomic data and metabolic networks in cancer samples reveals highly significant prognostic power}, + author={Graudenzi, Alex and Maspero, Davide and Di Filippo, Marzia and Gnugnoli, Marco and Isella, Claudio and Mauri, Giancarlo and Medico, Enzo and Antoniotti, Marco and Damiani, Chiara}, + journal={Journal of biomedical informatics}, + volume={87}, + pages={37--49}, + year={2018}, + publisher={Elsevier}, + url = {https://doi.org/10.1016/j.jbi.2018.09.010}, + } + </citation> + <citation type="bibtex"> + @article{damiani2020marea4galaxy, + title={MaREA4Galaxy: Metabolic reaction enrichment analysis and visualization of RNA-seq data within Galaxy}, + author={Damiani, Chiara and Rovida, Lorenzo and Maspero, Davide and Sala, Irene and Rosato, Luca and Di Filippo, Marzia and Pescini, Dario and Graudenzi, Alex and Antoniotti, Marco and Mauri, Giancarlo}, + journal={Computational and Structural Biotechnology Journal}, + volume={18}, + pages={993}, + year={2020}, + publisher={Research Network of Computational and Structural Biotechnology}, + url = {https://doi.org/10.1016/j.csbj.2020.04.008}, + } + </citation> + <citation type="bibtex"> + @article{ebrahim2013cobrapy, + title={COBRApy: constraints-based reconstruction and analysis for python}, + author={Ebrahim, Ali and Lerman, Joshua A and Palsson, Bernhard O and Hyduke, Daniel R}, + journal={BMC systems biology}, + volume={7}, + pages={1--6}, + year={2013}, + publisher={Springer} + } + </citation> + </citations> + </xml> + + <xml name="citations_fluxes"> + <citations> + <citation type="bibtex"> + @article{galuzzi2024adjusting, + title={Adjusting for false discoveries in constraint-based differential metabolic flux analysis}, + author={Galuzzi, Bruno G and Milazzo, Luca and Damiani, Chiara}, + journal={Journal of Biomedical Informatics}, + volume={150}, + pages={104597}, + year={2024}, + publisher={Elsevier} + } + </citation> + <citation type="bibtex"> + @inproceedings{galuzzi2022best, + title={Best practices in flux sampling of constrained-based models}, + author={Galuzzi, Bruno G and Milazzo, Luca and Damiani, Chiara}, + booktitle={International Conference on Machine Learning, Optimization, and Data Science}, + pages={234--248}, + year={2022}, + organization={Springer} + } + </citation> + <citation type="bibtex"> + @article{ebrahim2013cobrapy, + title={COBRApy: constraints-based reconstruction and analysis for python}, + author={Ebrahim, Ali and Lerman, Joshua A and Palsson, Bernhard O and Hyduke, Daniel R}, + journal={BMC systems biology}, + volume={7}, + pages={1--6}, + year={2013}, + publisher={Springer} + } + </citation> + </citations> + </xml> + + +</macros>
--- a/COBRAxy/ras_generator.py Tue Sep 23 13:48:24 2025 +0000 +++ b/COBRAxy/ras_generator.py Mon Sep 29 10:33:26 2025 +0000 @@ -1,5 +1,10 @@ +""" +Generate Reaction Activity Scores (RAS) from a gene expression dataset and GPR rules. + +The script reads a tabular dataset (genes x samples) and a rules file (GPRs), +computes RAS per reaction for each sample/cell line, and writes a tabular output. +""" from __future__ import division -# galaxy complains this ^^^ needs to be at the very beginning of the file, for some reason. import sys import argparse import collections @@ -8,7 +13,6 @@ import utils.general_utils as utils import utils.rule_parsing as ruleUtils from typing import Union, Optional, List, Dict, Tuple, TypeVar -import os ERRORS = [] ########################## argparse ########################################## @@ -27,16 +31,11 @@ usage = '%(prog)s [options]', description = "process some value's genes to create a comparison's map.") - parser.add_argument( - '-rs', '--rules_selector', - type = utils.Model, default = utils.Model.ENGRO2, choices = list(utils.Model), - help = 'chose which type of dataset you want use') - - parser.add_argument("-rl", "--rule_list", type = str, - help = "path to input file with custom rules, if provided") + parser.add_argument("-rl", "--model_upload", type = str, + help = "path to input file containing the rules") - parser.add_argument("-rn", "--rules_name", type = str, help = "custom rules name") - # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in + parser.add_argument("-rn", "--model_upload_name", type = str, help = "custom rules name") + # Galaxy converts files into .dat, this helps infer the original extension when needed. parser.add_argument( '-n', '--none', @@ -54,7 +53,7 @@ help = "Output log") parser.add_argument( - '-in', '--input', #id è diventato in + '-in', '--input', type = str, help = 'input dataset') @@ -258,14 +257,14 @@ ############################ resolve ########################################## def replace_gene_value(l :str, d :str) -> Tuple[Union[int, float], list]: """ - Replace gene identifiers with corresponding values from a dictionary. + Replace gene identifiers in a parsed rule expression with values from a dict. Args: - l (str): String of gene identifier. - d (str): String corresponding to its value. + l: Parsed rule as a nested list structure (strings, lists, and operators). + d: Dict mapping gene IDs to numeric values. Returns: - tuple: A tuple containing two lists: the first list contains replaced values, and the second list contains any errors encountered during replacement. + tuple: (new_expression, not_found_genes) """ tmp = [] err = [] @@ -282,16 +281,16 @@ l = l[1:] return (tmp, err) -def replace_gene(l :str, d :str) -> Union[int, float]: +def replace_gene(l: str, d: Dict[str, Union[int, float]]) -> Union[int, float, None]: """ Replace a single gene identifier with its corresponding value from a dictionary. Args: l (str): Gene identifier to replace. - d (str): String corresponding to its value. + d (dict): Dict mapping gene IDs to numeric values. Returns: - float/int: Corresponding value from the dictionary if found, None otherwise. + float/int/None: Corresponding value from the dictionary if found, None otherwise. Raises: sys.exit: If the value associated with the gene identifier is not valid. @@ -513,9 +512,9 @@ Args: dataset (pd.DataFrame): Dataset containing gene values. rules (dict): The dict containing reaction ids as keys and rules as values. - - Side effects: - dataset : mut + + Note: + Modifies dataset in place by setting the first column as index. Returns: dict: A dictionary where each key corresponds to a cell line name and each value is a dictionary @@ -523,8 +522,8 @@ """ ras_values_by_cell_line = {} dataset.set_index(dataset.columns[0], inplace=True) - # Considera tutte le colonne tranne la prima in cui ci sono gli hugo quindi va scartata - for cell_line_name in dataset.columns[1:]: + + for cell_line_name in dataset.columns: #[1:]: cell_line = dataset[cell_line_name].to_dict() ras_values_by_cell_line[cell_line_name]= get_ras_values(rules, cell_line) return ras_values_by_cell_line @@ -595,11 +594,11 @@ def save_as_tsv(rasScores: Dict[str, Dict[str, Ras]], reactions :List[str]) -> None: """ - Save computed ras scores to the given path, as a tsv file. + Save computed RAS scores to ARGS.ras_output as a TSV file. Args: rasScores : the computed ras scores. - path : the output tsv file's path. + reactions : the list of reaction IDs, used as the first column. Returns: None @@ -632,7 +631,7 @@ """ supportedGenesInEncoding = geneTranslator[encoding] if geneName in supportedGenesInEncoding: return supportedGenesInEncoding[geneName] - raise ValueError(f"Gene \"{geneName}\" non trovato, verifica di star utilizzando il modello corretto!") + raise ValueError(f"Gene '{geneName}' not found. Please verify you are using the correct model.") def load_custom_rules() -> Dict[str, ruleUtils.OpList]: """ @@ -642,16 +641,63 @@ Returns: Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules. """ - datFilePath = utils.FilePath.fromStrPath(ARGS.rule_list) # actual file, stored in galaxy as a .dat - - try: filenamePath = utils.FilePath.fromStrPath(ARGS.rules_name) # file's name in input, to determine its original ext - except utils.PathErr as err: - raise utils.PathErr(filenamePath, f"Please make sure your file's name is a valid file path, {err.msg}") - - if filenamePath.ext is utils.FileFormat.PICKLE: return utils.readPickle(datFilePath) + datFilePath = utils.FilePath.fromStrPath(ARGS.model_upload) # actual file, stored in Galaxy as a .dat + + dict_rule = {} + + try: + rows = utils.readCsv(datFilePath, delimiter = "\t", skipHeader=False) + if len(rows) <= 1: + raise ValueError("Model tabular with 1 column is not supported.") + if not rows: + raise ValueError("Model tabular is file is empty.") + + id_idx, idx_gpr = utils.findIdxByName(rows[0], "GPR") + + # First, try using a tab delimiter + for line in rows[1:]: + if len(line) <= idx_gpr: + utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log) + continue + + if line[idx_gpr] == "": + dict_rule[line[id_idx]] = ruleUtils.OpList([""]) + else: + dict_rule[line[id_idx]] = ruleUtils.parseRuleToNestedList(line[idx_gpr]) + + except Exception as e: + # If parsing with tabs fails, try comma delimiter + try: + rows = utils.readCsv(datFilePath, delimiter = ",", skipHeader=False) + + if len(rows) <= 1: + raise ValueError("Model tabular with 1 column is not supported.") + + if not rows: + raise ValueError("Model tabular is file is empty.") + + id_idx, idx_gpr = utils.findIdxByName(rows[0], "GPR") + + # Try again parsing row content with the GPR column using comma-separated values + for line in rows[1:]: + if len(line) <= idx_gpr: + utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log) + continue + + if line[idx_gpr] == "": + dict_rule[line[id_idx]] = ruleUtils.OpList([""]) + else: + dict_rule[line[id_idx]] = ruleUtils.parseRuleToNestedList(line[idx_gpr]) + + except Exception as e2: + raise ValueError(f"Unable to parse rules file. Tried both tab and comma delimiters. Original errors: Tab: {e}, Comma: {e2}") + + if not dict_rule: + raise ValueError("No valid rules found in the uploaded file. Please check the file format.") # csv rules need to be parsed, those in a pickle format are taken to be pre-parsed. - return { line[0] : ruleUtils.parseRuleToNestedList(line[1]) for line in utils.readCsv(datFilePath) } + return dict_rule + def main(args:List[str] = None) -> None: """ @@ -671,37 +717,16 @@ # remove versioning from gene names dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0] - # handle custom models - model :utils.Model = ARGS.rules_selector - - if model is utils.Model.Custom: - rules = load_custom_rules() - reactions = list(rules.keys()) + rules = load_custom_rules() + reactions = list(rules.keys()) - save_as_tsv(ras_for_cell_lines(dataset, rules), reactions) - if ERRORS: utils.logWarning( - f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}", - ARGS.out_log) - - return - - # This is the standard flow of the ras_generator program, for non-custom models. - name = "RAS Dataset" - type_gene = gene_type(dataset.iloc[0, 0], name) + save_as_tsv(ras_for_cell_lines(dataset, rules), reactions) + if ERRORS: utils.logWarning( + f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}", + ARGS.out_log) - rules = model.getRules(ARGS.tool_dir) - genes = data_gene(dataset, type_gene, name, None) - ids, rules = load_id_rules(rules.get(type_gene)) - - resolve_rules, err = resolve(genes, rules, ids, ARGS.none, name) - create_ras(resolve_rules, name, rules, ids, ARGS.ras_output) - - if err: utils.logWarning( - f"Warning: gene(s) {err} not found in class \"{name}\", " + - "the expression level for this gene will be considered NaN", - ARGS.out_log) - - print("Execution succeded") + + print("Execution succeeded") ############################################################################### if __name__ == "__main__":
--- a/COBRAxy/ras_generator.xml Tue Sep 23 13:48:24 2025 +0000 +++ b/COBRAxy/ras_generator.xml Mon Sep 29 10:33:26 2025 +0000 @@ -12,27 +12,23 @@ <command detect_errors="exit_code"> <![CDATA[ python $__tool_directory__/ras_generator.py - --rules_selector $cond_rule.rules_selector + --tool_dir $__tool_directory__ + --model_upload $model_upload + --model_upload_name $model_upload.element_identifier --input $input --none $none - --tool_dir $__tool_directory__ + --out_log $log --ras_output $ras_output - #if $cond_rule.rules_selector == 'Custom' - --rule_list $rule_list - --rules_name $rule_list.element_identifier - #end if + ]]> </command> <inputs> - <conditional name="cond_rule"> - <expand macro="options"/> - <when value="Custom"> - <param name="rule_list" argument="--rule_list" type="data" format="tabular, csv, pickle, p, pk" label="Custom rules" /> - </when> - </conditional> - <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Gene Expression dataset:" /> - <param name="name" argument="--name" type="text" label="Dataset's name:" value="Dataset_RAS" help="Default: Dataset_RAS. Do not use white spaces or special symbols." /> + <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" + label="Model file:" help="Upload a CSV/TSV file containing the information generated by the Model Initialization tool." /> + <param name="input" argument="--input" type="data" format="tabular,csv,tsv" label="Gene Expression dataset:" /> + <param name="name" argument="--name" type="text" label="Dataset's name:" value="Dataset_RAS" + help="Default: Dataset_RAS. Do not use white spaces or special symbols." /> <param name="none" argument="--none" type="boolean" checked="true" label="(A and NaN) solved as (A)?" /> </inputs> @@ -107,3 +103,4 @@ <expand macro="citations" /> </tool> +
--- a/COBRAxy/ras_generator_beta.py Tue Sep 23 13:48:24 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,733 +0,0 @@ -""" -Generate Reaction Activity Scores (RAS) from a gene expression dataset and GPR rules. - -The script reads a tabular dataset (genes x samples) and a rules file (GPRs), -computes RAS per reaction for each sample/cell line, and writes a tabular output. -""" -from __future__ import division -import sys -import argparse -import collections -import pandas as pd -import pickle as pk -import utils.general_utils as utils -import utils.rule_parsing as ruleUtils -from typing import Union, Optional, List, Dict, Tuple, TypeVar - -ERRORS = [] -########################## argparse ########################################## -ARGS :argparse.Namespace -def process_args(args:List[str] = None) -> argparse.Namespace: - """ - Processes command-line arguments. - - Args: - args (list): List of command-line arguments. - - Returns: - Namespace: An object containing parsed arguments. - """ - parser = argparse.ArgumentParser( - usage = '%(prog)s [options]', - description = "process some value's genes to create a comparison's map.") - - parser.add_argument("-rl", "--model_upload", type = str, - help = "path to input file containing the rules") - - parser.add_argument("-rn", "--model_upload_name", type = str, help = "custom rules name") - # Galaxy converts files into .dat, this helps infer the original extension when needed. - - parser.add_argument( - '-n', '--none', - type = utils.Bool("none"), default = True, - help = 'compute Nan values') - - parser.add_argument( - '-td', '--tool_dir', - type = str, - required = True, help = 'your tool directory') - - parser.add_argument( - '-ol', '--out_log', - type = str, - help = "Output log") - - parser.add_argument( - '-in', '--input', - type = str, - help = 'input dataset') - - parser.add_argument( - '-ra', '--ras_output', - type = str, - required = True, help = 'ras output') - - - return parser.parse_args(args) - -############################ dataset input #################################### -def read_dataset(data :str, name :str) -> pd.DataFrame: - """ - Read a dataset from a CSV file and return it as a pandas DataFrame. - - Args: - data (str): Path to the CSV file containing the dataset. - name (str): Name of the dataset, used in error messages. - - Returns: - pandas.DataFrame: DataFrame containing the dataset. - - Raises: - pd.errors.EmptyDataError: If the CSV file is empty. - sys.exit: If the CSV file has the wrong format, the execution is aborted. - """ - try: - dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python') - except pd.errors.EmptyDataError: - sys.exit('Execution aborted: wrong format of ' + name + '\n') - if len(dataset.columns) < 2: - sys.exit('Execution aborted: wrong format of ' + name + '\n') - return dataset - -############################ load id e rules ################################## -def load_id_rules(reactions :Dict[str, Dict[str, List[str]]]) -> Tuple[List[str], List[Dict[str, List[str]]]]: - """ - Load IDs and rules from a dictionary of reactions. - - Args: - reactions (dict): A dictionary where keys are IDs and values are rules. - - Returns: - tuple: A tuple containing two lists, the first list containing IDs and the second list containing rules. - """ - ids, rules = [], [] - for key, value in reactions.items(): - ids.append(key) - rules.append(value) - return (ids, rules) - -############################ check_methods #################################### -def gene_type(l :str, name :str) -> str: - """ - Determine the type of gene ID. - - Args: - l (str): The gene identifier to check. - name (str): The name of the dataset, used in error messages. - - Returns: - str: The type of gene ID ('hugo_id', 'ensembl_gene_id', 'symbol', or 'entrez_id'). - - Raises: - sys.exit: If the gene ID type is not supported, the execution is aborted. - """ - if check_hgnc(l): - return 'hugo_id' - elif check_ensembl(l): - return 'ensembl_gene_id' - elif check_symbol(l): - return 'symbol' - elif check_entrez(l): - return 'entrez_id' - else: - sys.exit('Execution aborted:\n' + - 'gene ID type in ' + name + ' not supported. Supported ID'+ - 'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n') - -def check_hgnc(l :str) -> bool: - """ - Check if a gene identifier follows the HGNC format. - - Args: - l (str): The gene identifier to check. - - Returns: - bool: True if the gene identifier follows the HGNC format, False otherwise. - """ - if len(l) > 5: - if (l.upper()).startswith('HGNC:'): - return l[5:].isdigit() - else: - return False - else: - return False - -def check_ensembl(l :str) -> bool: - """ - Check if a gene identifier follows the Ensembl format. - - Args: - l (str): The gene identifier to check. - - Returns: - bool: True if the gene identifier follows the Ensembl format, False otherwise. - """ - return l.upper().startswith('ENS') - - -def check_symbol(l :str) -> bool: - """ - Check if a gene identifier follows the symbol format. - - Args: - l (str): The gene identifier to check. - - Returns: - bool: True if the gene identifier follows the symbol format, False otherwise. - """ - if len(l) > 0: - if l[0].isalpha() and l[1:].isalnum(): - return True - else: - return False - else: - return False - -def check_entrez(l :str) -> bool: - """ - Check if a gene identifier follows the Entrez ID format. - - Args: - l (str): The gene identifier to check. - - Returns: - bool: True if the gene identifier follows the Entrez ID format, False otherwise. - """ - if len(l) > 0: - return l.isdigit() - else: - return False - -############################ gene ############################################# -def data_gene(gene: pd.DataFrame, type_gene: str, name: str, gene_custom: Optional[Dict[str, str]]) -> Dict[str, str]: - """ - Process gene data to ensure correct formatting and handle duplicates. - - Args: - gene (DataFrame): DataFrame containing gene data. - type_gene (str): Type of gene data (e.g., 'hugo_id', 'ensembl_gene_id', 'symbol', 'entrez_id'). - name (str): Name of the dataset. - gene_custom (dict or None): Custom gene data dictionary if provided. - - Returns: - dict: A dictionary containing gene data with gene IDs as keys and corresponding values. - """ - - for i in range(len(gene)): - tmp = gene.iloc[i, 0] - gene.iloc[i, 0] = tmp.strip().split('.')[0] - - gene_dup = [item for item, count in - collections.Counter(gene[gene.columns[0]]).items() if count > 1] - pat_dup = [item for item, count in - collections.Counter(list(gene.columns)).items() if count > 1] - - gene_in_rule = None - - if gene_dup: - if gene_custom == None: - - if str(ARGS.rules_selector) == 'HMRcore': - gene_in_rule = pk.load(open(ARGS.tool_dir + '/local/pickle files/HMRcore_genes.p', 'rb')) - - elif str(ARGS.rules_selector) == 'Recon': - gene_in_rule = pk.load(open(ARGS.tool_dir + '/local/pickle files/Recon_genes.p', 'rb')) - - elif str(ARGS.rules_selector) == 'ENGRO2': - gene_in_rule = pk.load(open(ARGS.tool_dir + '/local/pickle files/ENGRO2_genes.p', 'rb')) - - utils.logWarning(f"{ARGS.tool_dir}'/local/pickle files/ENGRO2_genes.p'", ARGS.out_log) - - gene_in_rule = gene_in_rule.get(type_gene) - - else: - gene_in_rule = gene_custom - - tmp = [] - for i in gene_dup: - if gene_in_rule.get(i) == 'ok': - tmp.append(i) - if tmp: - sys.exit('Execution aborted because gene ID ' - +str(tmp)+' in '+name+' is duplicated\n') - - if pat_dup: utils.logWarning(f"Warning: duplicated label\n{pat_dup} in {name}", ARGS.out_log) - return (gene.set_index(gene.columns[0])).to_dict() - -############################ resolve ########################################## -def replace_gene_value(l :str, d :str) -> Tuple[Union[int, float], list]: - """ - Replace gene identifiers in a parsed rule expression with values from a dict. - - Args: - l: Parsed rule as a nested list structure (strings, lists, and operators). - d: Dict mapping gene IDs to numeric values. - - Returns: - tuple: (new_expression, not_found_genes) - """ - tmp = [] - err = [] - while l: - if isinstance(l[0], list): - tmp_rules, tmp_err = replace_gene_value(l[0], d) - tmp.append(tmp_rules) - err.extend(tmp_err) - else: - value = replace_gene(l[0], d) - tmp.append(value) - if value == None: - err.append(l[0]) - l = l[1:] - return (tmp, err) - -def replace_gene(l: str, d: Dict[str, Union[int, float]]) -> Union[int, float, None]: - """ - Replace a single gene identifier with its corresponding value from a dictionary. - - Args: - l (str): Gene identifier to replace. - d (dict): Dict mapping gene IDs to numeric values. - - Returns: - float/int/None: Corresponding value from the dictionary if found, None otherwise. - - Raises: - sys.exit: If the value associated with the gene identifier is not valid. - """ - if l =='and' or l == 'or': - return l - else: - value = d.get(l, None) - if not(value == None or isinstance(value, (int, float))): - sys.exit('Execution aborted: ' + value + ' value not valid\n') - return value - -T = TypeVar("T", bound = Optional[Union[int, float]]) -def computes(val1 :T, op :str, val2 :T, cn :bool) -> T: - """ - Compute the RAS value between two value and an operator ('and' or 'or'). - - Args: - val1(Optional(Union[float, int])): First value. - op (str): Operator ('and' or 'or'). - val2(Optional(Union[float, int])): Second value. - cn (bool): Control boolean value. - - Returns: - Optional(Union[float, int]): Result of the computation. - """ - if val1 != None and val2 != None: - if op == 'and': - return min(val1, val2) - else: - return val1 + val2 - elif op == 'and': - if cn is True: - if val1 != None: - return val1 - elif val2 != None: - return val2 - else: - return None - else: - return None - else: - if val1 != None: - return val1 - elif val2 != None: - return val2 - else: - return None - -# ris should be Literal[None] but Literal is not supported in Python 3.7 -def control(ris, l :List[Union[int, float, list]], cn :bool) -> Union[bool, int, float]: #Union[Literal[False], int, float]: - """ - Control the format of the expression. - - Args: - ris: Intermediate result. - l (list): Expression to control. - cn (bool): Control boolean value. - - Returns: - Union[Literal[False], int, float]: Result of the control. - """ - if len(l) == 1: - if isinstance(l[0], (float, int)) or l[0] == None: - return l[0] - elif isinstance(l[0], list): - return control(None, l[0], cn) - else: - return False - elif len(l) > 2: - return control_list(ris, l, cn) - else: - return False - -def control_list(ris, l :List[Optional[Union[float, int, list]]], cn :bool) -> Optional[bool]: #Optional[Literal[False]]: - """ - Control the format of a list of expressions. - - Args: - ris: Intermediate result. - l (list): List of expressions to control. - cn (bool): Control boolean value. - - Returns: - Optional[Literal[False]]: Result of the control. - """ - while l: - if len(l) == 1: - return False - elif (isinstance(l[0], (float, int)) or - l[0] == None) and l[1] in ['and', 'or']: - if isinstance(l[2], (float, int)) or l[2] == None: - ris = computes(l[0], l[1], l[2], cn) - elif isinstance(l[2], list): - tmp = control(None, l[2], cn) - if tmp is False: - return False - else: - ris = computes(l[0], l[1], tmp, cn) - else: - return False - l = l[3:] - elif l[0] in ['and', 'or']: - if isinstance(l[1], (float, int)) or l[1] == None: - ris = computes(ris, l[0], l[1], cn) - elif isinstance(l[1], list): - tmp = control(None,l[1], cn) - if tmp is False: - return False - else: - ris = computes(ris, l[0], tmp, cn) - else: - return False - l = l[2:] - elif isinstance(l[0], list) and l[1] in ['and', 'or']: - if isinstance(l[2], (float, int)) or l[2] == None: - tmp = control(None, l[0], cn) - if tmp is False: - return False - else: - ris = computes(tmp, l[1], l[2], cn) - elif isinstance(l[2], list): - tmp = control(None, l[0], cn) - tmp2 = control(None, l[2], cn) - if tmp is False or tmp2 is False: - return False - else: - ris = computes(tmp, l[1], tmp2, cn) - else: - return False - l = l[3:] - else: - return False - return ris - -ResolvedRules = Dict[str, List[Optional[Union[float, int]]]] -def resolve(genes: Dict[str, str], rules: List[str], ids: List[str], resolve_none: bool, name: str) -> Tuple[Optional[ResolvedRules], Optional[list]]: - """ - Resolve rules using gene data to compute scores for each rule. - - Args: - genes (dict): Dictionary containing gene data with gene IDs as keys and corresponding values. - rules (list): List of rules to resolve. - ids (list): List of IDs corresponding to the rules. - resolve_none (bool): Flag indicating whether to resolve None values in the rules. - name (str): Name of the dataset. - - Returns: - tuple: A tuple containing resolved rules as a dictionary and a list of gene IDs not found in the data. - """ - resolve_rules = {} - not_found = [] - flag = False - for key, value in genes.items(): - tmp_resolve = [] - for i in range(len(rules)): - tmp = rules[i] - if tmp: - tmp, err = replace_gene_value(tmp, value) - if err: - not_found.extend(err) - ris = control(None, tmp, resolve_none) - if ris is False or ris == None: - tmp_resolve.append(None) - else: - tmp_resolve.append(ris) - flag = True - else: - tmp_resolve.append(None) - resolve_rules[key] = tmp_resolve - - if flag is False: - utils.logWarning( - f"Warning: no computable score (due to missing gene values) for class {name}, the class has been disregarded", - ARGS.out_log) - - return (None, None) - - return (resolve_rules, list(set(not_found))) -############################ create_ras ####################################### -def create_ras(resolve_rules: Optional[ResolvedRules], dataset_name: str, rules: List[str], ids: List[str], file: str) -> None: - """ - Create a RAS (Reaction Activity Score) file from resolved rules. - - Args: - resolve_rules (dict): Dictionary containing resolved rules. - dataset_name (str): Name of the dataset. - rules (list): List of rules. - file (str): Path to the output RAS file. - - Returns: - None - """ - if resolve_rules is None: - utils.logWarning(f"Couldn't generate RAS for current dataset: {dataset_name}", ARGS.out_log) - - for geni in resolve_rules.values(): - for i, valori in enumerate(geni): - if valori == None: - geni[i] = 'None' - - output_ras = pd.DataFrame.from_dict(resolve_rules) - - output_ras.insert(0, 'Reactions', ids) - output_to_csv = pd.DataFrame.to_csv(output_ras, sep = '\t', index = False) - - text_file = open(file, "w") - - text_file.write(output_to_csv) - text_file.close() - -################################- NEW RAS COMPUTATION -################################ -Expr = Optional[Union[int, float]] -Ras = Expr -def ras_for_cell_lines(dataset: pd.DataFrame, rules: Dict[str, ruleUtils.OpList]) -> Dict[str, Dict[str, Ras]]: - """ - Generates the RAS scores for each cell line found in the dataset. - - Args: - dataset (pd.DataFrame): Dataset containing gene values. - rules (dict): The dict containing reaction ids as keys and rules as values. - - Note: - Modifies dataset in place by setting the first column as index. - - Returns: - dict: A dictionary where each key corresponds to a cell line name and each value is a dictionary - where each key corresponds to a reaction ID and each value is its computed RAS score. - """ - ras_values_by_cell_line = {} - dataset.set_index(dataset.columns[0], inplace=True) - - for cell_line_name in dataset.columns: #[1:]: - cell_line = dataset[cell_line_name].to_dict() - ras_values_by_cell_line[cell_line_name]= get_ras_values(rules, cell_line) - return ras_values_by_cell_line - -def get_ras_values(value_rules: Dict[str, ruleUtils.OpList], dataset: Dict[str, Expr]) -> Dict[str, Ras]: - """ - Computes the RAS (Reaction Activity Score) values for each rule in the given dict. - - Args: - value_rules (dict): A dictionary where keys are reaction ids and values are OpLists. - dataset : gene expression data of one cell line. - - Returns: - dict: A dictionary where keys are reaction ids and values are the computed RAS values for each rule. - """ - return {key: ras_op_list(op_list, dataset) for key, op_list in value_rules.items()} - -def get_gene_expr(dataset :Dict[str, Expr], name :str) -> Expr: - """ - Extracts the gene expression of the given gene from a cell line dataset. - - Args: - dataset : gene expression data of one cell line. - name : gene name. - - Returns: - Expr : the gene's expression value. - """ - expr = dataset.get(name, None) - if expr is None: ERRORS.append(name) - - return expr - -def ras_op_list(op_list: ruleUtils.OpList, dataset: Dict[str, Expr]) -> Ras: - """ - Computes recursively the RAS (Reaction Activity Score) value for the given OpList, considering the specified flag to control None behavior. - - Args: - op_list (OpList): The OpList representing a rule with gene values. - dataset : gene expression data of one cell line. - - Returns: - Ras: The computed RAS value for the given OpList. - """ - op = op_list.op - ras_value :Ras = None - if not op: return get_gene_expr(dataset, op_list[0]) - if op is ruleUtils.RuleOp.AND and not ARGS.none and None in op_list: return None - - for i in range(len(op_list)): - item = op_list[i] - if isinstance(item, ruleUtils.OpList): - item = ras_op_list(item, dataset) - - else: - item = get_gene_expr(dataset, item) - - if item is None: - if op is ruleUtils.RuleOp.AND and not ARGS.none: return None - continue - - if ras_value is None: - ras_value = item - else: - ras_value = ras_value + item if op is ruleUtils.RuleOp.OR else min(ras_value, item) - - return ras_value - -def save_as_tsv(rasScores: Dict[str, Dict[str, Ras]], reactions :List[str]) -> None: - """ - Save computed RAS scores to ARGS.ras_output as a TSV file. - - Args: - rasScores : the computed ras scores. - reactions : the list of reaction IDs, used as the first column. - - Returns: - None - """ - for scores in rasScores.values(): # this is actually a lot faster than using the ootb dataframe metod, sadly - for reactId, score in scores.items(): - if score is None: scores[reactId] = "None" - - output_ras = pd.DataFrame.from_dict(rasScores) - output_ras.insert(0, 'Reactions', reactions) - output_ras.to_csv(ARGS.ras_output, sep = '\t', index = False) - -############################ MAIN ############################################# -#TODO: not used but keep, it will be when the new translator dicts will be used. -def translateGene(geneName :str, encoding :str, geneTranslator :Dict[str, Dict[str, str]]) -> str: - """ - Translate gene from any supported encoding to HugoID. - - Args: - geneName (str): the name of the gene in its current encoding. - encoding (str): the encoding. - geneTranslator (Dict[str, Dict[str, str]]): the dict containing all supported gene names - and encodings in the current model, mapping each to the corresponding HugoID encoding. - - Raises: - ValueError: When the gene isn't supported in the model. - - Returns: - str: the gene in HugoID encoding. - """ - supportedGenesInEncoding = geneTranslator[encoding] - if geneName in supportedGenesInEncoding: return supportedGenesInEncoding[geneName] - raise ValueError(f"Gene '{geneName}' not found. Please verify you are using the correct model.") - -def load_custom_rules() -> Dict[str, ruleUtils.OpList]: - """ - Opens custom rules file and extracts the rules. If the file is in .csv format an additional parsing step will be - performed, significantly impacting the runtime. - - Returns: - Dict[str, ruleUtils.OpList] : dict mapping reaction IDs to rules. - """ - datFilePath = utils.FilePath.fromStrPath(ARGS.model_upload) # actual file, stored in Galaxy as a .dat - - dict_rule = {} - - try: - rows = utils.readCsv(datFilePath, delimiter = "\t", skipHeader=False) - if len(rows) <= 1: - raise ValueError("Model tabular with 1 column is not supported.") - - if not rows: - raise ValueError("Model tabular is file is empty.") - - id_idx, idx_gpr = utils.findIdxByName(rows[0], "GPR") - - # First, try using a tab delimiter - for line in rows[1:]: - if len(line) <= idx_gpr: - utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log) - continue - - if line[idx_gpr] == "": - dict_rule[line[id_idx]] = ruleUtils.OpList([""]) - else: - dict_rule[line[id_idx]] = ruleUtils.parseRuleToNestedList(line[idx_gpr]) - - except Exception as e: - # If parsing with tabs fails, try comma delimiter - try: - rows = utils.readCsv(datFilePath, delimiter = ",", skipHeader=False) - - if len(rows) <= 1: - raise ValueError("Model tabular with 1 column is not supported.") - - if not rows: - raise ValueError("Model tabular is file is empty.") - - id_idx, idx_gpr = utils.findIdxByName(rows[0], "GPR") - - # Try again parsing row content with the GPR column using comma-separated values - for line in rows[1:]: - if len(line) <= idx_gpr: - utils.logWarning(f"Skipping malformed line: {line}", ARGS.out_log) - continue - - if line[idx_gpr] == "": - dict_rule[line[id_idx]] = ruleUtils.OpList([""]) - else: - dict_rule[line[id_idx]] = ruleUtils.parseRuleToNestedList(line[idx_gpr]) - - except Exception as e2: - raise ValueError(f"Unable to parse rules file. Tried both tab and comma delimiters. Original errors: Tab: {e}, Comma: {e2}") - - if not dict_rule: - raise ValueError("No valid rules found in the uploaded file. Please check the file format.") - # csv rules need to be parsed, those in a pickle format are taken to be pre-parsed. - return dict_rule - - -def main(args:List[str] = None) -> None: - """ - Initializes everything and sets the program in motion based on the fronted input arguments. - - Returns: - None - """ - # get args from frontend (related xml) - global ARGS - ARGS = process_args(args) - - # read dataset - dataset = read_dataset(ARGS.input, "dataset") - dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str) - - # remove versioning from gene names - dataset.iloc[:, 0] = dataset.iloc[:, 0].str.split('.').str[0] - - rules = load_custom_rules() - reactions = list(rules.keys()) - - save_as_tsv(ras_for_cell_lines(dataset, rules), reactions) - if ERRORS: utils.logWarning( - f"The following genes are mentioned in the rules but don't appear in the dataset: {ERRORS}", - ARGS.out_log) - - - print("Execution succeeded") - -############################################################################### -if __name__ == "__main__": - main()
--- a/COBRAxy/ras_generator_beta.xml Tue Sep 23 13:48:24 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,106 +0,0 @@ -<tool id="MaREA RAS Generator - Beta" name="Expression2RAS - BETA" version="2.0.0"> - <description>- Reaction Activity Scores computation</description> - <macros> - <import>marea_macros.xml</import> - </macros> - <requirements> - <requirement type="package" version="1.24.4">numpy</requirement> - <requirement type="package" version="2.0.3">pandas</requirement> - <requirement type="package" version="5.2.2">lxml</requirement> - <requirement type="package" version="0.29.0">cobra</requirement> - </requirements> - <command detect_errors="exit_code"> - <![CDATA[ - python $__tool_directory__/ras_generator_beta.py - --tool_dir $__tool_directory__ - --model_upload $model_upload - --model_upload_name $model_upload.element_identifier - --input $input - --none $none - - --out_log $log - --ras_output $ras_output - - ]]> - </command> - <inputs> - <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" - label="Model file:" help="Upload a CSV/TSV file containing the information generated by the Model Initialization tool." /> - <param name="input" argument="--input" type="data" format="tabular,csv,tsv" label="Gene Expression dataset:" /> - <param name="name" argument="--name" type="text" label="Dataset's name:" value="Dataset_RAS" - help="Default: Dataset_RAS. Do not use white spaces or special symbols." /> - <param name="none" argument="--none" type="boolean" checked="true" label="(A and NaN) solved as (A)?" /> - </inputs> - - <outputs> - <data format="txt" name="log" label="Expression2RAS - $name - Log" /> - <data format="tabular" name="ras_output" label='$name'/> - </outputs> - - <help> -<![CDATA[ - -What it does -------------- - -This tool computes Reaction Activity Scores from gene expression (RNA-seq) dataset(s), as described in Graudenzi et al. Integration of transcriptomic data and metabolic networks in cancer samples reveals highly significant prognostic power. Journal of Biomedical Informatics, 2018, 87: 37-49. - -Accepted files: - - A gene expression dataset - -Format: -Tab-separated text file reporting the normalized expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). All values must be positive to correctly compute the RAS. -Column header: sample ID. -Row header: gene ID. - - -Optional files: - - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats: - - * (Cobra Toolbox and CobraPy compliant) xml of metabolic model; - * .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2). - -Computation option ‘(A and NaN) solved as (A)’: -In case of missing expression value, referred to as NaN (Not a Number), for a gene joined with an AND operator in a given GPR rule, the rule ‘A and NaN’ - -If YES is selected: the GPR will be solved as A. - -If NO is selected: the GPR will be disregarded tout-court (i.e., treated as NaN). - -Example input -------------- - -Custom GPR rules: - -+------------+--------------------------------------+ -| id | rule (with entrez-id | -+============+======================================+ -| r1642 | 155060 or 10357 | -+------------+--------------------------------------+ -| r1643 | 155060 or 100134869 | -+------------+--------------------------------------+ -| r1640 | 155060 and 100134869 or 10357 | -+------------+--------------------------------------+ - -RNA-seq dataset: - -+------------+----------------+----------------+----------------+ -| Hugo_ID | TCGAA62670 | TCGAA62671 | TCGAA62672 | -+============+================+================+================+ -| HGNC:24086 | 0.523167 | 0.371355 | 0.925661 | -+------------+----------------+----------------+----------------+ -| HGNC:24086 | 0.568765 | 0.765567 | 0.456789 | -+------------+----------------+----------------+----------------+ -| HGNC:9876 | 0.876545 | 0.768933 | 0.987654 | -+------------+----------------+----------------+----------------+ -| HGNC:9 | 0.456788 | 0.876543 | 0.876542 | -+------------+----------------+----------------+----------------+ -| HGNC:23 | 0.876543 | 0.786543 | 0.897654 | -+------------+----------------+----------------+----------------+ - -]]> - </help> -<expand macro="citations" /> -</tool> - -
--- a/COBRAxy/ras_to_bounds.py Tue Sep 23 13:48:24 2025 +0000 +++ b/COBRAxy/ras_to_bounds.py Mon Sep 29 10:33:26 2025 +0000 @@ -1,13 +1,24 @@ +""" +Apply RAS-based scaling to reaction bounds and optionally save updated models. + +Workflow: +- Read one or more RAS matrices (patients/samples x reactions) +- Normalize and merge them, optionally adding class suffixes to sample IDs +- Build a COBRA model from a tabular CSV +- Run FVA to initialize bounds, then scale per-sample based on RAS values +- Save bounds per sample and optionally export updated models in chosen formats +""" import argparse import utils.general_utils as utils -from typing import Optional, List +from typing import Optional, Dict, Set, List, Tuple, Union import os import numpy as np import pandas as pd import cobra +from cobra import Model import sys -import csv from joblib import Parallel, delayed, cpu_count +import utils.model_utils as modelUtils ################################# process args ############################### def process_args(args :List[str] = None) -> argparse.Namespace: @@ -23,23 +34,9 @@ parser = argparse.ArgumentParser(usage = '%(prog)s [options]', description = 'process some value\'s') - parser.add_argument( - '-ms', '--model_selector', - type = utils.Model, default = utils.Model.ENGRO2, choices = [utils.Model.ENGRO2, utils.Model.Custom], - help = 'chose which type of model you want use') - parser.add_argument("-mo", "--model", type = str, + parser.add_argument("-mo", "--model_upload", type = str, help = "path to input file with custom rules, if provided") - - parser.add_argument("-mn", "--model_name", type = str, help = "custom mode name") - - parser.add_argument( - '-mes', '--medium_selector', - default = "allOpen", - help = 'chose which type of medium you want use') - - parser.add_argument("-meo", "--medium", type = str, - help = "path to input file with custom medium, if provided") parser.add_argument('-ol', '--out_log', help = "Output log") @@ -57,11 +54,6 @@ parser.add_argument('-rn', '--name', type=str, help = 'ras class names') - - parser.add_argument('-rs', '--ras_selector', - required = True, - type=utils.Bool("using_RAS"), - help = 'ras selector') parser.add_argument('-cc', '--cell_class', type = str, @@ -72,6 +64,21 @@ default='ras_to_bounds/', help = 'output path for maps') + parser.add_argument('-sm', '--save_models', + type=utils.Bool("save_models"), + default=False, + help = 'whether to save models with applied bounds') + + parser.add_argument('-smp', '--save_models_path', + type = str, + default='saved_models/', + help = 'output path for saved models') + + parser.add_argument('-smf', '--save_models_format', + type = str, + default='csv', + help = 'format for saved models (csv, xml, json, mat, yaml, tabular)') + ARGS = parser.parse_args(args) return ARGS @@ -87,8 +94,9 @@ Returns: None """ - with open(ARGS.out_log, 'a') as log: - log.write(s + "\n\n") + if ARGS.out_log: + with open(ARGS.out_log, 'a') as log: + log.write(s + "\n\n") print(s) ############################ dataset input #################################### @@ -143,7 +151,100 @@ new_bounds.loc[reaction, "upper_bound"] = valMax return new_bounds -def process_ras_cell(cellName, ras_row, model, rxns_ids, output_folder): + +def save_model(model, filename, output_folder, file_format='csv'): + """ + Save a COBRA model to file in the specified format. + + Args: + model (cobra.Model): The model to save. + filename (str): Base filename (without extension). + output_folder (str): Output directory. + file_format (str): File format ('xml', 'json', 'mat', 'yaml', 'tabular', 'csv'). + + Returns: + None + """ + if not os.path.exists(output_folder): + os.makedirs(output_folder) + + try: + if file_format == 'tabular' or file_format == 'csv': + # Special handling for tabular format using utils functions + filepath = os.path.join(output_folder, f"{filename}.csv") + + rules = modelUtils.generate_rules(model, asParsed = False) + reactions = modelUtils.generate_reactions(model, asParsed = False) + bounds = modelUtils.generate_bounds(model) + medium = modelUtils.get_medium(model) + + try: + compartments = modelUtils.generate_compartments(model) + except: + compartments = None + + df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "Rule"]) + df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Reaction"]) + df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"}) + df_medium = medium.rename(columns = {"reaction": "ReactionID"}) + df_medium["InMedium"] = True + + merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer") + merged = merged.merge(df_bounds, on = "ReactionID", how = "outer") + + # Add compartments only if they exist and model name is ENGRO2 + if compartments is not None and hasattr(ARGS, 'name') and ARGS.name == "ENGRO2": + merged = merged.merge(compartments, on = "ReactionID", how = "outer") + + merged = merged.merge(df_medium, on = "ReactionID", how = "left") + merged["InMedium"] = merged["InMedium"].fillna(False) + merged = merged.sort_values(by = "InMedium", ascending = False) + + merged.to_csv(filepath, sep="\t", index=False) + + else: + # Standard COBRA formats + filepath = os.path.join(output_folder, f"{filename}.{file_format}") + + if file_format == 'xml': + cobra.io.write_sbml_model(model, filepath) + elif file_format == 'json': + cobra.io.save_json_model(model, filepath) + elif file_format == 'mat': + cobra.io.save_matlab_model(model, filepath) + elif file_format == 'yaml': + cobra.io.save_yaml_model(model, filepath) + else: + raise ValueError(f"Unsupported format: {file_format}") + + print(f"Model saved: {filepath}") + + except Exception as e: + warning(f"Error saving model {filename}: {str(e)}") + +def apply_bounds_to_model(model, bounds): + """ + Apply bounds from a DataFrame to a COBRA model. + + Args: + model (cobra.Model): The metabolic model to modify. + bounds (pd.DataFrame): DataFrame with reaction bounds. + + Returns: + cobra.Model: Modified model with new bounds. + """ + model_copy = model.copy() + for reaction_id in bounds.index: + try: + reaction = model_copy.reactions.get_by_id(reaction_id) + reaction.lower_bound = bounds.loc[reaction_id, "lower_bound"] + reaction.upper_bound = bounds.loc[reaction_id, "upper_bound"] + except KeyError: + # Reaction not found in model, skip + continue + return model_copy + +def process_ras_cell(cellName, ras_row, model, rxns_ids, output_folder, save_models=False, save_models_path='saved_models/', save_models_format='csv'): """ Process a single RAS cell, apply bounds, and save the bounds to a CSV file. @@ -153,6 +254,9 @@ model (cobra.Model): The metabolic model to be modified. rxns_ids (list of str): List of reaction IDs to which the scaling factors will be applied. output_folder (str): Folder path where the output CSV file will be saved. + save_models (bool): Whether to save models with applied bounds. + save_models_path (str): Path where to save models. + save_models_format (str): Format for saved models. Returns: None @@ -160,32 +264,30 @@ bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) new_bounds = apply_ras_bounds(bounds, ras_row) new_bounds.to_csv(output_folder + cellName + ".csv", sep='\t', index=True) - pass + + # Save model if requested + if save_models: + modified_model = apply_bounds_to_model(model, new_bounds) + save_model(modified_model, cellName, save_models_path, save_models_format) + + return -def generate_bounds(model: cobra.Model, medium: dict, ras=None, output_folder='output/') -> pd.DataFrame: +def generate_bounds_model(model: cobra.Model, ras=None, output_folder='output/', save_models=False, save_models_path='saved_models/', save_models_format='csv') -> pd.DataFrame: """ Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments. Args: model (cobra.Model): The metabolic model for which bounds will be generated. - medium (dict): A dictionary where keys are reaction IDs and values are the medium conditions. ras (pd.DataFrame, optional): RAS pandas dataframe. Defaults to None. output_folder (str, optional): Folder path where output CSV files will be saved. Defaults to 'output/'. + save_models (bool): Whether to save models with applied bounds. + save_models_path (str): Path where to save models. + save_models_format (str): Format for saved models. Returns: pd.DataFrame: DataFrame containing the bounds of reactions in the model. """ - rxns_ids = [rxn.id for rxn in model.reactions] - - # Set all reactions to zero in the medium - for rxn_id, _ in model.medium.items(): - model.reactions.get_by_id(rxn_id).lower_bound = float(0.0) - - # Set medium conditions - for reaction, value in medium.items(): - if value is not None: - model.reactions.get_by_id(reaction).lower_bound = -float(value) - + rxns_ids = [rxn.id for rxn in model.reactions] # Perform Flux Variability Analysis (FVA) on this medium df_FVA = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8) @@ -196,19 +298,18 @@ model.reactions.get_by_id(reaction).upper_bound = float(df_FVA.loc[reaction, "maximum"]) if ras is not None: - Parallel(n_jobs=cpu_count())(delayed(process_ras_cell)(cellName, ras_row, model, rxns_ids, output_folder) for cellName, ras_row in ras.iterrows()) + Parallel(n_jobs=cpu_count())(delayed(process_ras_cell)( + cellName, ras_row, model, rxns_ids, output_folder, + save_models, save_models_path, save_models_format + ) for cellName, ras_row in ras.iterrows()) else: - bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) - newBounds = apply_ras_bounds(bounds, pd.Series([1]*len(rxns_ids), index=rxns_ids)) - newBounds.to_csv(output_folder + "bounds.csv", sep='\t', index=True) - pass - - + raise ValueError("RAS DataFrame is None. Cannot generate bounds without RAS data.") + return ############################# main ########################################### def main(args:List[str] = None) -> None: """ - Initializes everything and sets the program in motion based on the fronted input arguments. + Initialize and execute RAS-to-bounds pipeline based on the frontend input arguments. Returns: None @@ -216,71 +317,60 @@ if not os.path.exists('ras_to_bounds'): os.makedirs('ras_to_bounds') - global ARGS ARGS = process_args(args) - if(ARGS.ras_selector == True): - ras_file_list = ARGS.input_ras.split(",") - ras_file_names = ARGS.name.split(",") - if len(ras_file_names) != len(set(ras_file_names)): - error_message = "Duplicated file names in the uploaded RAS matrices." - warning(error_message) - raise ValueError(error_message) - pass - ras_class_names = [] - for file in ras_file_names: - ras_class_names.append(file.rsplit(".", 1)[0]) - ras_list = [] - class_assignments = pd.DataFrame(columns=["Patient_ID", "Class"]) - for ras_matrix, ras_class_name in zip(ras_file_list, ras_class_names): - ras = read_dataset(ras_matrix, "ras dataset") - ras.replace("None", None, inplace=True) - ras.set_index("Reactions", drop=True, inplace=True) - ras = ras.T - ras = ras.astype(float) - if(len(ras_file_list)>1): - #append class name to patient id (dataframe index) - ras.index = [f"{idx}_{ras_class_name}" for idx in ras.index] - else: - ras.index = [f"{idx}" for idx in ras.index] - ras_list.append(ras) - for patient_id in ras.index: - class_assignments.loc[class_assignments.shape[0]] = [patient_id, ras_class_name] + + ras_file_list = ARGS.input_ras.split(",") + ras_file_names = ARGS.name.split(",") + if len(ras_file_names) != len(set(ras_file_names)): + error_message = "Duplicated file names in the uploaded RAS matrices." + warning(error_message) + raise ValueError(error_message) + ras_class_names = [] + for file in ras_file_names: + ras_class_names.append(file.rsplit(".", 1)[0]) + ras_list = [] + class_assignments = pd.DataFrame(columns=["Patient_ID", "Class"]) + for ras_matrix, ras_class_name in zip(ras_file_list, ras_class_names): + ras = read_dataset(ras_matrix, "ras dataset") + ras.replace("None", None, inplace=True) + ras.set_index("Reactions", drop=True, inplace=True) + ras = ras.T + ras = ras.astype(float) + if(len(ras_file_list)>1): + # Append class name to patient id (DataFrame index) + ras.index = [f"{idx}_{ras_class_name}" for idx in ras.index] + else: + ras.index = [f"{idx}" for idx in ras.index] + ras_list.append(ras) + for patient_id in ras.index: + class_assignments.loc[class_assignments.shape[0]] = [patient_id, ras_class_name] + - # Concatenate all ras DataFrames into a single DataFrame + # Concatenate all RAS DataFrames into a single DataFrame ras_combined = pd.concat(ras_list, axis=0) - # Normalize the RAS values by max RAS + # Normalize RAS values column-wise by max RAS ras_combined = ras_combined.div(ras_combined.max(axis=0)) ras_combined.dropna(axis=1, how='all', inplace=True) + model = modelUtils.build_cobra_model_from_csv(ARGS.model_upload) - - model_type :utils.Model = ARGS.model_selector - if model_type is utils.Model.Custom: - model = model_type.getCOBRAmodel(customPath = utils.FilePath.fromStrPath(ARGS.model), customExtension = utils.FilePath.fromStrPath(ARGS.model_name).ext) - else: - model = model_type.getCOBRAmodel(toolDir=ARGS.tool_dir) + validation = modelUtils.validate_model(model) + + print("\n=== MODEL VALIDATION ===") + for key, value in validation.items(): + print(f"{key}: {value}") - if(ARGS.medium_selector == "Custom"): - medium = read_dataset(ARGS.medium, "medium dataset") - medium.set_index(medium.columns[0], inplace=True) - medium = medium.astype(float) - medium = medium[medium.columns[0]].to_dict() - else: - df_mediums = pd.read_csv(ARGS.tool_dir + "/local/medium/medium.csv", index_col = 0) - ARGS.medium_selector = ARGS.medium_selector.replace("_", " ") - medium = df_mediums[[ARGS.medium_selector]] - medium = medium[ARGS.medium_selector].to_dict() - if(ARGS.ras_selector == True): - generate_bounds(model, medium, ras = ras_combined, output_folder=ARGS.output_path) - class_assignments.to_csv(ARGS.cell_class, sep = '\t', index = False) - else: - generate_bounds(model, medium, output_folder=ARGS.output_path) + generate_bounds_model(model, ras=ras_combined, output_folder=ARGS.output_path, + save_models=ARGS.save_models, save_models_path=ARGS.save_models_path, + save_models_format=ARGS.save_models_format) + class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False) - pass + + return ############################################################################## if __name__ == "__main__":
--- a/COBRAxy/ras_to_bounds.xml Tue Sep 23 13:48:24 2025 +0000 +++ b/COBRAxy/ras_to_bounds.xml Mon Sep 29 10:33:26 2025 +0000 @@ -16,52 +16,32 @@ <![CDATA[ python $__tool_directory__/ras_to_bounds.py --tool_dir $__tool_directory__ - --model_selector $cond_model.model_selector --cell_class $cell_class - #if $cond_model.model_selector == 'Custom' - --model $model - --model_name $model.element_identifier - #end if - --medium_selector $cond_medium.medium_selector - #if $cond_medium.medium_selector == 'Custom' - --medium $medium - #end if - --ras_selector $cond_ras.ras_choice + --model_upload $model_upload #set $names = "" - #if $cond_ras.ras_choice == "True" - --input_ras "${",".join(map(str, $cond_ras.input_ras))}" - #for $input_temp in $cond_ras.input_ras: - #set $names = $names + $input_temp.element_identifier + "," - #end for - #end if + --input_ras "${",".join(map(str, $input_ras))}" + #for $input_temp in $input_ras: + #set $names = $names + $input_temp.element_identifier + "," + #end for + + --save_models $save_models + --save_models_path saved_models/ --name "$names" --out_log $log ]]> </command> <inputs> - <conditional name="cond_model"> - <expand macro="options_ras_to_bounds_model"/> - <when value="Custom"> - <param name="model" argument="--model" type="data" format="json, xml" label="Custom model" /> - </when> - </conditional> + + <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" multiple="false" + label="Model tabular file:" help="Upload a CSV/TSV file containing information generated by the Model Initialization tool." /> + + <param name="input_ras" argument="--input_ras" multiple="true" type="data" format="tabular, csv, tsv" label="RAS matrix:" /> - <conditional name="cond_ras"> - <param name="ras_choice" argument="--ras_choice" type="select" label="Do want to use RAS?"> - <option value="True" selected="true">Yes</option> - <option value="False">No</option> - </param> - <when value="True"> - <param name="input_ras" argument="--input_ras" multiple="true" type="data" format="tabular, csv, tsv" label="RAS matrix:" /> - </when> - </conditional> - - <conditional name="cond_medium"> - <expand macro="options_ras_to_bounds_medium"/> - <when value="Custom"> - <param name="medium" argument="--medium" type="data" format="tabular, csv, tsv" label="Custom medium" /> - </when> - </conditional> + + <param name="save_models" argument="--save_models" type="select" label="Save models with applied bounds?"> + <option value="False" selected="true">No</option> + <option value="True">Yes</option> + </param> </inputs> @@ -71,7 +51,10 @@ <collection name="ras_to_bounds" type="list" label="Ras to Bounds"> <discover_datasets name = "collection" pattern="__name_and_ext__" directory="ras_to_bounds"/> </collection> - + <collection name="saved_models" type="list" label="Saved Models (Tabular Format)"> + <filter>save_models == "True"</filter> + <discover_datasets name = "saved_models_collection" pattern="__name_and_ext__" directory="saved_models"/> + </collection> </outputs> <help> @@ -81,28 +64,14 @@ What it does ------------- -This tool generates the reactions bounds for a given metabolic model (JSON or XML format) both with and without the use of the Reaction Activity Scores (RAS) matrix generated by RAS generator. -Moreover, it enables to use custom/pre-defined growth mediums to constrain exchange reactions. For a custom medium, It is suggested to use the template file returned by the Custom Data Generator tool. -If the RAS matrix, generated by the RAS generator tool, is used, then a bounds file is generated for each cell. Otherwise, a single bounds file is returned. -By default, all reactions in model.medium that are not present in the medium file have lower bound set to 0.0 and not set to the default model value. +This tool generates the reaction bounds for a given tabular model created by the Metabolic Model Setting Tool and the Reaction Activity Scores (RAS) matrix generated by the RAS Generator. + Accepted files: - - A model: JSON or XML file reporting reactions and rules contained in the model. + - A tabular model: tab-separated file containing information about a metabolic model. - RAS matrix: tab-separated RAS file as returned by RAS generator. Multiple RAS files having different file name can be uploaded too (e.g. one RAS matrix for normal cells and one for cancer cells). Note that if multiple RAs matrices are uploaded, the bounds are normalzed across all cells. - - Medium: tab-separated file containing lower and upper-bounds of medium reactions. - -Example of custum growth medium file: - -+------------+----------------+----------------+ -| Reaction ID| lower_bound | upper_bound | -+============+================+================+ -| r1 | 0.123167 | 0.371355 | -+------------+----------------+----------------+ -| r2 | 0.268765 | 0.765567 | -+------------+----------------+----------------+ - - + Example for multiple RAS matrices: - cancer.csv and normal.csv generated by RAS generator tool (the two class names are 'cancer' and 'normal'). - This tool returns one unique collection of bounds files for both cancer and normal cells (normalization is performed across all cells). @@ -112,10 +81,13 @@ ------------- The tool generates: - - bounds: reporting the bounds of the model, or cells if RAS is used. Format: tab-separated. - - Classes: a file containing the class of each cell (only if multiple RAS matrices were uploaded). The class name of a RAS matrix corresponds to its file name. Format: tab-separated. + - A collection of tab files, one for each sample. Each file contains the lower and upper bounds computed from the RAS values and the FVA, used to perform flux sampling or optimization. + - Classes: a file containing the class of each sample. The class name of a RAS matrix corresponds to its file name. Format: tab-separated. - a log file (.txt). ]]> </help> <expand macro="citations" /> -</tool> \ No newline at end of file + +</tool> + +
--- a/COBRAxy/ras_to_bounds_beta.py Tue Sep 23 13:48:24 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,377 +0,0 @@ -""" -Apply RAS-based scaling to reaction bounds and optionally save updated models. - -Workflow: -- Read one or more RAS matrices (patients/samples x reactions) -- Normalize and merge them, optionally adding class suffixes to sample IDs -- Build a COBRA model from a tabular CSV -- Run FVA to initialize bounds, then scale per-sample based on RAS values -- Save bounds per sample and optionally export updated models in chosen formats -""" -import argparse -import utils.general_utils as utils -from typing import Optional, Dict, Set, List, Tuple, Union -import os -import numpy as np -import pandas as pd -import cobra -from cobra import Model -import sys -from joblib import Parallel, delayed, cpu_count -import utils.model_utils as modelUtils - -################################# process args ############################### -def process_args(args :List[str] = None) -> argparse.Namespace: - """ - Processes command-line arguments. - - Args: - args (list): List of command-line arguments. - - Returns: - Namespace: An object containing parsed arguments. - """ - parser = argparse.ArgumentParser(usage = '%(prog)s [options]', - description = 'process some value\'s') - - - parser.add_argument("-mo", "--model_upload", type = str, - help = "path to input file with custom rules, if provided") - - parser.add_argument('-ol', '--out_log', - help = "Output log") - - parser.add_argument('-td', '--tool_dir', - type = str, - required = True, - help = 'your tool directory') - - parser.add_argument('-ir', '--input_ras', - type=str, - required = False, - help = 'input ras') - - parser.add_argument('-rn', '--name', - type=str, - help = 'ras class names') - - parser.add_argument('-cc', '--cell_class', - type = str, - help = 'output of cell class') - parser.add_argument( - '-idop', '--output_path', - type = str, - default='ras_to_bounds/', - help = 'output path for maps') - - parser.add_argument('-sm', '--save_models', - type=utils.Bool("save_models"), - default=False, - help = 'whether to save models with applied bounds') - - parser.add_argument('-smp', '--save_models_path', - type = str, - default='saved_models/', - help = 'output path for saved models') - - parser.add_argument('-smf', '--save_models_format', - type = str, - default='csv', - help = 'format for saved models (csv, xml, json, mat, yaml, tabular)') - - - ARGS = parser.parse_args(args) - return ARGS - -########################### warning ########################################### -def warning(s :str) -> None: - """ - Log a warning message to an output log file and print it to the console. - - Args: - s (str): The warning message to be logged and printed. - - Returns: - None - """ - if ARGS.out_log: - with open(ARGS.out_log, 'a') as log: - log.write(s + "\n\n") - print(s) - -############################ dataset input #################################### -def read_dataset(data :str, name :str) -> pd.DataFrame: - """ - Read a dataset from a CSV file and return it as a pandas DataFrame. - - Args: - data (str): Path to the CSV file containing the dataset. - name (str): Name of the dataset, used in error messages. - - Returns: - pandas.DataFrame: DataFrame containing the dataset. - - Raises: - pd.errors.EmptyDataError: If the CSV file is empty. - sys.exit: If the CSV file has the wrong format, the execution is aborted. - """ - try: - dataset = pd.read_csv(data, sep = '\t', header = 0, engine='python') - except pd.errors.EmptyDataError: - sys.exit('Execution aborted: wrong format of ' + name + '\n') - if len(dataset.columns) < 2: - sys.exit('Execution aborted: wrong format of ' + name + '\n') - return dataset - - -def apply_ras_bounds(bounds, ras_row): - """ - Adjust the bounds of reactions in the model based on RAS values. - - Args: - bounds (pd.DataFrame): Model bounds. - ras_row (pd.Series): A row from a RAS DataFrame containing scaling factors for reaction bounds. - Returns: - new_bounds (pd.DataFrame): integrated bounds. - """ - new_bounds = bounds.copy() - for reaction in ras_row.index: - scaling_factor = ras_row[reaction] - if not np.isnan(scaling_factor): - lower_bound=bounds.loc[reaction, "lower_bound"] - upper_bound=bounds.loc[reaction, "upper_bound"] - valMax=float((upper_bound)*scaling_factor) - valMin=float((lower_bound)*scaling_factor) - if upper_bound!=0 and lower_bound==0: - new_bounds.loc[reaction, "upper_bound"] = valMax - if upper_bound==0 and lower_bound!=0: - new_bounds.loc[reaction, "lower_bound"] = valMin - if upper_bound!=0 and lower_bound!=0: - new_bounds.loc[reaction, "lower_bound"] = valMin - new_bounds.loc[reaction, "upper_bound"] = valMax - return new_bounds - - -def save_model(model, filename, output_folder, file_format='csv'): - """ - Save a COBRA model to file in the specified format. - - Args: - model (cobra.Model): The model to save. - filename (str): Base filename (without extension). - output_folder (str): Output directory. - file_format (str): File format ('xml', 'json', 'mat', 'yaml', 'tabular', 'csv'). - - Returns: - None - """ - if not os.path.exists(output_folder): - os.makedirs(output_folder) - - try: - if file_format == 'tabular' or file_format == 'csv': - # Special handling for tabular format using utils functions - filepath = os.path.join(output_folder, f"{filename}.csv") - - rules = modelUtils.generate_rules(model, asParsed = False) - reactions = modelUtils.generate_reactions(model, asParsed = False) - bounds = modelUtils.generate_bounds(model) - medium = modelUtils.get_medium(model) - - try: - compartments = modelUtils.generate_compartments(model) - except: - compartments = None - - df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "Rule"]) - df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Reaction"]) - df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"}) - df_medium = medium.rename(columns = {"reaction": "ReactionID"}) - df_medium["InMedium"] = True - - merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer") - merged = merged.merge(df_bounds, on = "ReactionID", how = "outer") - - # Add compartments only if they exist and model name is ENGRO2 - if compartments is not None and hasattr(ARGS, 'name') and ARGS.name == "ENGRO2": - merged = merged.merge(compartments, on = "ReactionID", how = "outer") - - merged = merged.merge(df_medium, on = "ReactionID", how = "left") - merged["InMedium"] = merged["InMedium"].fillna(False) - merged = merged.sort_values(by = "InMedium", ascending = False) - - merged.to_csv(filepath, sep="\t", index=False) - - else: - # Standard COBRA formats - filepath = os.path.join(output_folder, f"{filename}.{file_format}") - - if file_format == 'xml': - cobra.io.write_sbml_model(model, filepath) - elif file_format == 'json': - cobra.io.save_json_model(model, filepath) - elif file_format == 'mat': - cobra.io.save_matlab_model(model, filepath) - elif file_format == 'yaml': - cobra.io.save_yaml_model(model, filepath) - else: - raise ValueError(f"Unsupported format: {file_format}") - - print(f"Model saved: {filepath}") - - except Exception as e: - warning(f"Error saving model {filename}: {str(e)}") - -def apply_bounds_to_model(model, bounds): - """ - Apply bounds from a DataFrame to a COBRA model. - - Args: - model (cobra.Model): The metabolic model to modify. - bounds (pd.DataFrame): DataFrame with reaction bounds. - - Returns: - cobra.Model: Modified model with new bounds. - """ - model_copy = model.copy() - for reaction_id in bounds.index: - try: - reaction = model_copy.reactions.get_by_id(reaction_id) - reaction.lower_bound = bounds.loc[reaction_id, "lower_bound"] - reaction.upper_bound = bounds.loc[reaction_id, "upper_bound"] - except KeyError: - # Reaction not found in model, skip - continue - return model_copy - -def process_ras_cell(cellName, ras_row, model, rxns_ids, output_folder, save_models=False, save_models_path='saved_models/', save_models_format='csv'): - """ - Process a single RAS cell, apply bounds, and save the bounds to a CSV file. - - Args: - cellName (str): The name of the RAS cell (used for naming the output file). - ras_row (pd.Series): A row from a RAS DataFrame containing scaling factors for reaction bounds. - model (cobra.Model): The metabolic model to be modified. - rxns_ids (list of str): List of reaction IDs to which the scaling factors will be applied. - output_folder (str): Folder path where the output CSV file will be saved. - save_models (bool): Whether to save models with applied bounds. - save_models_path (str): Path where to save models. - save_models_format (str): Format for saved models. - - Returns: - None - """ - bounds = pd.DataFrame([(rxn.lower_bound, rxn.upper_bound) for rxn in model.reactions], index=rxns_ids, columns=["lower_bound", "upper_bound"]) - new_bounds = apply_ras_bounds(bounds, ras_row) - new_bounds.to_csv(output_folder + cellName + ".csv", sep='\t', index=True) - - # Save model if requested - if save_models: - modified_model = apply_bounds_to_model(model, new_bounds) - save_model(modified_model, cellName, save_models_path, save_models_format) - - return - -def generate_bounds_model(model: cobra.Model, ras=None, output_folder='output/', save_models=False, save_models_path='saved_models/', save_models_format='csv') -> pd.DataFrame: - """ - Generate reaction bounds for a metabolic model based on medium conditions and optional RAS adjustments. - - Args: - model (cobra.Model): The metabolic model for which bounds will be generated. - ras (pd.DataFrame, optional): RAS pandas dataframe. Defaults to None. - output_folder (str, optional): Folder path where output CSV files will be saved. Defaults to 'output/'. - save_models (bool): Whether to save models with applied bounds. - save_models_path (str): Path where to save models. - save_models_format (str): Format for saved models. - - Returns: - pd.DataFrame: DataFrame containing the bounds of reactions in the model. - """ - rxns_ids = [rxn.id for rxn in model.reactions] - - # Perform Flux Variability Analysis (FVA) on this medium - df_FVA = cobra.flux_analysis.flux_variability_analysis(model, fraction_of_optimum=0, processes=1).round(8) - - # Set FVA bounds - for reaction in rxns_ids: - model.reactions.get_by_id(reaction).lower_bound = float(df_FVA.loc[reaction, "minimum"]) - model.reactions.get_by_id(reaction).upper_bound = float(df_FVA.loc[reaction, "maximum"]) - - if ras is not None: - Parallel(n_jobs=cpu_count())(delayed(process_ras_cell)( - cellName, ras_row, model, rxns_ids, output_folder, - save_models, save_models_path, save_models_format - ) for cellName, ras_row in ras.iterrows()) - else: - raise ValueError("RAS DataFrame is None. Cannot generate bounds without RAS data.") - return - -############################# main ########################################### -def main(args:List[str] = None) -> None: - """ - Initialize and execute RAS-to-bounds pipeline based on the frontend input arguments. - - Returns: - None - """ - if not os.path.exists('ras_to_bounds'): - os.makedirs('ras_to_bounds') - - global ARGS - ARGS = process_args(args) - - - ras_file_list = ARGS.input_ras.split(",") - ras_file_names = ARGS.name.split(",") - if len(ras_file_names) != len(set(ras_file_names)): - error_message = "Duplicated file names in the uploaded RAS matrices." - warning(error_message) - raise ValueError(error_message) - - ras_class_names = [] - for file in ras_file_names: - ras_class_names.append(file.rsplit(".", 1)[0]) - ras_list = [] - class_assignments = pd.DataFrame(columns=["Patient_ID", "Class"]) - for ras_matrix, ras_class_name in zip(ras_file_list, ras_class_names): - ras = read_dataset(ras_matrix, "ras dataset") - ras.replace("None", None, inplace=True) - ras.set_index("Reactions", drop=True, inplace=True) - ras = ras.T - ras = ras.astype(float) - if(len(ras_file_list)>1): - # Append class name to patient id (DataFrame index) - ras.index = [f"{idx}_{ras_class_name}" for idx in ras.index] - else: - ras.index = [f"{idx}" for idx in ras.index] - ras_list.append(ras) - for patient_id in ras.index: - class_assignments.loc[class_assignments.shape[0]] = [patient_id, ras_class_name] - - - # Concatenate all RAS DataFrames into a single DataFrame - ras_combined = pd.concat(ras_list, axis=0) - # Normalize RAS values column-wise by max RAS - ras_combined = ras_combined.div(ras_combined.max(axis=0)) - ras_combined.dropna(axis=1, how='all', inplace=True) - - model = modelUtils.build_cobra_model_from_csv(ARGS.model_upload) - - validation = modelUtils.validate_model(model) - - print("\n=== MODEL VALIDATION ===") - for key, value in validation.items(): - print(f"{key}: {value}") - - - generate_bounds_model(model, ras=ras_combined, output_folder=ARGS.output_path, - save_models=ARGS.save_models, save_models_path=ARGS.save_models_path, - save_models_format=ARGS.save_models_format) - class_assignments.to_csv(ARGS.cell_class, sep='\t', index=False) - - - return - -############################################################################## -if __name__ == "__main__": - main() \ No newline at end of file
--- a/COBRAxy/ras_to_bounds_beta.xml Tue Sep 23 13:48:24 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,93 +0,0 @@ -<tool id="MaREA RAS to bounds - Beta" name="RAStoBounds - BETA" version="2.0.0"> - - <macros> - <import>marea_macros.xml</import> - </macros> - - <requirements> - <requirement type="package" version="1.24.4">numpy</requirement> - <requirement type="package" version="2.0.3">pandas</requirement> - <requirement type="package" version="0.29.0">cobra</requirement> - <requirement type="package" version="5.2.2">lxml</requirement> - <requirement type="package" version="1.4.2">joblib</requirement> - </requirements> - - <command detect_errors="exit_code"> - <![CDATA[ - python $__tool_directory__/ras_to_bounds_beta.py - --tool_dir $__tool_directory__ - --cell_class $cell_class - --model_upload $model_upload - #set $names = "" - --input_ras "${",".join(map(str, $input_ras))}" - #for $input_temp in $input_ras: - #set $names = $names + $input_temp.element_identifier + "," - #end for - - --save_models $save_models - --save_models_path saved_models/ - --name "$names" - --out_log $log - ]]> - </command> - <inputs> - - <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" multiple="false" - label="Model tabular file:" help="Upload a CSV/TSV file containing information generated by the Model Initialization tool." /> - - <param name="input_ras" argument="--input_ras" multiple="true" type="data" format="tabular, csv, tsv" label="RAS matrix:" /> - - - <param name="save_models" argument="--save_models" type="select" label="Save models with applied bounds?"> - <option value="False" selected="true">No</option> - <option value="True">Yes</option> - </param> - - </inputs> - - <outputs> - <data format="txt" name="log" label="RAStoBounds- Log" /> - <data format="tabular" name="cell_class" label="RAStoBounds - Cells class" /> - <collection name="ras_to_bounds" type="list" label="Ras to Bounds"> - <discover_datasets name = "collection" pattern="__name_and_ext__" directory="ras_to_bounds"/> - </collection> - <collection name="saved_models" type="list" label="Saved Models (Tabular Format)"> - <filter>save_models == "True"</filter> - <discover_datasets name = "saved_models_collection" pattern="__name_and_ext__" directory="saved_models"/> - </collection> - </outputs> - - <help> - - <![CDATA[ - -What it does -------------- - -This tool generates the reaction bounds for a given tabular model created by the Metabolic Model Setting Tool and the Reaction Activity Scores (RAS) matrix generated by the RAS Generator. - - -Accepted files: - - A tabular model: tab-separated file containing information about a metabolic model. - - RAS matrix: tab-separated RAS file as returned by RAS generator. Multiple RAS files having different file name can be uploaded too (e.g. one RAS matrix for normal cells and one for cancer cells). Note that if multiple RAs matrices are uploaded, the bounds are normalzed across all cells. - - -Example for multiple RAS matrices: - - cancer.csv and normal.csv generated by RAS generator tool (the two class names are 'cancer' and 'normal'). - - This tool returns one unique collection of bounds files for both cancer and normal cells (normalization is performed across all cells). - - The association cell-class is reported in the 'cell_class' file that is useful to perform flux enrichment analysis based on class partenrship. - -Output: -------------- - -The tool generates: - - A collection of tab files, one for each sample. Each file contains the lower and upper bounds computed from the RAS values and the FVA, used to perform flux sampling or optimization. - - Classes: a file containing the class of each sample. The class name of a RAS matrix corresponds to its file name. Format: tab-separated. - - a log file (.txt). - ]]> - </help> - <expand macro="citations" /> - -</tool> - -
--- a/COBRAxy/rps_generator.py Tue Sep 23 13:48:24 2025 +0000 +++ b/COBRAxy/rps_generator.py Mon Sep 29 10:33:26 2025 +0000 @@ -1,3 +1,10 @@ +""" +Compute Reaction Propensity Scores (RPS) from metabolite abundances and reaction stoichiometry. + +Given a tabular dataset (metabolites x samples) and a reaction set, this script +maps metabolite names via synonyms, fills missing metabolites, and computes RPS +per reaction for each sample using a log-normalized formula. +""" import math import argparse @@ -22,17 +29,14 @@ Returns: Namespace: An object containing parsed arguments. """ - parser = argparse.ArgumentParser(usage = '%(prog)s [options]', - description = 'process some value\'s'+ - ' abundances and reactions to create RPS scores.') - parser.add_argument('-rc', '--reaction_choice', - type = str, - default = 'default', - choices = ['default','custom'], - help = 'chose which type of reaction dataset you want use') - parser.add_argument('-cm', '--custom', - type = str, - help='your dataset if you want custom reactions') + parser = argparse.ArgumentParser( + usage='%(prog)s [options]', + description='Process abundances and reactions to create RPS scores.' + ) + + parser.add_argument("-rl", "--model_upload", type = str, + help = "path to input file containing the reactions") + parser.add_argument('-td', '--tool_dir', type = str, required = True, @@ -57,8 +61,8 @@ Produces a unique name for a dataset based on what was provided by the user. The default name for any dataset is "Dataset", thus if the user didn't change it this function appends f"_{count}" to make it unique. Args: - name_data : name associated with the dataset (from frontend input params) - count : counter from 1 to make these names unique (external) + name_data: Name associated with the dataset (from frontend input params). + count: Counter starting at 1 to make names unique when default. Returns: str : the name made unique @@ -84,7 +88,7 @@ Returns None if the cell index is invalid. """ if cell_line_index < 0 or cell_line_index >= len(dataset.index): - print(f"Errore: This cell line index: '{cell_line_index}' is not valid.") + print(f"Error: cell line index '{cell_line_index}' is not valid.") return None cell_line_name = dataset.columns[cell_line_index] @@ -121,7 +125,8 @@ """ name = clean_metabolite_name(name) for id, synonyms in syn_dict.items(): - if name in synonyms: return id + if name in synonyms: + return id return "" @@ -131,7 +136,8 @@ Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly. Parameters: - reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values. + reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and + stoichiometric coefficients as values. dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines. cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites. @@ -139,7 +145,7 @@ list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their aboundances were set to 1. Side effects: - dataset_by_rows : mut + dataset_by_rows: mutated to include missing metabolites with default abundances. """ missing_list = [] for reaction in reactions.values(): @@ -199,23 +205,27 @@ Returns: None """ + cell_lines = dataset[0][1:] abundances_dict = {} - translationIsApplied = ARGS.reaction_choice == "default" for row in dataset[1:]: - id = get_metabolite_id(row[0], syn_dict) if translationIsApplied else row[0] - if id: abundances_dict[id] = list(map(utils.Float(), row[1:])) - + id = get_metabolite_id(row[0], syn_dict) + if id: + abundances_dict[id] = list(map(utils.Float(), row[1:])) + missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines))) - + rps_scores :Dict[Dict[str, float]] = {} for pos, cell_line_name in enumerate(cell_lines): abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() } + rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable) df = pd.DataFrame.from_dict(rps_scores) - + df = df.loc[list(reactions.keys()),:] + # Optional preview: first 10 rows + # print(df.head(10)) df.index.name = 'Reactions' df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True) @@ -230,29 +240,44 @@ global ARGS ARGS = process_args(args) - # TODO:use utils functions vvv + # Load support data (black list and synonyms) with open(ARGS.tool_dir + '/local/pickle files/black_list.pickle', 'rb') as bl: black_list = pk.load(bl) with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd: syn_dict = pk.load(sd) - dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False) + dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader=False) + tmp_dict = None - if ARGS.reaction_choice == 'default': - reactions = pk.load(open(ARGS.tool_dir + '/local/pickle files/reactions.pickle', 'rb')) - substrateFreqTable = pk.load(open(ARGS.tool_dir + '/local/pickle files/substrate_frequencies.pickle', 'rb')) - - elif ARGS.reaction_choice == 'custom': - reactions = reactionUtils.parse_custom_reactions(ARGS.custom) - substrateFreqTable = {} - for _, substrates in reactions.items(): - for substrateName, _ in substrates.items(): - if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0 - substrateFreqTable[substrateName] += 1 + # Parse custom reactions from uploaded file + reactions = reactionUtils.parse_custom_reactions(ARGS.model_upload) + for r, s in reactions.items(): + tmp_list = list(s.keys()) + for k in tmp_list: + if k[-2] == '_': + s[k[:-2]] = s.pop(k) + substrateFreqTable = {} + for _, substrates in reactions.items(): + for substrateName, _ in substrates.items(): + if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0 + substrateFreqTable[substrateName] += 1 + + # Debug prints (can be enabled during troubleshooting) + # print(f"Reactions: {reactions}") + # print(f"Substrate Frequencies: {substrateFreqTable}") + # print(f"Synonyms: {syn_dict}") + tmp_dict = {} + for metabName, freq in substrateFreqTable.items(): + tmp_metabName = clean_metabolite_name(metabName) + for syn_key, syn_list in syn_dict.items(): + if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key): + # print(f"Mapping {tmp_metabName} to {syn_key}") + tmp_dict[syn_key] = syn_list + tmp_dict[syn_key].append(tmp_metabName) rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable) - print('Execution succeded') + print('Execution succeeded') ############################################################################## if __name__ == "__main__": main()
--- a/COBRAxy/rps_generator.xml Tue Sep 23 13:48:24 2025 +0000 +++ b/COBRAxy/rps_generator.xml Mon Sep 29 10:33:26 2025 +0000 @@ -12,29 +12,19 @@ <command detect_errors="exit_code"> <![CDATA[ python $__tool_directory__/rps_generator.py - --input $input - --reaction_choice $cond_reactions.reaction_choice + --input $input --tool_dir $__tool_directory__ --out_log $log --rps_output $rps_output - #if $cond_reactions.reaction_choice == 'custom' - --custom $cond_reactions.Custom_react - #end if + --model_upload $model_upload ]]> </command> - <inputs> + <inputs> + <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" + label="Model rules file:" help="Upload a CSV/TSV file containing reaction rules generated by the Model Initialization tool." /> + <param name="input" argument="--input" type="data" format="tabular, tsv, csv" label="Abundance dataset:" /> <param name="name" argument="--name" type="text" label="Dataset's name:" value="Dataset_RPS" help="Default: Dataset_RPS. Do not use white spaces or special symbols." /> - - <conditional name="cond_reactions"> - <param name="reaction_choice" argument="--reaction_choice" type="select" label="Choose reaction dataset:"> - <option value="default" selected="true">ENGRO2 reaction dataset </option> - <option value="custom">Custom reaction dataset</option> - </param> - <when value="custom"> - <param name="Custom_react" type="data" format="csv" label="Custom reactions" /> - </when> - </conditional> </inputs> <outputs>
--- a/COBRAxy/rps_generator_beta.py Tue Sep 23 13:48:24 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,283 +0,0 @@ -""" -Compute Reaction Propensity Scores (RPS) from metabolite abundances and reaction stoichiometry. - -Given a tabular dataset (metabolites x samples) and a reaction set, this script -maps metabolite names via synonyms, fills missing metabolites, and computes RPS -per reaction for each sample using a log-normalized formula. -""" -import math -import argparse - -import numpy as np -import pickle as pk -import pandas as pd - -from typing import Optional, List, Dict - -import utils.general_utils as utils -import utils.reaction_parsing as reactionUtils - -########################## argparse ########################################## -ARGS :argparse.Namespace -def process_args(args:List[str] = None) -> argparse.Namespace: - """ - Processes command-line arguments. - - Args: - args (list): List of command-line arguments. - - Returns: - Namespace: An object containing parsed arguments. - """ - parser = argparse.ArgumentParser( - usage='%(prog)s [options]', - description='Process abundances and reactions to create RPS scores.' - ) - - parser.add_argument("-rl", "--model_upload", type = str, - help = "path to input file containing the reactions") - - parser.add_argument('-td', '--tool_dir', - type = str, - required = True, - help = 'your tool directory') - parser.add_argument('-ol', '--out_log', - help = "Output log") - parser.add_argument('-id', '--input', - type = str, - required = True, - help = 'input dataset') - parser.add_argument('-rp', '--rps_output', - type = str, - required = True, - help = 'rps output') - - args = parser.parse_args(args) - return args - -############################ dataset name ##################################### -def name_dataset(name_data :str, count :int) -> str: - """ - Produces a unique name for a dataset based on what was provided by the user. The default name for any dataset is "Dataset", thus if the user didn't change it this function appends f"_{count}" to make it unique. - - Args: - name_data: Name associated with the dataset (from frontend input params). - count: Counter starting at 1 to make names unique when default. - - Returns: - str : the name made unique - """ - if str(name_data) == 'Dataset': - return str(name_data) + '_' + str(count) - else: - return str(name_data) - -############################ get_abund_data #################################### -def get_abund_data(dataset: pd.DataFrame, cell_line_index:int) -> Optional[pd.Series]: - """ - Extracts abundance data and turns it into a series for a specific cell line from the dataset, which rows are - metabolites and columns are cell lines. - - Args: - dataset (pandas.DataFrame): The DataFrame containing abundance data for all cell lines and metabolites. - cell_line_index (int): The index of the cell line of interest in the dataset. - - Returns: - pd.Series or None: A series containing abundance values for the specified cell line. - The name of the series is the name of the cell line. - Returns None if the cell index is invalid. - """ - if cell_line_index < 0 or cell_line_index >= len(dataset.index): - print(f"Error: cell line index '{cell_line_index}' is not valid.") - return None - - cell_line_name = dataset.columns[cell_line_index] - abundances_series = dataset[cell_line_name][1:] - - return abundances_series - -############################ clean_metabolite_name #################################### -def clean_metabolite_name(name :str) -> str: - """ - Removes some characters from a metabolite's name, provided as input, and makes it lowercase in order to simplify - the search of a match in the dictionary of synonyms. - - Args: - name : the metabolite's name, as given in the dataset. - - Returns: - str : a new string with the cleaned name. - """ - return "".join(ch for ch in name if ch not in ",;-_'([{ }])").lower() - -############################ get_metabolite_id #################################### -def get_metabolite_id(name :str, syn_dict :Dict[str, List[str]]) -> str: - """ - Looks through a dictionary of synonyms to find a match for a given metabolite's name. - - Args: - name : the metabolite's name, as given in the dataset. - syn_dict : the dictionary of synonyms, using unique identifiers as keys and lists of clean synonyms as values. - - Returns: - str : the internal :str unique identifier of that metabolite, used in all other parts of the model in use. - An empty string is returned if a match isn't found. - """ - name = clean_metabolite_name(name) - for id, synonyms in syn_dict.items(): - if name in synonyms: - return id - - return "" - -############################ check_missing_metab #################################### -def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]: - """ - Check for missing metabolites in the abundances dictionary compared to the reactions dictionary and update abundances accordingly. - - Parameters: - reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and - stoichiometric coefficients as values. - dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines. - cell_lines_amt : amount of cell lines, needed to add a new list of abundances for missing metabolites. - - Returns: - list[str] : list of metabolite names that were missing in the original abundances dictionary and thus their aboundances were set to 1. - - Side effects: - dataset_by_rows: mutated to include missing metabolites with default abundances. - """ - missing_list = [] - for reaction in reactions.values(): - for metabolite in reaction.keys(): - if metabolite not in dataset_by_rows: - dataset_by_rows[metabolite] = [1] * cell_lines_amt - missing_list.append(metabolite) - - return missing_list - -############################ calculate_rps #################################### -def calculate_rps(reactions: Dict[str, Dict[str, int]], abundances: Dict[str, float], black_list: List[str], missing_list: List[str], substrateFreqTable: Dict[str, int]) -> Dict[str, float]: - """ - Calculate the Reaction Propensity scores (RPS) based on the availability of reaction substrates, for (ideally) each input model reaction and for each sample. - The score is computed as the product of the concentrations of the reacting substances, with each concentration raised to a power equal to its stoichiometric coefficient - for each reaction using the provided coefficient and abundance values. The value is then normalized, based on how frequent the metabolite is in the selected model's reactions, - and log-transformed. - - Parameters: - reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values. - abundances (dict): A dictionary representing metabolite abundances where keys are metabolite names and values are their corresponding abundances. - black_list (list): A list containing metabolite names that should be excluded from the RPS calculation. - missing_list (list): A list containing metabolite names that were missing in the original abundances dictionary and thus their values were set to 1. - substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value). - - Returns: - dict: A dictionary containing Reaction Propensity Scores (RPS) where keys are reaction names and values are the corresponding RPS scores. - """ - rps_scores = {} - - for reaction_name, substrates in reactions.items(): - total_contribution = 0 - metab_significant = False - for metabolite, stoichiometry in substrates.items(): - abundance = 1 if math.isnan(abundances[metabolite]) else abundances[metabolite] - if metabolite not in black_list and metabolite not in missing_list: - metab_significant = True - - total_contribution += math.log((abundance + np.finfo(float).eps) / substrateFreqTable[metabolite]) * stoichiometry - - rps_scores[reaction_name] = total_contribution if metab_significant else math.nan - - return rps_scores - -############################ rps_for_cell_lines #################################### -def rps_for_cell_lines(dataset: List[List[str]], reactions: Dict[str, Dict[str, int]], black_list: List[str], syn_dict: Dict[str, List[str]], substrateFreqTable: Dict[str, int]) -> None: - """ - Calculate Reaction Propensity Scores (RPS) for each cell line represented in the dataframe and creates an output file. - - Parameters: - dataset : the dataset's data, by rows - reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values. - black_list (list): A list containing metabolite names that should be excluded from the RPS calculation. - syn_dict (dict): A dictionary where keys are general metabolite names and values are lists of possible synonyms. - substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value). - - Returns: - None - """ - - cell_lines = dataset[0][1:] - abundances_dict = {} - - for row in dataset[1:]: - id = get_metabolite_id(row[0], syn_dict) - if id: - abundances_dict[id] = list(map(utils.Float(), row[1:])) - - missing_list = check_missing_metab(reactions, abundances_dict, len((cell_lines))) - - rps_scores :Dict[Dict[str, float]] = {} - for pos, cell_line_name in enumerate(cell_lines): - abundances = { metab : abundances[pos] for metab, abundances in abundances_dict.items() } - - rps_scores[cell_line_name] = calculate_rps(reactions, abundances, black_list, missing_list, substrateFreqTable) - - df = pd.DataFrame.from_dict(rps_scores) - df = df.loc[list(reactions.keys()),:] - # Optional preview: first 10 rows - # print(df.head(10)) - df.index.name = 'Reactions' - df.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True) - -############################ main #################################### -def main(args:List[str] = None) -> None: - """ - Initializes everything and sets the program in motion based on the fronted input arguments. - - Returns: - None - """ - global ARGS - ARGS = process_args(args) - - # Load support data (black list and synonyms) - with open(ARGS.tool_dir + '/local/pickle files/black_list.pickle', 'rb') as bl: - black_list = pk.load(bl) - - with open(ARGS.tool_dir + '/local/pickle files/synonyms.pickle', 'rb') as sd: - syn_dict = pk.load(sd) - - dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader=False) - tmp_dict = None - - # Parse custom reactions from uploaded file - reactions = reactionUtils.parse_custom_reactions(ARGS.model_upload) - for r, s in reactions.items(): - tmp_list = list(s.keys()) - for k in tmp_list: - if k[-2] == '_': - s[k[:-2]] = s.pop(k) - substrateFreqTable = {} - for _, substrates in reactions.items(): - for substrateName, _ in substrates.items(): - if substrateName not in substrateFreqTable: substrateFreqTable[substrateName] = 0 - substrateFreqTable[substrateName] += 1 - - # Debug prints (can be enabled during troubleshooting) - # print(f"Reactions: {reactions}") - # print(f"Substrate Frequencies: {substrateFreqTable}") - # print(f"Synonyms: {syn_dict}") - tmp_dict = {} - for metabName, freq in substrateFreqTable.items(): - tmp_metabName = clean_metabolite_name(metabName) - for syn_key, syn_list in syn_dict.items(): - if tmp_metabName in syn_list or tmp_metabName == clean_metabolite_name(syn_key): - # print(f"Mapping {tmp_metabName} to {syn_key}") - tmp_dict[syn_key] = syn_list - tmp_dict[syn_key].append(tmp_metabName) - - rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable) - print('Execution succeeded') - -############################################################################## -if __name__ == "__main__": main()
--- a/COBRAxy/rps_generator_beta.xml Tue Sep 23 13:48:24 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,69 +0,0 @@ -<tool id="MaREA RPS Generator - Beta" name="Expression2RPS - BETA" version="2.0.0"> - <description>- Reaction Propensity Scores computation</description> - <macros> - <import>marea_macros.xml</import> - </macros> - <requirements> - <requirement type="package" version="1.24.4">numpy</requirement> - <requirement type="package" version="2.0.3">pandas</requirement> - <requirement type="package" version="5.2.2">lxml</requirement> - <requirement type="package" version="0.29.0">cobra</requirement> - </requirements> - <command detect_errors="exit_code"> - <![CDATA[ - python $__tool_directory__/rps_generator_beta.py - --input $input - --tool_dir $__tool_directory__ - --out_log $log - --rps_output $rps_output - --model_upload $model_upload - ]]> - </command> - <inputs> - <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular" - label="Model rules file:" help="Upload a CSV/TSV file containing reaction rules generated by the Model Initialization tool." /> - - <param name="input" argument="--input" type="data" format="tabular, tsv, csv" label="Abundance dataset:" /> - <param name="name" argument="--name" type="text" label="Dataset's name:" value="Dataset_RPS" help="Default: Dataset_RPS. Do not use white spaces or special symbols." /> - </inputs> - - <outputs> - <data format="txt" name="log" label="Expression2RPS - $name - Log" /> - <data format="tabular" name="rps_output" label="$name"/> - </outputs> - - <help> -<![CDATA[ - -What it does -------------- - -This tool computes Reaction Propensity Scores based on the availability of reaction substrates, for (ideally) each input model reaction and for each sample. -The score is computed as the product of the concentrations of the reacting substances, with each concentration raised to a power equal to its stoichiometric coefficient. According to themass action law, the rate of any chemical reaction is indeed proportional to this product. -This assumption holds as long as the substrate is in significant excess over the enzyme constant KM. -If a metabolite is either missing in the model provided with respect to its reactions or it is present in our "black list", the RPS score is set to NaN. -This "black list" of metabolites contains those substrates that are present in too many reactions to be significant. It is defined in the file black_list.pickle and cannot be modified by the user. - -Accepted files: - - An abundance dataset: Tab-separated text file reporting the abundance value of each metabolite for each cell line in the dataset. - Column header: cell line ID. - Row header: metabolite ID. - - -Optional files: - - Custom reaction dataset: .csv file specifying for each reaction ID the corresponding formula. - First column: reaction ID - Second column: reaction formula. - - -Output: -------------- - -The tool generates: - - a tab-separated file(.csv): reporting the RPS values for each reaction and each cell line in the dataset. - - a log file (.txt). -]]> - </help> -<expand macro="citations" /> -</tool> -
