comparison COBRAxy/custom_data_generator.py @ 343:4165326259cc draft
description | Uploaded |
---|---|
author | francesco_lapi |
date | Thu, 04 Sep 2025 22:09:42 +0000 |
parents | 3fca9b568faf |
children | 25862d166f88 |
342:057909a104d9 | 343:4165326259cc |
---|---|
8 import utils.rule_parsing as rulesUtils | 8 import utils.rule_parsing as rulesUtils |
9 from typing import Optional, Tuple, Union, List, Dict | 9 from typing import Optional, Tuple, Union, List, Dict |
10 import utils.reaction_parsing as reactionUtils | 10 import utils.reaction_parsing as reactionUtils |
11 | 11 |
12 ARGS : argparse.Namespace | 12 ARGS : argparse.Namespace |
13 def process_args(args:List[str] = None) -> argparse.Namespace: | 13 def process_args(args: List[str] = None) -> argparse.Namespace: |
14 """ | 14 """ |
15 Interfaces the script of a module with its frontend, making the user's choices for | 15 Parse command-line arguments for CustomDataGenerator. |
16 various parameters available as values in code. | 16 """ |
17 | 17 |
18 Args: | |
19 args : Always obtained (in file) from sys.argv | |
20 | |
21 Returns: | |
22 Namespace : An object containing the parsed arguments | |
23 """ | |
24 parser = argparse.ArgumentParser( | 18 parser = argparse.ArgumentParser( |
25 usage = "%(prog)s [options]", | 19 usage="%(prog)s [options]", |
26 description = "generate custom data from a given model") | 20 description="Generate custom data from a given model" |
27 | 21 ) |
28 parser.add_argument("-ol", "--out_log", type = str, required = True, help = "Output log") | 22 |
29 | 23 parser.add_argument("--out_log", type=str, required=True, |
30 parser.add_argument("-orules", "--out_rules", type = str, required = True, help = "Output rules") | 24 help="Output log file") |
31 parser.add_argument("-orxns", "--out_reactions", type = str, required = True, help = "Output reactions") | 25 parser.add_argument("--out_data", type=str, required=True, |
32 parser.add_argument("-omedium", "--out_medium", type = str, required = True, help = "Output medium") | 26 help="Single output dataset (CSV or Excel)") |
33 parser.add_argument("-obnds", "--out_bounds", type = str, required = True, help = "Output bounds") | 27 |
34 | 28 parser.add_argument("--model", type=str, |
35 parser.add_argument("-id", "--input", type = str, required = True, help = "Input model") | 29 help="Built-in model identifier (e.g., ENGRO2, Recon, HMRcore)") |
36 parser.add_argument("-mn", "--name", type = str, required = True, help = "Input model name") | 30 parser.add_argument("--input", type=str, |
37 # ^ I need this because galaxy converts my files into .dat but I need to know what extension they were in | 31 help="Custom model file (JSON or XML)") |
38 parser.add_argument('-idop', '--output_path', type = str, default='result', help = 'output path for maps') | 32 parser.add_argument("--name", type=str, required=True, |
39 argsNamespace = parser.parse_args(args) | 33 help="Model name (default or custom)") |
40 # ^ can't get this one to work from xml, there doesn't seem to be a way to get the directory attribute from the collection | 34 |
41 | 35 parser.add_argument("--medium_selector", type=str, required=True, |
42 return argsNamespace | 36 help="Medium selection option (default/custom)") |
| 37 parser.add_argument("--medium", type=str, |
| 38 help="Custom medium file if medium_selector=Custom") |
| 39 |
| 40 parser.add_argument("--output_format", type=str, choices=["tabular", "xlsx"], required=True, |
| 41 help="Output format: CSV (tabular) or Excel (xlsx)") |
| 42 |
| 43 parser.add_argument('-idop', '--output_path', type = str, default='result', |
| 44 help = 'output path for the result files (default: result)') |
| 45 |
| 46 |
| 47 return parser.parse_args(args) |
43 | 48 |
44 ################################- INPUT DATA LOADING -################################ | 49 ################################- INPUT DATA LOADING -################################ |
45 def load_custom_model(file_path :utils.FilePath, ext :Optional[utils.FileFormat] = None) -> cobra.Model: | 50 def load_custom_model(file_path :utils.FilePath, ext :Optional[utils.FileFormat] = None) -> cobra.Model: |
46 """ | 51 """ |
47 Loads a custom model from a file, either in JSON or XML format. | 52 Loads a custom model from a file, either in JSON or XML format. |
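The hunk above replaces the four per-file outputs (`-orules`, `-orxns`, `-omedium`, `-obnds`) with a single `--out_data` dataset plus an `--output_format` switch, and lets the model be chosen either as a built-in identifier (`--model`) or as a custom file (`--input`). A minimal sketch of driving the reworked parser from Python follows; the flag names are taken from the 343 column, while the file names and the medium selector value are hypothetical placeholders, and importing the module assumes the COBRAxy sources and their dependencies are on `sys.path`:

```python
# Sketch only: exercise the argument parser introduced in 343.
# Paths and the "Default" selector value are made-up placeholders.
from custom_data_generator import process_args

ns = process_args([
    "--out_log", "run.log",
    "--out_data", "ENGRO2_custom_data.tsv",
    "--model", "ENGRO2",             # built-in model id; pass --input instead for a custom model file
    "--name", "ENGRO2",
    "--medium_selector", "Default",
    "--output_format", "tabular",    # or "xlsx" for an Excel output
])

print(ns.model, ns.output_format)    # ENGRO2 tabular
```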
193 # get args from frontend (related xml) | 198 # get args from frontend (related xml) |
194 global ARGS | 199 global ARGS |
195 ARGS = process_args(args) | 200 ARGS = process_args(args) |
196 | 201 |
197 # this is the worst thing I've seen so far, congrats to the former MaREA devs for suggesting this! | 202 # this is the worst thing I've seen so far, congrats to the former MaREA devs for suggesting this! |
198 if os.path.isdir(ARGS.output_path) == False: os.makedirs(ARGS.output_path) | 203 if os.path.isdir(ARGS.output_path) == False: |
199 | 204 os.makedirs(ARGS.output_path) |
200 # load custom model | 205 |
201 model = load_custom_model( | 206 if ARGS.input != "None": |
202 utils.FilePath.fromStrPath(ARGS.input), utils.FilePath.fromStrPath(ARGS.name).ext) | 207 # load custom model |
| 208 model = load_custom_model( |
| 209 utils.FilePath.fromStrPath(ARGS.input), utils.FilePath.fromStrPath(ARGS.name).ext) |
| 210 else: |
| 211 # load built-in model |
| 212 |
| 213 try: |
| 214 model_enum = utils.Model[ARGS.model] # e.g., Model['ENGRO2'] |
| 215 except KeyError: |
| 216 raise utils.ArgsErr("model", "one of Recon/ENGRO2/HMRcore/Custom_model", ARGS.model) |
| 217 |
| 218 # Load built-in model (Model.getCOBRAmodel uses tool_dir to locate local models) |
| 219 try: |
| 220 model = model_enum.getCOBRAmodel(toolDir='../../COBRAxy') |
| 221 except Exception as e: |
| 222 # Wrap/normalize load errors as DataErr for consistency |
| 223 raise utils.DataErr(ARGS.model, f"failed loading built-in model: {e}") |
| 224 |
| 225 # Determine final model name: explicit --name overrides, otherwise use the model id |
| 226 model_name = ARGS.name if ARGS.name else ARGS.model |
203 | 227 |
204 # generate data | 228 # generate data |
205 rules = generate_rules(model, asParsed = False) | 229 rules = generate_rules(model, asParsed = False) |
206 reactions = generate_reactions(model, asParsed = False) | 230 reactions = generate_reactions(model, asParsed = False) |
207 bounds = generate_bounds(model) | 231 bounds = generate_bounds(model) |
208 medium = get_medium(model) | 232 medium = get_medium(model) |
209 | 233 |
210 # save files out of collection: path coming from xml | 234 df_rules = pd.DataFrame(list(rules.items()), columns = ["ReactionID", "Rule"]) |
211 save_as_csv(rules, ARGS.out_rules, ("ReactionID", "Rule")) | 235 df_reactions = pd.DataFrame(list(reactions.items()), columns = ["ReactionID", "Reaction"]) |
212 save_as_csv(reactions, ARGS.out_reactions, ("ReactionID", "Reaction")) | 236 |
213 bounds.to_csv(ARGS.out_bounds, sep = '\t') | 237 df_bounds = bounds.reset_index().rename(columns = {"index": "ReactionID"}) |
214 medium.to_csv(ARGS.out_medium, sep = '\t') | 238 df_medium = medium.rename(columns = {"reaction": "ReactionID"}) |
239 df_medium["InMedium"] = True # flag per indicare la presenza nel medium | |
| 240 |
| 241 merged = df_reactions.merge(df_rules, on = "ReactionID", how = "outer") |
| 242 merged = merged.merge(df_bounds, on = "ReactionID", how = "outer") |
| 243 |
| 244 merged = merged.merge(df_medium, on = "ReactionID", how = "left") |
| 245 |
| 246 merged["InMedium"] = merged["InMedium"].fillna(False) |
| 247 |
| 248 merged = merged.sort_values(by = "InMedium", ascending = False) |
| 249 |
| 250 out_file = os.path.join(ARGS.output_path, f"{os.path.basename(ARGS.name).split('.')[0]}_custom_data") |
| 251 |
| 252 #merged.to_csv(out_file, sep = '\t', index = False) |
| 253 |
| 254 |
| 255 #### |
| 256 out_data_path = out_file #ARGS.out_data |
| 257 |
| 258 # If Galaxy provided a .dat name, ensure a correct extension according to output_format |
| 259 if ARGS.output_format == "xlsx": |
| 260 if not out_data_path.lower().endswith(".xlsx"): |
| 261 out_data_path = out_data_path + ".xlsx" |
| 262 merged.to_excel(out_data_path, index=False) |
| 263 else: |
| 264 # 'tabular' -> tab-separated, extension .csv is fine and common for Galaxy tabular |
| 265 if not (out_data_path.lower().endswith(".csv") or out_data_path.lower().endswith(".tsv")): |
| 266 out_data_path = out_data_path + ".csv" |
| 267 merged.to_csv(out_data_path, sep="\t", index=False) |
| 268 |
215 | 269 |
216 if __name__ == '__main__': | 270 if __name__ == '__main__': |
217 main() | 271 main() |
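For context on what the single merged dataset looks like, here is a self-contained pandas sketch that mirrors the outer-merge logic added in the 343 column (`df_rules`, `df_reactions`, `df_bounds` and `df_medium` joined on `ReactionID`, with an `InMedium` flag). The reaction IDs, rules and bounds are invented toy values, not data from any bundled model:

```python
# Toy illustration of the merge performed by the new main(); all values are invented.
import pandas as pd

rules = {"R1": "GeneA or GeneB", "R2": "GeneC"}
reactions = {"R1": "a --> b", "R2": "b --> c"}
bounds = pd.DataFrame({"lower_bound": [-1000.0, 0.0], "upper_bound": [1000.0, 1000.0]},
                      index=["R1", "R2"])
medium = pd.DataFrame({"reaction": ["R1"]})   # only R1 sits in the toy medium

df_rules = pd.DataFrame(list(rules.items()), columns=["ReactionID", "Rule"])
df_reactions = pd.DataFrame(list(reactions.items()), columns=["ReactionID", "Reaction"])
df_bounds = bounds.reset_index().rename(columns={"index": "ReactionID"})
df_medium = medium.rename(columns={"reaction": "ReactionID"})
df_medium["InMedium"] = True

merged = (df_reactions.merge(df_rules, on="ReactionID", how="outer")
                      .merge(df_bounds, on="ReactionID", how="outer")
                      .merge(df_medium, on="ReactionID", how="left"))
merged["InMedium"] = merged["InMedium"].fillna(False)
merged = merged.sort_values(by="InMedium", ascending=False)

# Columns: ReactionID, Reaction, Rule, lower_bound, upper_bound, InMedium
print(merged.to_string(index=False))
```

Written out as tab-separated text or Excel, this merged table is the single `*_custom_data` output that replaces the former separate rules, reactions, bounds and medium files.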