comparison COBRAxy/ras_generator.py @ 512:f32d3c9089fc draft

Uploaded
author francesco_lapi
date Wed, 08 Oct 2025 12:05:27 +0000
parents c17c6c9d112c
children b02cfa3b36dd
comparison
equal deleted inserted replaced
511:0cb727788cae 512:f32d3c9089fc
295 # read dataset and remove versioning from gene names 295 # read dataset and remove versioning from gene names
296 dataset = read_dataset(ARGS.input, "dataset") 296 dataset = read_dataset(ARGS.input, "dataset")
297 orig_gene_list=dataset.index.copy() 297 orig_gene_list=dataset.index.copy()
298 dataset.index = [str(el.split(".")[0]) for el in dataset.index] 298 dataset.index = [str(el.split(".")[0]) for el in dataset.index]
299 299
300 if any(dataset.index.duplicated(keep=False)):
301 list_str=", ".join(orig_gene_list[dataset.index.duplicated(keep=False)])
302 raise ValueError(f"ERROR: Duplicate entries in the gene dataset. The following genes are duplicated: "+list_str)
303
304 #load GPR rules 300 #load GPR rules
305 rules = load_custom_rules() 301 rules = load_custom_rules()
306 302
307 #create a list of all the gpr 303 #create a list of all the gpr
308 rules_total_string="" 304 rules_total_string=""
309 for id,rule in rules.items(): 305 for id,rule in rules.items():
310 rules_total_string+=rule.replace("(","").replace(")","") + " " 306 rules_total_string+=rule.replace("(","").replace(")","") + " "
311 rules_total_string=list(set(rules_total_string.split(" "))) 307 rules_total_string=list(set(rules_total_string.split(" ")))
308
309 if any(dataset.index.duplicated(keep=False)):
310 genes_duplicates=orig_gene_list[dataset.index.duplicated(keep=False)]
311 genes_duplicates_in_model=[elem for elem in genes_duplicates if elem in rules_total_string]
312 if len(genes_duplicates_in_model)>0:#metabolic genes have duplicated entries in the dataset
313 list_str=", ".join(genes_duplicates_in_model)
314 raise ValueError(f"ERROR: Duplicate entries in the gene dataset present in one or more GPR. The following metabolic genes are duplicated: "+list_str)
312 315
313 #check if nan value must be ignored in the GPR 316 #check if nan value must be ignored in the GPR
314 if ARGS.none: 317 if ARGS.none:
315 # #e.g. (A or nan --> A) 318 # #e.g. (A or nan --> A)
316 ignore_nan = True 319 ignore_nan = True