Mercurial > repos > bimib > cobraxy
comparison COBRAxy/ras_generator.py @ 512:f32d3c9089fc draft
Uploaded
author | francesco_lapi |
---|---|
date | Wed, 08 Oct 2025 12:05:27 +0000 |
parents | c17c6c9d112c |
children | b02cfa3b36dd |
comparison
equal
deleted
inserted
replaced
511:0cb727788cae | 512:f32d3c9089fc |
---|---|
295 # read dataset and remove versioning from gene names | 295 # read dataset and remove versioning from gene names |
296 dataset = read_dataset(ARGS.input, "dataset") | 296 dataset = read_dataset(ARGS.input, "dataset") |
297 orig_gene_list=dataset.index.copy() | 297 orig_gene_list=dataset.index.copy() |
298 dataset.index = [str(el.split(".")[0]) for el in dataset.index] | 298 dataset.index = [str(el.split(".")[0]) for el in dataset.index] |
299 | 299 |
300 if any(dataset.index.duplicated(keep=False)): | |
301 list_str=", ".join(orig_gene_list[dataset.index.duplicated(keep=False)]) | |
302 raise ValueError(f"ERROR: Duplicate entries in the gene dataset. The following genes are duplicated: "+list_str) | |
303 | |
304 #load GPR rules | 300 #load GPR rules |
305 rules = load_custom_rules() | 301 rules = load_custom_rules() |
306 | 302 |
307 #create a list of all the gpr | 303 #create a list of all the gpr |
308 rules_total_string="" | 304 rules_total_string="" |
309 for id,rule in rules.items(): | 305 for id,rule in rules.items(): |
310 rules_total_string+=rule.replace("(","").replace(")","") + " " | 306 rules_total_string+=rule.replace("(","").replace(")","") + " " |
311 rules_total_string=list(set(rules_total_string.split(" "))) | 307 rules_total_string=list(set(rules_total_string.split(" "))) |
308 | |
309 if any(dataset.index.duplicated(keep=False)): | |
310 genes_duplicates=orig_gene_list[dataset.index.duplicated(keep=False)] | |
311 genes_duplicates_in_model=[elem for elem in genes_duplicates if elem in rules_total_string] | |
312 if len(genes_duplicates_in_model)>0:#metabolic genes have duplicated entries in the dataset | |
313 list_str=", ".join(genes_duplicates_in_model) | |
314 raise ValueError(f"ERROR: Duplicate entries in the gene dataset present in one or more GPR. The following metabolic genes are duplicated: "+list_str) | |
312 | 315 |
313 #check if nan value must be ignored in the GPR | 316 #check if nan value must be ignored in the GPR |
314 if ARGS.none: | 317 if ARGS.none: |
315 # #e.g. (A or nan --> A) | 318 # #e.g. (A or nan --> A) |
316 ignore_nan = True | 319 ignore_nan = True |