Mercurial > repos > bimib > marea
comparison Marea/marea_cluster.py @ 0:23ac9cf12788 draft
Uploaded
| author | bimib |
|---|---|
| date | Tue, 06 Nov 2018 03:16:21 -0500 |
| parents | |
| children | 5721182715a7 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:23ac9cf12788 |
|---|---|
| 1 | |
| 2 from __future__ import division | |
| 3 import os | |
| 4 import sys | |
| 5 import pandas as pd | |
| 6 import collections | |
| 7 import pickle as pk | |
| 8 import argparse | |
| 9 from sklearn.cluster import KMeans | |
| 10 import matplotlib.pyplot as plt | |
| 11 | |
| 12 ########################## argparse ########################################### | |
| 13 | |
def process_args(args):
    """Parse the command line of the clustering tool.

    Parameters
    ----------
    args : list of str or None
        Full argument vector with the program name at index 0 (callers in
        this file pass ``sys.argv``).  When ``None``, argparse falls back
        to ``sys.argv`` itself.

    Returns
    -------
    argparse.Namespace
        The parsed options.  ``--data`` and ``--tool_dir`` are required;
        options without a default are ``None`` when not supplied.
    """
    parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
                                     description = 'process some value\'s' +
                                     ' genes to create class.')
    parser.add_argument('-rs', '--rules_selector',
                        type = str,
                        default = 'HMRcore',
                        choices = ['HMRcore', 'Recon', 'Custom'],
                        help = 'chose which type of dataset you want use')
    parser.add_argument('-cr', '--custom',
                        type = str,
                        help='your dataset if you want custom rules')
    parser.add_argument('-ch', '--cond_hier',
                        type = str,
                        default = 'no',
                        choices = ['no', 'yes'],
                        help = 'chose if you wanna hierical dendrogram')
    parser.add_argument('-lk', '--k_min',
                        type = int,
                        help = 'min number of cluster')
    parser.add_argument('-uk', '--k_max',
                        type = int,
                        help = 'max number of cluster')
    parser.add_argument('-li', '--linkage',
                        type = str,
                        choices = ['single', 'complete', 'average'],
                        help='linkage hierarchical cluster')
    parser.add_argument('-d', '--data',
                        type = str,
                        required = True,
                        help = 'input dataset')
    parser.add_argument('-n', '--none',
                        type = str,
                        default = 'true',
                        choices = ['true', 'false'],
                        help = 'compute Nan values')
    parser.add_argument('-td', '--tool_dir',
                        type = str,
                        required = True,
                        help = 'your tool directory')
    parser.add_argument('-na', '--name',
                        type = str,
                        help = 'name of dataset')
    parser.add_argument('-de', '--dendro',
                        help = "Dendrogram out")
    parser.add_argument('-ol', '--out_log',
                        help = "Output log")
    parser.add_argument('-el', '--elbow',
                        help = "Out elbow")
    # Honour the vector we were given instead of silently re-reading
    # sys.argv; index 0 is the program name and must be skipped.
    argv = None if args is None else list(args)[1:]
    return parser.parse_args(argv)
| 65 | |
| 66 ########################### warning ########################################### | |
| 67 | |
def warning(s):
    """Append the text ``s`` to the log file given by ``--out_log``.

    The command line is re-parsed on every call to find the log path.
    """
    log_path = process_args(sys.argv).out_log
    with open(log_path, 'a') as log_file:
        log_file.write(s)
| 72 | |
| 73 ############################ dataset input #################################### | |
| 74 | |
def read_dataset(data, name):
    """Load a tab-separated dataset whose first row is the header.

    Parameters
    ----------
    data : str or file-like
        Path (or buffer) handed to ``pandas.read_csv``.
    name : str
        Dataset label used in the abort message.

    Returns
    -------
    pandas.DataFrame
        The parsed table, guaranteed to have at least two columns.

    Raises
    ------
    SystemExit
        If the input is empty, cannot be tokenised, or has fewer than two
        columns.
    """
    abort_msg = 'Execution aborted: wrong format of '+name+'\n'
    try:
        dataset = pd.read_csv(data, sep = '\t', header = 0)
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        # ParserError (ragged rows) is reported the same way as an empty
        # file, matching how custom GPR rule files are handled.
        sys.exit(abort_msg)
    if len(dataset.columns) < 2:
        sys.exit(abort_msg)
    return dataset
| 83 | |
| 84 ############################ dataset name ##################################### | |
| 85 | |
def name_dataset(name_data, count):
    """Return the dataset label, numbering it when it is the generic one.

    The generic label ``'Dataset'`` becomes ``'Dataset_<count>'``; any
    other label is returned as a string unchanged.
    """
    label = str(name_data)
    if label != 'Dataset':
        return label
    return label + '_' + str(count)
| 91 | |
| 92 ############################ load ids and rules ############################### | |
| 93 | |
def load_id_rules(reactions):
    """Split a ``{reaction_id: rule}`` mapping into two parallel lists.

    Returns a tuple ``(ids, rules)`` in the mapping's iteration order.
    """
    ids = list(reactions.keys())
    rules = list(reactions.values())
    return (ids, rules)
| 100 | |
| 101 ############################ check_methods #################################### | |
| 102 | |
def gene_type(l, name):
    """Classify a gene identifier string by its naming scheme.

    The checks run in a fixed order (HGNC, Ensembl, symbol, Entrez) and
    the first one that matches decides the result.

    Parameters
    ----------
    l : str
        Gene identifier to classify.
    name : str
        Dataset label used in the abort message.

    Returns
    -------
    str
        One of ``'hugo_id'``, ``'ensembl_gene_id'``, ``'symbol'`` or
        ``'entrez_id'``.

    Raises
    ------
    SystemExit
        If no check recognises the identifier.
    """
    checks = ((check_hgnc, 'hugo_id'),
              (check_ensembl, 'ensembl_gene_id'),
              (check_symbol, 'symbol'),
              (check_entrez, 'entrez_id'))
    for matches, label in checks:
        if matches(l):
            return label
    # The original message ran "Supported ID" and "types" together.
    sys.exit('Execution aborted:\n' +
             'gene ID type in ' + name + ' not supported. Supported ID ' +
             'types are: HUGO ID, Ensemble ID, HUGO symbol, Entrez ID\n')
| 116 | |
def check_hgnc(l):
    """Return True when ``l`` is ``HGNC:`` (any case) followed by digits."""
    if len(l) <= 5:
        return False
    if not l.upper().startswith('HGNC:'):
        return False
    return l[5:].isdigit()
| 125 | |
def check_ensembl(l):
    """Return True when ``l`` has the shape of an Ensembl identifier.

    That is: exactly 15 characters, starting with ``ENS`` (any case), with
    only digits from the fifth character onwards.
    """
    if len(l) != 15:
        return False
    if not l.upper().startswith('ENS'):
        return False
    return l[4:].isdigit()
| 134 | |
def check_symbol(l):
    """Return True when ``l`` is a letter followed by alphanumerics.

    A one-character string is rejected because its (empty) remainder is
    not alphanumeric.
    """
    if len(l) == 0:
        return False
    return l[0].isalpha() and l[1:].isalnum()
| 143 | |
def check_entrez(l):
    """Return True when ``l`` is a non-empty string made only of digits."""
    return len(l) > 0 and l.isdigit()
| 149 | |
def check_bool(b):
    """Map the strings ``'true'``/``'false'`` to booleans.

    Any other value yields ``None``.
    """
    if b == 'true':
        return True
    return False if b == 'false' else None
| 155 | |
| 156 ############################ make recon ####################################### | |
| 157 | |
def check_and_doWord(l):
    """Tokenise a rule given as a list of characters.

    The list ``l`` is consumed in place.  Blanks are dropped, each
    parenthesis becomes its own token and every other run of characters
    becomes one word.

    Returns
    -------
    tuple or bool
        ``(tokens, genes)`` where ``genes`` holds every word other than
        ``'and'``/``'or'``; ``False`` when the parentheses are unbalanced.
    """
    tokens, genes = [], []
    depth = 0
    while l:
        if depth < 0:
            # A ')' appeared before its matching '('.
            return False
        head = l[0]
        if head == '(':
            depth += 1
            tokens.append(l.pop(0))
        elif head == ')':
            depth -= 1
            tokens.append(l.pop(0))
        elif head == ' ':
            l.pop(0)
        else:
            chars = []
            while l and l[0] not in [' ', '(', ')']:
                chars.append(l.pop(0))
            word = ''.join(chars)
            tokens.append(word)
            if word not in ['or', 'and']:
                genes.append(word)
    if depth != 0:
        return False
    return (tokens, genes)
| 192 | |
def brackets_to_list(l):
    """Turn a flat token list into nested lists, one per bracket pair.

    The token list ``l`` is consumed in place; each ``'('`` hands the
    following tokens to ``resolve_brackets`` and its result is stored as a
    single nested element.
    """
    nested = []
    while l:
        token = l.pop(0)
        nested.append(resolve_brackets(l) if token == '(' else token)
    return nested
| 203 | |
def resolve_brackets(l):
    """Collect tokens up to the matching ``')'`` into a (nested) list.

    Called just after an opening bracket has been removed from ``l``.
    Tokens are consumed from the front of ``l`` up to and including the
    closing bracket; inner brackets are resolved recursively.  An
    ``IndexError`` is raised if ``l`` runs out before a ``')'`` is found.
    """
    group = []
    while True:
        token = l.pop(0)
        if token == ')':
            return group
        group.append(resolve_brackets(l) if token == '(' else token)
| 215 | |
def priorityAND(l):
    """Group ``and`` chains so that they bind tighter than ``or``.

    ``l`` is a nested rule (operands, ``'and'``/``'or'`` strings and
    sub-lists).  Every run of operands joined by ``'and'`` is wrapped in
    its own sub-list when the level also involves an ``'or'`` seen so far;
    a level made only of ``'and'`` is left flat.  Sub-lists are processed
    recursively.  The input is expected to have passed ``checkRule``.
    """
    def expand(item):
        # Recurse into bracketed sub-rules, pass plain tokens through.
        return priorityAND(item) if isinstance(item, list) else item

    grouped = []
    only_and = True
    while l:
        if len(l) == 1:
            grouped.append(expand(l[0]))
            l = l[1:]
        elif l[0] == 'or':
            grouped.append(l[0])
            only_and = False
            l = l[1:]
        elif l[1] == 'or':
            grouped.extend([expand(l[0]), l[1]])
            only_and = False
            l = l[2:]
        elif l[1] == 'and':
            and_chain = [expand(l[0]), l[1], expand(l[2])]
            l = l[3:]
            # Keep absorbing "and <operand>" pairs until an 'or' shows up.
            while l:
                if l[0] == 'and':
                    and_chain.extend([l[0], expand(l[1])])
                    l = l[2:]
                elif l[0] == 'or':
                    only_and = False
                    break
            if only_and:  # only ANDs in the list so far
                grouped.extend(and_chain)
            else:
                grouped.append(and_chain)
    return grouped
| 266 | |
def checkRule(l):
    """Validate the shape of a nested rule.

    A single element is accepted (a nested list is validated
    recursively), three or more elements are delegated to ``checkRule2``,
    and an empty or two-element list is rejected.

    Returns ``True`` or ``False``.
    """
    size = len(l)
    if size == 1:
        only = l[0]
        return not (isinstance(only, list) and checkRule(only) is False)
    if size > 2:
        return checkRule2(l) is not False
    return False
| 278 | |
def checkRule2(l):
    """Check that a rule alternates operands and ``and``/``or`` operators.

    The list is consumed from the left either as
    ``operand operator operand`` triples or, once a triple has been read,
    as ``operator operand`` pairs.  Nested lists used as operands are
    validated with ``checkRule``.

    Returns
    -------
    bool
        ``True`` for an empty list or a well-formed sequence, ``False``
        otherwise.  A dangling ``operand operator`` pair with nothing
        after it is rejected (this used to raise ``IndexError``).
    """
    operators = ('and', 'or')
    while l:
        if len(l) == 1:
            return False
        if l[1] in operators:
            if len(l) < 3:
                # "X and" with no right-hand operand.
                return False
            for operand in (l[0], l[2]):
                if isinstance(operand, list) and checkRule(operand) is False:
                    return False
            l = l[3:]
        elif l[0] in operators:
            if isinstance(l[1], list) and checkRule(l[1]) is False:
                return False
            l = l[2:]
        else:
            return False
    return True
| 303 | |
def do_rules(rules):
    """Parse rule strings into nested, AND-prioritised lists.

    Parameters
    ----------
    rules : sequence of str
        One rule per reaction; an empty string means "no rule".

    Returns
    -------
    tuple
        ``(split_rules, genes)``: ``split_rules`` has one entry per input
        rule (``[]`` for empty or malformed rules) and ``genes`` is the
        de-duplicated list of gene names seen in the tokenised rules.

    Malformed rules are collected and reported once through ``warning``.
    """
    split_rules = []
    err_rules = []
    genes_seen = []
    for rule in rules:
        chars = list(rule)
        if not chars:
            split_rules.append([])
            continue
        parsed = check_and_doWord(chars)
        if parsed is False:
            # Unbalanced brackets: test before unpacking, otherwise the
            # tuple assignment itself raises TypeError.
            split_rules.append([])
            err_rules.append(rule)
            continue
        tokens, rule_genes = parsed
        genes_seen.extend(rule_genes)
        nested = brackets_to_list(tokens)
        if checkRule(nested):
            split_rules.append(priorityAND(nested))
        else:
            split_rules.append([])
            err_rules.append(rule)
    if err_rules:
        warning('Warning: wrong format rule in ' + str(err_rules) + '\n')
    return (split_rules, list(set(genes_seen)))
| 328 | |
def make_recon(data):
    """Load custom GPR rules from an SBML model or a two-column TSV.

    The file is first read with cobra as an SBML model; if cobra raises
    ``CobraSBMLError`` it is read as a tab-separated table whose first two
    columns are reaction ids and rules.

    Returns
    -------
    tuple
        ``(ids, split_rules, gene_in_rule)`` where ``split_rules`` comes
        from ``do_rules`` and ``gene_in_rule`` maps every gene found in
        the rules to ``'ok'``.

    Raises
    ------
    SystemExit
        If the tabular fallback is empty, unparsable or has fewer than two
        columns.
    """
    try:
        import cobra as cb
        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            recon = cb.io.read_sbml_model(data)
        react = recon.reactions
        rules, ids = [], []
        for idx in range(len(react)):
            rules.append(react[idx].gene_reaction_rule)
        for idx in range(len(react)):
            ids.append(react[idx].id)
    except cb.io.sbml3.CobraSBMLError:
        try:
            data = pd.read_csv(data, sep = '\t', dtype = str).fillna('')
            n_columns = len(data.columns)
            if n_columns < 2:
                sys.exit('Execution aborted: wrong format of ' +
                         'custom GPR rules\n')
            if n_columns != 2:
                warning('WARNING: more than 2 columns in custom GPR rules.\n' +
                        'Extra columns have been disregarded\n')
            ids = list(data.iloc[:, 0])
            rules = list(data.iloc[:, 1])
        except (pd.errors.EmptyDataError, pd.errors.ParserError):
            sys.exit('Execution aborted: wrong format of custom GPR rules\n')
    split_rules, tmp_genes = do_rules(rules)
    gene_in_rule = dict.fromkeys(tmp_genes, 'ok')
    return (ids, split_rules, gene_in_rule)
| 359 | |
| 360 ############################ resolve_methods ################################## | |
| 361 | |
def replace_gene_value(l, d):
    """Substitute gene names in a nested rule with their values.

    Parameters
    ----------
    l : list
        Nested rule of gene names, ``'and'``/``'or'`` and sub-lists.
    d : dict
        Mapping from gene name to value.

    Returns
    -------
    tuple
        ``(rule, missing)``: the rule with every gene replaced through
        ``replace_gene`` (same nesting), and the names that resolved to
        ``None``.
    """
    replaced, missing = [], []
    for item in l:
        if isinstance(item, list):
            sub_rule, sub_missing = replace_gene_value(item, d)
            replaced.append(sub_rule)
            missing.extend(sub_missing)
            continue
        value = replace_gene(item, d)
        replaced.append(value)
        if value is None:
            missing.append(item)
    return (replaced, missing)
| 377 | |
def replace_gene(l, d):
    """Look up the value of one rule token.

    Parameters
    ----------
    l : str
        A gene name, or one of the operators ``'and'``/``'or'``.
    d : dict
        Mapping from gene name to value.

    Returns
    -------
    The operator unchanged, the numeric value of the gene, or ``None``
    when the gene is not in ``d``.

    Raises
    ------
    SystemExit
        If the stored value is neither ``None`` nor an int/float.
    """
    if l == 'and' or l == 'or':
        return l
    value = d.get(l, None)
    if value is not None and not isinstance(value, (int, float)):
        # str() so that a non-string value produces the intended abort
        # message rather than a TypeError from the concatenation.
        sys.exit('Execution aborted: ' + str(value) + ' value not valid\n')
    return value
| 386 | |
def compute(val1, op, val2, cn):
    """Combine two operand values with one logical operator.

    With both values present, ``'and'`` gives their minimum and any other
    operator gives their sum.  With a value missing (``None``), ``'and'``
    gives ``None`` unless ``cn`` is ``True``; otherwise the value that is
    present is returned, or ``None`` if neither is.
    """
    left_known = val1 is not None
    right_known = val2 is not None
    if left_known and right_known:
        return min(val1, val2) if op == 'and' else val1 + val2
    if op == 'and' and cn is not True:
        return None
    if left_known:
        return val1
    if right_known:
        return val2
    return None
| 410 | |
def control(ris, l, cn):
    """Evaluate a nested rule whose genes were replaced by values.

    A single number or ``None`` is returned as is, a single nested list is
    evaluated recursively, and three or more items go to ``control_list``.
    Anything else (empty list, two items, an unexpected single item)
    yields ``False``.
    """
    n_items = len(l)
    if n_items > 2:
        return control_list(ris, l, cn)
    if n_items != 1:
        return False
    single = l[0]
    if isinstance(single, (float, int)) or single is None:
        return single
    if isinstance(single, list):
        return control(None, single, cn)
    return False
| 423 | |
def control_list(ris, l, cn):
    """Fold a flat ``operand op operand [op operand ...]`` list to a value.

    Operands are numbers, ``None`` or nested lists (evaluated through
    ``control``); every step is combined with ``compute``.  ``ris`` is the
    running result, used as the left operand of a leading operator.

    Returns the final value (possibly ``None``), or ``False`` as soon as
    the sequence is malformed or a nested list evaluates to ``False``.
    """
    operators = ['and', 'or']

    def is_scalar(item):
        return isinstance(item, (float, int)) or item is None

    def resolve_operand(item):
        # -> (is_valid, value); nested lists are evaluated on the spot.
        if is_scalar(item):
            return True, item
        if isinstance(item, list):
            value = control(None, item, cn)
            return value is not False, value
        return False, None

    while l:
        if len(l) == 1:
            return False
        if is_scalar(l[0]) and l[1] in operators:
            valid, right = resolve_operand(l[2])
            if not valid:
                return False
            ris = compute(l[0], l[1], right, cn)
            l = l[3:]
        elif l[0] in operators:
            valid, right = resolve_operand(l[1])
            if not valid:
                return False
            ris = compute(ris, l[0], right, cn)
            l = l[2:]
        elif isinstance(l[0], list) and l[1] in operators:
            if not (is_scalar(l[2]) or isinstance(l[2], list)):
                return False
            left = control(None, l[0], cn)
            valid, right = resolve_operand(l[2])
            if left is False or not valid:
                return False
            ris = compute(left, l[1], right, cn)
            l = l[3:]
        else:
            return False
    return ris
| 473 | |
| 474 ############################ gene ############################################# | |
| 475 | |
def data_gene(gene, type_gene, name, gene_custom):
    """Validate the expression table and convert it to nested dicts.

    Parameters
    ----------
    gene : pandas.DataFrame
        Expression table; the first column holds gene ids (strings), the
        remaining columns are samples.  Gene ids are stripped in place.
    type_gene : str
        Identifier scheme, used to pick the gene table inside the bundled
        pickle.
    name : str
        Dataset label used in abort messages.
    gene_custom : dict or None
        ``{gene: 'ok'}`` for custom rules; ``None`` to load the gene table
        of the selected built-in rule set from ``<tool_dir>/local``.

    Returns
    -------
    dict
        ``{sample: {gene: value}}`` as produced by
        ``DataFrame.set_index(first_column).to_dict()``.

    Raises
    ------
    SystemExit
        If a gene that appears in the rules is duplicated, or if a column
        label is duplicated.
    """
    args = process_args(sys.argv)
    for i in range(len(gene)):
        tmp = gene.iloc[i, 0]
        if tmp.startswith(' ') or tmp.endswith(' '):
            gene.iloc[i, 0] = tmp.strip()
    gene_dup = [item for item, count in
                collections.Counter(gene[gene.columns[0]]).items() if count > 1]
    pat_dup = [item for item, count in
               collections.Counter(list(gene.columns)).items() if count > 1]
    if gene_dup:
        if gene_custom is None:
            if args.rules_selector == 'HMRcore':
                pickle_path = args.tool_dir + '/local/HMRcore_genes.p'
            elif args.rules_selector == 'Recon':
                pickle_path = args.tool_dir + '/local/Recon_genes.p'
            # NOTE: pickle is only safe because these files ship with the
            # tool; never point tool_dir at untrusted content.
            with open(pickle_path, 'rb') as handle:
                gene_in_rule = pk.load(handle)
            gene_in_rule = gene_in_rule.get(type_gene)
        else:
            gene_in_rule = gene_custom
        # Duplicates only matter for genes that actually occur in a rule.
        used_dup = [g for g in gene_dup if gene_in_rule.get(g) == 'ok']
        if used_dup:
            sys.exit('Execution aborted because gene ID '
                     + str(used_dup) + ' in ' + name + ' is duplicated\n')
    if pat_dup:
        sys.exit('Execution aborted: duplicated label\n'
                 + str(pat_dup) + ' in ' + name + '\n')
    return (gene.set_index(gene.columns[0])).to_dict()
| 508 | |
| 509 ############################ resolve ########################################## | |
| 510 | |
def resolve(genes, rules, ids, resolve_none, name):
    """Score every rule for every sample.

    Parameters
    ----------
    genes : dict
        ``{sample: {gene: value}}``.
    rules : list
        Parsed rules, one per reaction (``[]`` when there is no rule).
    ids : list
        Reaction ids (not used by the computation).
    resolve_none : bool
        Passed to ``control``; governs how ``and`` treats missing values.
    name : str
        Dataset label used in the abort message.

    Returns
    -------
    tuple
        ``(scores, missing)``: ``scores`` maps each sample to a list with
        one value per rule (``None`` when not computable) and ``missing``
        is the de-duplicated list of genes absent from the data.

    Raises
    ------
    SystemExit
        If no rule could be scored for any sample.
    """
    scores_by_sample = {}
    missing = []
    any_score = False
    for sample, expression in genes.items():
        scores = []
        for rule in rules:
            if not rule:
                scores.append(None)
                continue
            valued, unknown = replace_gene_value(rule, expression)
            if unknown:
                missing.extend(unknown)
            outcome = control(None, valued, resolve_none)
            if outcome is False or outcome is None:
                scores.append(None)
            else:
                scores.append(outcome)
                any_score = True
        scores_by_sample[sample] = scores
    if not any_score:
        sys.exit('Execution aborted: no computable score' +
                 ' (due to missing gene values) for class '
                 + name + ', the class has been disregarded\n')
    return (scores_by_sample, list(set(missing)))
| 537 | |
| 538 ################################# clustering ################################## | |
| 539 | |
def f_cluster(resolve_rules):
    """Cluster samples with K-means and write the results to disk.

    For every k between ``--k_min`` and ``--k_max`` a
    ``cluster_out/K=<k>_<name>.tsv`` file with the 1-based class of each
    sample is written; the inertia-vs-k elbow plot goes to ``--elbow``
    and, when ``--cond_hier`` is ``yes``, a dendrogram goes to
    ``--dendro``.

    Parameters
    ----------
    resolve_rules : dict
        ``{sample: [score per rule]}`` with ``None`` for missing scores.
    """
    # Do not fail when the output directory is already there.
    if not os.path.isdir('cluster_out'):
        os.makedirs('cluster_out')
    args = process_args(sys.argv)
    cluster_data = pd.DataFrame.from_dict(resolve_rules, orient = 'index')
    # Drop every rule with a missing score in any sample.  Looking only at
    # the first row with "== None" misses NaN cells, which KMeans rejects.
    cluster_data = cluster_data.dropna(axis = 1)
    k_values = range(args.k_min, args.k_max+1)
    distorsion = []
    for k in k_values:
        tmp_kmeans = KMeans(n_clusters = k,
                            n_init = 100,
                            max_iter = 300,
                            random_state = 0).fit(cluster_data)
        distorsion.append(tmp_kmeans.inertia_)
        predict = [x+1 for x in tmp_kmeans.predict(cluster_data)]
        classe = pd.DataFrame(list(zip(cluster_data.index, predict))).astype(str)
        dest = 'cluster_out/K=' + str(k) + '_' + args.name+'.tsv'
        classe.to_csv(dest, sep = '\t', index = False,
                      header = ['Patient_ID', 'Class'])
    plt.figure(0)
    plt.plot(k_values, distorsion, marker = 'o')
    plt.xlabel('Number of cluster')
    plt.ylabel('Distorsion')
    plt.savefig(args.elbow, dpi = 240, format = 'pdf')
    if args.cond_hier == 'yes':
        import scipy.cluster.hierarchy as hier
        lin = hier.linkage(cluster_data, args.linkage)
        # One sized figure instead of an empty one plus a second one.
        plt.figure(1, figsize=(10, 5))
        hier.dendrogram(lin, leaf_font_size = 2, labels = cluster_data.index)
        plt.savefig(args.dendro, dpi = 480, format = 'pdf')
    return None
| 574 | |
| 575 ################################# main ######################################## | |
| 576 | |
def main():
    """Run the whole pipeline: load rules, score the dataset, cluster it.

    Steps: parse the command line, load the selected rule set (bundled
    pickle or custom file), read the expression dataset, compute one score
    per rule and sample, then hand the scores to ``f_cluster``.  Messages
    are appended to the ``--out_log`` file through ``warning``.

    Raises
    ------
    SystemExit
        On invalid cluster bounds or any abort raised by the helpers.
    """
    args = process_args(sys.argv)
    if args.k_min is None or args.k_max is None:
        sys.exit('Execution aborted: min and max number of cluster ' +
                 'are both required')
    if args.k_min > args.k_max:
        # The message used to state the opposite of the failed condition.
        sys.exit('Execution aborted: min cluster > max cluster')
    # NOTE: pickle is only safe because these files ship with the tool.
    if args.rules_selector == 'HMRcore':
        with open(args.tool_dir + '/local/HMRcore_rules.p', 'rb') as handle:
            recon = pk.load(handle)
    elif args.rules_selector == 'Recon':
        with open(args.tool_dir + '/local/Recon_rules.p', 'rb') as handle:
            recon = pk.load(handle)
    elif args.rules_selector == 'Custom':
        ids, rules, gene_in_rule = make_recon(args.custom)
    resolve_none = check_bool(args.none)
    dataset = read_dataset(args.data, args.name)
    dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)
    # The id scheme is inferred from the first gene of the dataset.
    type_gene = gene_type(dataset.iloc[0, 0], args.name)
    if args.rules_selector != 'Custom':
        genes = data_gene(dataset, type_gene, args.name, None)
        ids, rules = load_id_rules(recon.get(type_gene))
    elif args.rules_selector == 'Custom':
        genes = data_gene(dataset, type_gene, args.name, gene_in_rule)
    resolve_rules, err = resolve(genes, rules, ids, resolve_none, args.name)
    if err:
        warning('WARNING: gene\n' + str(err) + '\nnot found in class '
                + args.name + ', the expression level for this gene ' +
                'will be considered NaN\n')
    f_cluster(resolve_rules)
    warning('Execution succeeded')
    return None

###############################################################################

if __name__ == "__main__":
    main()
