comparison Marea/marea_cluster.py @ 10:2405255d6a09 draft

Uploaded
author bimib
date Wed, 13 Feb 2019 04:46:14 -0500
parents 7c76e8e319c2
children 3d77287caf22
comparison
equal deleted inserted replaced
9:7c76e8e319c2 10:2405255d6a09
538 ################################# clustering ################################## 538 ################################# clustering ##################################
539 539
540 def f_cluster(resolve_rules): 540 def f_cluster(resolve_rules):
541 os.makedirs('cluster_out') 541 os.makedirs('cluster_out')
542 args = process_args(sys.argv) 542 args = process_args(sys.argv)
543 k_min = args.k_min
544 k_max = args.k_max
545 if k_min > k_max:
546 warning('k range boundaries inverted.\n')
547 tmp = k_min
548 k_min = k_max
549 k_max = tmp
550 else:
551 warning('k range correct.\n')
543 cluster_data = pd.DataFrame.from_dict(resolve_rules, orient = 'index') 552 cluster_data = pd.DataFrame.from_dict(resolve_rules, orient = 'index')
544 for i in cluster_data.columns: 553 for i in cluster_data.columns:
545 tmp = cluster_data[i][0] 554 tmp = cluster_data[i][0]
546 if tmp == None: 555 if tmp == None:
547 cluster_data = cluster_data.drop(columns=[i]) 556 cluster_data = cluster_data.drop(columns=[i])
548 distorsion = [] 557 distorsion = []
549 for i in range(args.k_min, args.k_max+1): 558 for i in range(k_min, k_max+1):
550 tmp_kmeans = KMeans(n_clusters = i, 559 tmp_kmeans = KMeans(n_clusters = i,
551 n_init = 100, 560 n_init = 100,
552 max_iter = 300, 561 max_iter = 300,
553 random_state = 0).fit(cluster_data) 562 random_state = 0).fit(cluster_data)
554 distorsion.append(tmp_kmeans.inertia_) 563 distorsion.append(tmp_kmeans.inertia_)
557 classe = (pd.DataFrame(list(zip(cluster_data.index, predict)))).astype(str) 566 classe = (pd.DataFrame(list(zip(cluster_data.index, predict)))).astype(str)
558 dest = 'cluster_out/K=' + str(i) + '_' + args.name+'.tsv' 567 dest = 'cluster_out/K=' + str(i) + '_' + args.name+'.tsv'
559 classe.to_csv(dest, sep = '\t', index = False, 568 classe.to_csv(dest, sep = '\t', index = False,
560 header = ['Patient_ID', 'Class']) 569 header = ['Patient_ID', 'Class'])
561 plt.figure(0) 570 plt.figure(0)
562 plt.plot(range(args.k_min, args.k_max+1), distorsion, marker = 'o') 571 plt.plot(range(k_min, k_max+1), distorsion, marker = 'o')
563 plt.xlabel('Number of cluster') 572 plt.xlabel('Number of cluster')
564 plt.ylabel('Distorsion') 573 plt.ylabel('Distorsion')
565 plt.savefig(args.elbow, dpi = 240, format = 'pdf') 574 plt.savefig(args.elbow, dpi = 240, format = 'pdf')
566 if args.cond_hier == 'yes': 575 if args.cond_hier == 'yes':
567 import scipy.cluster.hierarchy as hier 576 import scipy.cluster.hierarchy as hier
574 583
575 ################################# main ######################################## 584 ################################# main ########################################
576 585
577 def main(): 586 def main():
578 args = process_args(sys.argv) 587 args = process_args(sys.argv)
579 if args.k_min > args.k_max:
580 warning('k range boundaries inverted.')
581 tmp = args.k_min
582 args.k_min = args.k_max
583 args.k_max = tmp
584 else:
585 warning('k range correct.')
586 if args.rules_selector == 'HMRcore': 588 if args.rules_selector == 'HMRcore':
587 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb')) 589 recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb'))
588 elif args.rules_selector == 'Recon': 590 elif args.rules_selector == 'Recon':
589 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb')) 591 recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb'))
590 elif args.rules_selector == 'Custom': 592 elif args.rules_selector == 'Custom':