Mercurial > repos > bimib > marea
diff Marea/marea_cluster.py @ 34:1a97d1537623 draft
Lots of bug fixes
author | bimib |
---|---|
date | Sat, 26 Oct 2019 07:49:31 -0400 |
parents | abf0bfe01c78 |
children | 94c51690d40c |
line wrap: on
line diff
--- a/Marea/marea_cluster.py Wed Oct 16 16:25:56 2019 -0400 +++ b/Marea/marea_cluster.py Sat Oct 26 07:49:31 2019 -0400 @@ -208,12 +208,7 @@ classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str) classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class']) - - if davies: - with np.errstate(divide='ignore', invalid='ignore'): - davies_bouldin = davies_bouldin_score(dataset, all_labels[i]) - warning("\nFor n_clusters = " + str(i + k_min) + - " The average davies bouldin score is: " + str(davies_bouldin)) + if silhouette: @@ -329,8 +324,6 @@ n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) - ##TODO: PLOT SU DBSCAN (no centers) e HIERARCHICAL - labels = labels predict = [x+1 for x in labels] classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str) @@ -339,7 +332,7 @@ ########################## hierachical ####################################### -def hierachical_agglomerative(dataset, k_min, k_max, best_cluster): +def hierachical_agglomerative(dataset, k_min, k_max, best_cluster, silhouette): if not os.path.exists('clustering'): os.makedirs('clustering') @@ -354,18 +347,22 @@ scores = [] labels = [] + for n_clusters in range_n_clusters: cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward') cluster.fit_predict(dataset) cluster_labels = cluster.labels_ labels.append(cluster_labels) - silhouette_avg = silhouette_score(dataset, cluster_labels) write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv') - scores.append(silhouette_avg) - #warning("For n_clusters =", n_clusters, - #"The average silhouette_score is :", silhouette_avg) best = max_index(scores) + k_min + + for i in range(len(labels)): + prefix = '' + if (i + k_min == best): + prefix = '_BEST' + if silhouette == 'true': + silihouette_draw(dataset, labels[i], i + k_min, 'clustering/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png') for i in 
range(len(labels)): if (i + k_min == best): @@ -373,11 +370,7 @@ predict = [x+1 for x in labels] classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str) classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class']) - - - - - + ############################# main ########################################### @@ -408,7 +401,7 @@ dbscan(X, args.eps, args.min_samples, args.best_cluster) if args.cluster_type == 'hierarchy': - hierachical_agglomerative(X, args.k_min, args.k_max, args.best_cluster) + hierachical_agglomerative(X, args.k_min, args.k_max, args.best_cluster, args.silhouette) ##############################################################################