diff Marea/marea_cluster.py @ 34:1a97d1537623 draft

Lots of bug fixes
author bimib
date Sat, 26 Oct 2019 07:49:31 -0400
parents abf0bfe01c78
children 94c51690d40c
line wrap: on
line diff
--- a/Marea/marea_cluster.py	Wed Oct 16 16:25:56 2019 -0400
+++ b/Marea/marea_cluster.py	Sat Oct 26 07:49:31 2019 -0400
@@ -208,12 +208,7 @@
             classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
             classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class'])
             
-            
-        if davies:
-            with np.errstate(divide='ignore', invalid='ignore'):
-                davies_bouldin = davies_bouldin_score(dataset, all_labels[i])
-            warning("\nFor n_clusters = " + str(i + k_min) +
-                  " The average davies bouldin score is: " + str(davies_bouldin))
+          
         
        
         if silhouette:
@@ -329,8 +324,6 @@
     n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
     
     
-    ##TODO: PLOT SU DBSCAN (no centers) e HIERARCHICAL
-    
     labels = labels
     predict = [x+1 for x in labels]
     classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
@@ -339,7 +332,7 @@
     
 ########################## hierachical #######################################
     
-def hierachical_agglomerative(dataset, k_min, k_max, best_cluster):
+def hierachical_agglomerative(dataset, k_min, k_max, best_cluster, silhouette):
 
     if not os.path.exists('clustering'):
         os.makedirs('clustering')
@@ -354,18 +347,22 @@
 
     scores = []
     labels = []
+    
     for n_clusters in range_n_clusters:    
         cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward')  
         cluster.fit_predict(dataset)  
         cluster_labels = cluster.labels_
         labels.append(cluster_labels)
-        silhouette_avg = silhouette_score(dataset, cluster_labels)
         write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv')
-        scores.append(silhouette_avg)
-        #warning("For n_clusters =", n_clusters,
-              #"The average silhouette_score is :", silhouette_avg)
               
     best = max_index(scores) + k_min
+    
+    for i in range(len(labels)):
+        prefix = ''
+        if (i + k_min == best):
+            prefix = '_BEST'
+        if silhouette == 'true':
+            silihouette_draw(dataset, labels[i], i + k_min, 'clustering/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png')
      
     for i in range(len(labels)):
         if (i + k_min == best):
@@ -373,11 +370,7 @@
             predict = [x+1 for x in labels]
             classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
             classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class'])
-     
-        
-        
-       
-
+            
     
 ############################# main ###########################################
 
@@ -408,7 +401,7 @@
         dbscan(X, args.eps, args.min_samples, args.best_cluster)
         
     if args.cluster_type == 'hierarchy':
-        hierachical_agglomerative(X, args.k_min, args.k_max, args.best_cluster)
+        hierachical_agglomerative(X, args.k_min, args.k_max, args.best_cluster, args.silhouette)
         
 ##############################################################################