changeset 42:b3f9e13bf15f draft

Uploaded
author bimib
date Tue, 03 Dec 2019 12:34:30 -0500
parents 9e02d127887a
children 89047227bedc
files Marea/marea_cluster.py Marea/marea_cluster.xml
diffstat 2 files changed, 14 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/Marea/marea_cluster.py	Mon Nov 25 12:05:33 2019 -0500
+++ b/Marea/marea_cluster.py	Tue Dec 03 12:34:30 2019 -0500
@@ -9,7 +9,7 @@
 import os
 from sklearn.datasets import make_blobs
 from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
-from sklearn.metrics import silhouette_samples, silhouette_score, davies_bouldin_score, cluster
+from sklearn.metrics import silhouette_samples, silhouette_score, cluster
 import matplotlib
 matplotlib.use('agg')
 import matplotlib.pyplot as plt
@@ -328,7 +328,7 @@
         os.makedirs('clustering')
     
     plt.figure(figsize=(10, 7))  
-    plt.title("Classes Dendogram")  
+    plt.title("Customer Dendograms")  
     shc.dendrogram(shc.linkage(dataset, method='ward'), labels=dataset.index.values.tolist())  
     fig = plt.gcf()
     fig.savefig('clustering/dendogram.png', dpi=200)
@@ -338,13 +338,15 @@
     scores = []
     labels = []
     
-    for n_clusters in range_n_clusters:    
+    n_classi = dataset.shape[0]
+    
+    for n_clusters in range_n_clusters:  
         cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward')  
         cluster.fit_predict(dataset)  
         cluster_labels = cluster.labels_
         labels.append(cluster_labels)
         write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv')
-              
+        
     best = max_index(scores) + k_min
     
     for i in range(len(labels)):
@@ -382,6 +384,13 @@
         tmp = X[i][0]
         if tmp == None:
             X = X.drop(columns=[i])
+            
+    if args.k_max != None:
+       numero_classi = X.shape[0]
+       while args.k_max >= numero_classi:
+          err = 'Skipping k = ' + str(args.k_max) + ' since it is >= number of classes of dataset'
+          warning(err)
+          args.k_max = args.k_max - 1
     
     
     if args.cluster_type == 'kmeans':
--- a/Marea/marea_cluster.xml	Mon Nov 25 12:05:33 2019 -0500
+++ b/Marea/marea_cluster.xml	Tue Dec 03 12:34:30 2019 -0500
@@ -1,4 +1,4 @@
-<tool id="MaREA_cluester" name="Cluster Analysis" version="1.1.1">
+<tool id="MaREA_cluester" name="Cluster Analysis" version="1.1.2">
     <description></description>
     <macros>
         <import>marea_macros.xml</import>