Mercurial > repos > bimib > marea
comparison Marea/marea_cluster.py @ 37:2495c7772ca8 draft
Uploaded
| author | bimib |
|---|---|
| date | Mon, 25 Nov 2019 11:57:57 -0500 |
| parents | 94c51690d40c |
| children | 2a082b4aed02 |
comparison
Legend: equal, deleted, inserted, replaced
| 36:94c51690d40c | 37:2495c7772ca8 |
|---|---|
| 32 type = str, | 32 type = str, |
| 33 help = 'input dataset') | 33 help = 'input dataset') |
| 34 | 34 |
| 35 parser.add_argument('-cy', '--cluster_type', | 35 parser.add_argument('-cy', '--cluster_type', |
| 36 type = str, | 36 type = str, |
| 37 choices = ['kmeans', 'meanshift', 'dbscan', 'hierarchy'], | 37 choices = ['kmeans', 'dbscan', 'hierarchy'], |
| 38 default = 'kmeans', | 38 default = 'kmeans', |
| 39 help = 'choose clustering algorythm') | 39 help = 'choose clustering algorythm') |
| 40 | 40 |
| 41 parser.add_argument('-k1', '--k_min', | 41 parser.add_argument('-k1', '--k_min', |
| 42 type = int, | 42 type = int, |
| 57 parser.add_argument('-si', '--silhouette', | 57 parser.add_argument('-si', '--silhouette', |
| 58 type = str, | 58 type = str, |
| 59 default = 'false', | 59 default = 'false', |
| 60 choices = ['true', 'false'], | 60 choices = ['true', 'false'], |
| 61 help = 'choose if you want silhouette plots') | 61 help = 'choose if you want silhouette plots') |
| 62 | |
| 63 parser.add_argument('-db', '--davies', | |
| 64 type = str, | |
| 65 default = 'false', | |
| 66 choices = ['true', 'false'], | |
| 67 help = 'choose if you want davies bouldin scores') | |
| 68 | 62 |
| 69 parser.add_argument('-td', '--tool_dir', | 63 parser.add_argument('-td', '--tool_dir', |
| 70 type = str, | 64 type = str, |
| 71 required = True, | 65 required = True, |
| 72 help = 'your tool directory') | 66 help = 'your tool directory') |
| 150 | 144 |
| 151 return best_index | 145 return best_index |
| 152 | 146 |
| 153 ################################ kmeans ##################################### | 147 ################################ kmeans ##################################### |
| 154 | 148 |
| 155 def kmeans (k_min, k_max, dataset, elbow, silhouette, davies, best_cluster): | 149 def kmeans (k_min, k_max, dataset, elbow, silhouette, best_cluster): |
| 156 if not os.path.exists('clustering'): | 150 if not os.path.exists('clustering'): |
| 157 os.makedirs('clustering') | 151 os.makedirs('clustering') |
| 158 | 152 |
| 159 | 153 |
| 160 if elbow == 'true': | 154 if elbow == 'true': |
| 165 if silhouette == 'true': | 159 if silhouette == 'true': |
| 166 silhouette = True | 160 silhouette = True |
| 167 else: | 161 else: |
| 168 silhouette = False | 162 silhouette = False |
| 169 | 163 |
| 170 if davies == 'true': | |
| 171 davies = True | |
| 172 else: | |
| 173 davies = False | |
| 174 | |
| 175 | |
| 176 range_n_clusters = [i for i in range(k_min, k_max+1)] | 164 range_n_clusters = [i for i in range(k_min, k_max+1)] |
| 177 distortions = [] | 165 distortions = [] |
| 178 scores = [] | 166 scores = [] |
| 179 all_labels = [] | 167 all_labels = [] |
| 180 | 168 |
| 339 if not os.path.exists('clustering'): | 327 if not os.path.exists('clustering'): |
| 340 os.makedirs('clustering') | 328 os.makedirs('clustering') |
| 341 | 329 |
| 342 plt.figure(figsize=(10, 7)) | 330 plt.figure(figsize=(10, 7)) |
| 343 plt.title("Customer Dendograms") | 331 plt.title("Customer Dendograms") |
| 344 shc.dendrogram(shc.linkage(dataset, method='ward')) | 332 shc.dendrogram(shc.linkage(dataset, method='ward'), labels=dataset.index.values.tolist()) |
| 345 fig = plt.gcf() | 333 fig = plt.gcf() |
| 346 fig.savefig('clustering/dendogram.png', dpi=200) | 334 fig.savefig('clustering/dendogram.png', dpi=200) |
| 347 | 335 |
| 348 range_n_clusters = [i for i in range(k_min, k_max+1)] | 336 range_n_clusters = [i for i in range(k_min, k_max+1)] |
| 349 | 337 |
| 395 if tmp == None: | 383 if tmp == None: |
| 396 X = X.drop(columns=[i]) | 384 X = X.drop(columns=[i]) |
| 397 | 385 |
| 398 | 386 |
| 399 if args.cluster_type == 'kmeans': | 387 if args.cluster_type == 'kmeans': |
| 400 kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.davies, args.best_cluster) | 388 kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.best_cluster) |
| 401 | 389 |
| 402 if args.cluster_type == 'dbscan': | 390 if args.cluster_type == 'dbscan': |
| 403 dbscan(X, args.eps, args.min_samples, args.best_cluster) | 391 dbscan(X, args.eps, args.min_samples, args.best_cluster) |
| 404 | 392 |
| 405 if args.cluster_type == 'hierarchy': | 393 if args.cluster_type == 'hierarchy': |
