| 
16
 | 
     1 # -*- coding: utf-8 -*-
 | 
| 
 | 
     2 """
 | 
| 
 | 
     3 Created on Mon Jun 3 19:51:00 2019
 | 
| 
 | 
     4 @author: Narger
 | 
| 
 | 
     5 """
 | 
| 
 | 
     6 
 | 
| 
0
 | 
     7 import sys
 | 
| 
 | 
     8 import argparse
 | 
| 
16
 | 
     9 import os
 | 
| 
 | 
    10 from sklearn.datasets import make_blobs
 | 
| 
 | 
    11 from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
 | 
| 
42
 | 
    12 from sklearn.metrics import silhouette_samples, silhouette_score, cluster
 | 
| 
25
 | 
    13 import matplotlib
 | 
| 
 | 
    14 matplotlib.use('agg')
 | 
| 
0
 | 
    15 import matplotlib.pyplot as plt
 | 
| 
16
 | 
    16 import scipy.cluster.hierarchy as shc   
 | 
| 
 | 
    17 import matplotlib.cm as cm
 | 
| 
 | 
    18 import numpy as np
 | 
| 
 | 
    19 import pandas as pd
 | 
| 
0
 | 
    20 
 | 
| 
16
 | 
    21 ################################# process args ###############################
 | 
| 
0
 | 
    22 
 | 
| 
 | 
    23 def process_args(args):
 | 
| 
 | 
    24     parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
 | 
| 
 | 
    25                                      description = 'process some value\'s' +
 | 
| 
 | 
    26                                      ' genes to create class.')
 | 
| 
16
 | 
    27 
 | 
| 
 | 
    28     parser.add_argument('-ol', '--out_log', 
 | 
| 
 | 
    29                         help = "Output log")
 | 
| 
 | 
    30     
 | 
| 
 | 
    31     parser.add_argument('-in', '--input',
 | 
| 
0
 | 
    32                         type = str,
 | 
| 
16
 | 
    33                         help = 'input dataset')
 | 
| 
 | 
    34     
 | 
| 
 | 
    35     parser.add_argument('-cy', '--cluster_type',
 | 
| 
0
 | 
    36                         type = str,
 | 
| 
37
 | 
    37                         choices = ['kmeans', 'dbscan', 'hierarchy'],
 | 
| 
16
 | 
    38                         default = 'kmeans',
 | 
| 
 | 
    39                         help = 'choose clustering algorythm')
 | 
| 
 | 
    40     
 | 
| 
 | 
    41     parser.add_argument('-k1', '--k_min', 
 | 
| 
 | 
    42                         type = int,
 | 
| 
 | 
    43                         default = 2,
 | 
| 
 | 
    44                         help = 'choose minimun cluster number to be generated')
 | 
| 
 | 
    45     
 | 
| 
 | 
    46     parser.add_argument('-k2', '--k_max', 
 | 
| 
0
 | 
    47                         type = int,
 | 
| 
16
 | 
    48                         default = 7,
 | 
| 
 | 
    49                         help = 'choose maximum cluster number to be generated')
 | 
| 
 | 
    50     
 | 
| 
 | 
    51     parser.add_argument('-el', '--elbow', 
 | 
| 
 | 
    52                         type = str,
 | 
| 
 | 
    53                         default = 'false',
 | 
| 
 | 
    54                         choices = ['true', 'false'],
 | 
| 
 | 
    55                         help = 'choose if you want to generate an elbow plot for kmeans')
 | 
| 
 | 
    56     
 | 
| 
 | 
    57     parser.add_argument('-si', '--silhouette', 
 | 
| 
0
 | 
    58                         type = str,
 | 
| 
16
 | 
    59                         default = 'false',
 | 
| 
 | 
    60                         choices = ['true', 'false'],
 | 
| 
 | 
    61                         help = 'choose if you want silhouette plots')
 | 
| 
 | 
    62     
 | 
| 
0
 | 
    63     parser.add_argument('-td', '--tool_dir',
 | 
| 
 | 
    64                         type = str,
 | 
| 
 | 
    65                         required = True,
 | 
| 
 | 
    66                         help = 'your tool directory')
 | 
| 
16
 | 
    67                         
 | 
| 
 | 
    68     parser.add_argument('-ms', '--min_samples',
 | 
| 
33
 | 
    69                         type = float,
 | 
| 
16
 | 
    70                         help = 'min samples for dbscan (optional)')
 | 
| 
 | 
    71                         
 | 
| 
 | 
    72     parser.add_argument('-ep', '--eps',
 | 
| 
33
 | 
    73                         type = float,
 | 
| 
16
 | 
    74                         help = 'eps for dbscan (optional)')
 | 
| 
28
 | 
    75                         
 | 
| 
 | 
    76     parser.add_argument('-bc', '--best_cluster',
 | 
| 
 | 
    77                         type = str,
 | 
| 
 | 
    78                         help = 'output of best cluster tsv')
 | 
| 
 | 
    79     				
 | 
| 
16
 | 
    80     
 | 
| 
 | 
    81     
 | 
| 
0
 | 
    82     args = parser.parse_args()
 | 
| 
 | 
    83     return args
 | 
| 
 | 
    84 
 | 
| 
 | 
    85 ########################### warning ###########################################
 | 
| 
 | 
    86 
 | 
| 
 | 
    87 def warning(s):
 | 
| 
 | 
    88     args = process_args(sys.argv)
 | 
| 
 | 
    89     with open(args.out_log, 'a') as log:
 | 
| 
16
 | 
    90         log.write(s + "\n\n")
 | 
| 
 | 
    91     print(s)
 | 
| 
0
 | 
    92 
 | 
| 
16
 | 
    93 ########################## read dataset ######################################
 | 
| 
 | 
    94     
 | 
| 
 | 
    95 def read_dataset(dataset):
 | 
| 
0
 | 
    96     try:
 | 
| 
16
 | 
    97         dataset = pd.read_csv(dataset, sep = '\t', header = 0)
 | 
| 
0
 | 
    98     except pd.errors.EmptyDataError:
 | 
| 
16
 | 
    99         sys.exit('Execution aborted: wrong format of dataset\n')
 | 
| 
0
 | 
   100     if len(dataset.columns) < 2:
 | 
| 
16
 | 
   101         sys.exit('Execution aborted: wrong format of dataset\n')
 | 
| 
 | 
   102     return dataset
 | 
| 
 | 
   103 
 | 
| 
 | 
   104 ############################ rewrite_input ###################################
 | 
| 
 | 
   105     
 | 
| 
 | 
   106 def rewrite_input(dataset):
 | 
| 
 | 
   107     #Riscrivo il dataset come dizionario di liste, 
 | 
| 
 | 
   108     #non come dizionario di dizionari
 | 
| 
 | 
   109     
 | 
| 
36
 | 
   110     dataset.pop('Reactions', None)
 | 
| 
 | 
   111     
 | 
| 
16
 | 
   112     for key, val in dataset.items():
 | 
| 
 | 
   113         l = []
 | 
| 
 | 
   114         for i in val:
 | 
| 
 | 
   115             if i == 'None':
 | 
| 
 | 
   116                 l.append(None)
 | 
| 
 | 
   117             else:
 | 
| 
 | 
   118                 l.append(float(i))
 | 
| 
 | 
   119    
 | 
| 
 | 
   120         dataset[key] = l
 | 
| 
 | 
   121     
 | 
| 
0
 | 
   122     return dataset
 | 
| 
 | 
   123 
 | 
| 
16
 | 
   124 ############################## write to csv ##################################
 | 
| 
0
 | 
   125     
 | 
| 
16
 | 
   126 def write_to_csv (dataset, labels, name):
 | 
| 
23
 | 
   127     #labels = predict
 | 
| 
 | 
   128     predict = [x+1 for x in labels]
 | 
| 
 | 
   129   
 | 
| 
 | 
   130     classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
 | 
| 
0
 | 
   131 
 | 
| 
23
 | 
   132     dest = name
 | 
| 
 | 
   133     classe.to_csv(dest, sep = '\t', index = False,
 | 
| 
 | 
   134                       header = ['Patient_ID', 'Class'])
 | 
| 
28
 | 
   135    
 | 
| 
16
 | 
   136 ########################### trova il massimo in lista ########################
 | 
| 
 | 
   137 def max_index (lista):
 | 
| 
 | 
   138     best = -1
 | 
| 
 | 
   139     best_index = 0
 | 
| 
 | 
   140     for i in range(len(lista)):
 | 
| 
 | 
   141         if lista[i] > best:
 | 
| 
 | 
   142             best = lista [i]
 | 
| 
 | 
   143             best_index = i
 | 
| 
 | 
   144             
 | 
| 
 | 
   145     return best_index
 | 
| 
 | 
   146     
 | 
| 
 | 
   147 ################################ kmeans #####################################
 | 
| 
 | 
   148     
 | 
| 
37
 | 
   149 def kmeans (k_min, k_max, dataset, elbow, silhouette, best_cluster):
 | 
| 
23
 | 
   150     if not os.path.exists('clustering'):
 | 
| 
 | 
   151         os.makedirs('clustering')
 | 
| 
16
 | 
   152     
 | 
| 
 | 
   153         
 | 
| 
 | 
   154     if elbow == 'true':
 | 
| 
 | 
   155         elbow = True
 | 
| 
 | 
   156     else:
 | 
| 
 | 
   157         elbow = False
 | 
| 
 | 
   158         
 | 
| 
 | 
   159     if silhouette == 'true':
 | 
| 
 | 
   160         silhouette = True
 | 
| 
 | 
   161     else:
 | 
| 
 | 
   162         silhouette = False
 | 
| 
 | 
   163         
 | 
| 
 | 
   164     range_n_clusters = [i for i in range(k_min, k_max+1)]
 | 
| 
 | 
   165     distortions = []
 | 
| 
 | 
   166     scores = []
 | 
| 
 | 
   167     all_labels = []
 | 
| 
 | 
   168     
 | 
| 
31
 | 
   169     clusterer = KMeans(n_clusters=1, random_state=10)
 | 
| 
 | 
   170     distortions.append(clusterer.fit(dataset).inertia_)
 | 
| 
 | 
   171     
 | 
| 
 | 
   172     
 | 
| 
16
 | 
   173     for n_clusters in range_n_clusters:
 | 
| 
 | 
   174         clusterer = KMeans(n_clusters=n_clusters, random_state=10)
 | 
| 
 | 
   175         cluster_labels = clusterer.fit_predict(dataset)
 | 
| 
 | 
   176         
 | 
| 
 | 
   177         all_labels.append(cluster_labels)
 | 
| 
28
 | 
   178         if n_clusters == 1:
 | 
| 
 | 
   179         	silhouette_avg = 0
 | 
| 
 | 
   180         else:
 | 
| 
 | 
   181             silhouette_avg = silhouette_score(dataset, cluster_labels)
 | 
| 
16
 | 
   182         scores.append(silhouette_avg)
 | 
| 
 | 
   183         distortions.append(clusterer.fit(dataset).inertia_)
 | 
| 
 | 
   184         
 | 
| 
 | 
   185     best = max_index(scores) + k_min
 | 
| 
 | 
   186         
 | 
| 
 | 
   187     for i in range(len(all_labels)):
 | 
| 
 | 
   188         prefix = ''
 | 
| 
 | 
   189         if (i + k_min == best):
 | 
| 
 | 
   190             prefix = '_BEST'
 | 
| 
 | 
   191             
 | 
| 
23
 | 
   192         write_to_csv(dataset, all_labels[i], 'clustering/kmeans_with_' + str(i + k_min) + prefix + '_clusters.tsv')
 | 
| 
28
 | 
   193         
 | 
| 
 | 
   194         
 | 
| 
 | 
   195         if (prefix == '_BEST'):
 | 
| 
 | 
   196             labels = all_labels[i]
 | 
| 
 | 
   197             predict = [x+1 for x in labels]
 | 
| 
 | 
   198             classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
 | 
| 
 | 
   199             classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class'])
 | 
| 
 | 
   200             
 | 
| 
34
 | 
   201           
 | 
| 
16
 | 
   202         
 | 
| 
 | 
   203        
 | 
| 
 | 
   204         if silhouette:
 | 
| 
23
 | 
   205             silihouette_draw(dataset, all_labels[i], i + k_min, 'clustering/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png')
 | 
| 
16
 | 
   206         
 | 
| 
 | 
   207         
 | 
| 
 | 
   208     if elbow:
 | 
| 
 | 
   209         elbow_plot(distortions, k_min,k_max) 
 | 
| 
0
 | 
   210 
 | 
| 
16
 | 
   211    
 | 
| 
 | 
   212     
 | 
| 
 | 
   213     
 | 
| 
0
 | 
   214 
 | 
| 
16
 | 
   215 ############################## elbow_plot ####################################
 | 
| 
 | 
   216     
 | 
| 
 | 
   217 def elbow_plot (distortions, k_min, k_max):
 | 
| 
 | 
   218     plt.figure(0)
 | 
| 
31
 | 
   219     x = list(range(k_min, k_max + 1))
 | 
| 
 | 
   220     x.insert(0, 1)
 | 
| 
 | 
   221     plt.plot(x, distortions, marker = 'o')
 | 
| 
 | 
   222     plt.xlabel('Number of clusters (k)')
 | 
| 
16
 | 
   223     plt.ylabel('Distortion')
 | 
| 
23
 | 
   224     s = 'clustering/elbow_plot.png'
 | 
| 
16
 | 
   225     fig = plt.gcf()
 | 
| 
 | 
   226     fig.set_size_inches(18.5, 10.5, forward = True)
 | 
| 
 | 
   227     fig.savefig(s, dpi=100)
 | 
| 
 | 
   228     
 | 
| 
 | 
   229     
 | 
| 
 | 
   230 ############################## silhouette plot ###############################
 | 
| 
 | 
   231 def silihouette_draw(dataset, labels, n_clusters, path):
 | 
| 
28
 | 
   232     if n_clusters == 1:
 | 
| 
 | 
   233         return None
 | 
| 
 | 
   234         
 | 
| 
16
 | 
   235     silhouette_avg = silhouette_score(dataset, labels)
 | 
| 
 | 
   236     warning("For n_clusters = " + str(n_clusters) +
 | 
| 
 | 
   237           " The average silhouette_score is: " + str(silhouette_avg))
 | 
| 
 | 
   238            
 | 
| 
 | 
   239     plt.close('all')
 | 
| 
 | 
   240     # Create a subplot with 1 row and 2 columns
 | 
| 
 | 
   241     fig, (ax1) = plt.subplots(1, 1)
 | 
| 
 | 
   242     
 | 
| 
 | 
   243     fig.set_size_inches(18, 7)
 | 
| 
 | 
   244         
 | 
| 
 | 
   245     # The 1st subplot is the silhouette plot
 | 
| 
 | 
   246     # The silhouette coefficient can range from -1, 1 but in this example all
 | 
| 
 | 
   247     # lie within [-0.1, 1]
 | 
| 
 | 
   248     ax1.set_xlim([-1, 1])
 | 
| 
 | 
   249     # The (n_clusters+1)*10 is for inserting blank space between silhouette
 | 
| 
 | 
   250     # plots of individual clusters, to demarcate them clearly.
 | 
| 
 | 
   251     ax1.set_ylim([0, len(dataset) + (n_clusters + 1) * 10])
 | 
| 
 | 
   252     
 | 
| 
 | 
   253     # Compute the silhouette scores for each sample
 | 
| 
 | 
   254     sample_silhouette_values = silhouette_samples(dataset, labels)
 | 
| 
 | 
   255         
 | 
| 
 | 
   256     y_lower = 10
 | 
| 
 | 
   257     for i in range(n_clusters):
 | 
| 
 | 
   258         # Aggregate the silhouette scores for samples belonging to
 | 
| 
 | 
   259         # cluster i, and sort them
 | 
| 
 | 
   260         ith_cluster_silhouette_values = \
 | 
| 
 | 
   261         sample_silhouette_values[labels == i]
 | 
| 
 | 
   262         
 | 
| 
 | 
   263         ith_cluster_silhouette_values.sort()
 | 
| 
 | 
   264     
 | 
| 
 | 
   265         size_cluster_i = ith_cluster_silhouette_values.shape[0]
 | 
| 
 | 
   266         y_upper = y_lower + size_cluster_i
 | 
| 
 | 
   267     
 | 
| 
 | 
   268         color = cm.nipy_spectral(float(i) / n_clusters)
 | 
| 
 | 
   269         ax1.fill_betweenx(np.arange(y_lower, y_upper),
 | 
| 
 | 
   270                           0, ith_cluster_silhouette_values,
 | 
| 
 | 
   271                                      facecolor=color, edgecolor=color, alpha=0.7)
 | 
| 
 | 
   272         
 | 
| 
 | 
   273         # Label the silhouette plots with their cluster numbers at the middle
 | 
| 
 | 
   274         ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
 | 
| 
 | 
   275         
 | 
| 
 | 
   276         # Compute the new y_lower for next plot
 | 
| 
 | 
   277         y_lower = y_upper + 10  # 10 for the 0 samples
 | 
| 
 | 
   278     
 | 
| 
 | 
   279         ax1.set_title("The silhouette plot for the various clusters.")
 | 
| 
 | 
   280         ax1.set_xlabel("The silhouette coefficient values")
 | 
| 
 | 
   281         ax1.set_ylabel("Cluster label")
 | 
| 
 | 
   282         
 | 
| 
 | 
   283         # The vertical line for average silhouette score of all the values
 | 
| 
 | 
   284         ax1.axvline(x=silhouette_avg, color="red", linestyle="--")
 | 
| 
 | 
   285     
 | 
| 
 | 
   286         ax1.set_yticks([])  # Clear the yaxis labels / ticks
 | 
| 
 | 
   287         ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
 | 
| 
 | 
   288         
 | 
| 
 | 
   289         
 | 
| 
 | 
   290         plt.suptitle(("Silhouette analysis for clustering on sample data "
 | 
| 
 | 
   291                           "with n_clusters = " + str(n_clusters) + "\nAverage silhouette_score = " + str(silhouette_avg)), fontsize=12, fontweight='bold')
 | 
| 
 | 
   292             
 | 
| 
 | 
   293             
 | 
| 
 | 
   294         plt.savefig(path, bbox_inches='tight')
 | 
| 
 | 
   295             
 | 
| 
 | 
   296 ######################## dbscan ##############################################
 | 
| 
 | 
   297     
 | 
| 
33
 | 
   298 def dbscan(dataset, eps, min_samples, best_cluster):
 | 
| 
23
 | 
   299     if not os.path.exists('clustering'):
 | 
| 
 | 
   300         os.makedirs('clustering')
 | 
| 
16
 | 
   301         
 | 
| 
 | 
   302     if eps is not None:
 | 
| 
 | 
   303     	clusterer = DBSCAN(eps = eps, min_samples = min_samples)
 | 
| 
0
 | 
   304     else:
 | 
| 
16
 | 
   305     	clusterer = DBSCAN()
 | 
| 
 | 
   306     
 | 
| 
 | 
   307     clustering = clusterer.fit(dataset)
 | 
| 
 | 
   308     
 | 
| 
 | 
   309     core_samples_mask = np.zeros_like(clustering.labels_, dtype=bool)
 | 
| 
 | 
   310     core_samples_mask[clustering.core_sample_indices_] = True
 | 
| 
 | 
   311     labels = clustering.labels_
 | 
| 
0
 | 
   312 
 | 
| 
16
 | 
   313     # Number of clusters in labels, ignoring noise if present.
 | 
| 
 | 
   314     n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
 | 
| 
 | 
   315     
 | 
| 
 | 
   316     
 | 
| 
33
 | 
   317     labels = labels
 | 
| 
 | 
   318     predict = [x+1 for x in labels]
 | 
| 
 | 
   319     classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
 | 
| 
 | 
   320     classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class'])
 | 
| 
 | 
   321   
 | 
| 
16
 | 
   322     
 | 
| 
 | 
   323 ########################## hierachical #######################################
 | 
| 
 | 
   324     
 | 
| 
34
 | 
   325 def hierachical_agglomerative(dataset, k_min, k_max, best_cluster, silhouette):
 | 
| 
0
 | 
   326 
 | 
| 
23
 | 
   327     if not os.path.exists('clustering'):
 | 
| 
 | 
   328         os.makedirs('clustering')
 | 
| 
16
 | 
   329     
 | 
| 
 | 
   330     plt.figure(figsize=(10, 7))  
 | 
| 
42
 | 
   331     plt.title("Customer Dendograms")  
 | 
| 
37
 | 
   332     shc.dendrogram(shc.linkage(dataset, method='ward'), labels=dataset.index.values.tolist())  
 | 
| 
16
 | 
   333     fig = plt.gcf()
 | 
| 
23
 | 
   334     fig.savefig('clustering/dendogram.png', dpi=200)
 | 
| 
16
 | 
   335     
 | 
| 
 | 
   336     range_n_clusters = [i for i in range(k_min, k_max+1)]
 | 
| 
0
 | 
   337 
 | 
| 
33
 | 
   338     scores = []
 | 
| 
 | 
   339     labels = []
 | 
| 
34
 | 
   340     
 | 
| 
42
 | 
   341     n_classi = dataset.shape[0]
 | 
| 
 | 
   342     
 | 
| 
 | 
   343     for n_clusters in range_n_clusters:  
 | 
| 
16
 | 
   344         cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward')  
 | 
| 
 | 
   345         cluster.fit_predict(dataset)  
 | 
| 
 | 
   346         cluster_labels = cluster.labels_
 | 
| 
33
 | 
   347         labels.append(cluster_labels)
 | 
| 
23
 | 
   348         write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv')
 | 
| 
42
 | 
   349         
 | 
| 
33
 | 
   350     best = max_index(scores) + k_min
 | 
| 
34
 | 
   351     
 | 
| 
 | 
   352     for i in range(len(labels)):
 | 
| 
 | 
   353         prefix = ''
 | 
| 
 | 
   354         if (i + k_min == best):
 | 
| 
 | 
   355             prefix = '_BEST'
 | 
| 
 | 
   356         if silhouette == 'true':
 | 
| 
 | 
   357             silihouette_draw(dataset, labels[i], i + k_min, 'clustering/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png')
 | 
| 
33
 | 
   358      
 | 
| 
 | 
   359     for i in range(len(labels)):
 | 
| 
 | 
   360         if (i + k_min == best):
 | 
| 
 | 
   361             labels = labels[i]
 | 
| 
 | 
   362             predict = [x+1 for x in labels]
 | 
| 
 | 
   363             classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str)
 | 
| 
 | 
   364             classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class'])
 | 
| 
34
 | 
   365             
 | 
| 
16
 | 
   366     
 | 
| 
 | 
   367 ############################# main ###########################################
 | 
| 
0
 | 
   368 
 | 
| 
 | 
   369 
 | 
| 
 | 
   370 def main():
 | 
| 
16
 | 
   371     if not os.path.exists('clustering'):
 | 
| 
 | 
   372         os.makedirs('clustering')
 | 
| 
 | 
   373 
 | 
| 
0
 | 
   374     args = process_args(sys.argv)
 | 
| 
16
 | 
   375     
 | 
| 
 | 
   376     #Data read
 | 
| 
 | 
   377     
 | 
| 
 | 
   378     X = read_dataset(args.input)
 | 
| 
 | 
   379     X = pd.DataFrame.to_dict(X, orient='list')
 | 
| 
 | 
   380     X = rewrite_input(X)
 | 
| 
 | 
   381     X = pd.DataFrame.from_dict(X, orient = 'index')
 | 
| 
 | 
   382     
 | 
| 
 | 
   383     for i in X.columns:
 | 
| 
 | 
   384         tmp = X[i][0]
 | 
| 
 | 
   385         if tmp == None:
 | 
| 
 | 
   386             X = X.drop(columns=[i])
 | 
| 
42
 | 
   387             
 | 
| 
 | 
   388     if args.k_max != None:
 | 
| 
 | 
   389        numero_classi = X.shape[0]
 | 
| 
 | 
   390        while args.k_max >= numero_classi:
 | 
| 
 | 
   391           err = 'Skipping k = ' + str(args.k_max) + ' since it is >= number of classes of dataset'
 | 
| 
 | 
   392           warning(err)
 | 
| 
 | 
   393           args.k_max = args.k_max - 1
 | 
| 
16
 | 
   394     
 | 
| 
 | 
   395     
 | 
| 
 | 
   396     if args.cluster_type == 'kmeans':
 | 
| 
37
 | 
   397         kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.best_cluster)
 | 
| 
16
 | 
   398     
 | 
| 
 | 
   399     if args.cluster_type == 'dbscan':
 | 
| 
33
 | 
   400         dbscan(X, args.eps, args.min_samples, args.best_cluster)
 | 
| 
16
 | 
   401         
 | 
| 
 | 
   402     if args.cluster_type == 'hierarchy':
 | 
| 
34
 | 
   403         hierachical_agglomerative(X, args.k_min, args.k_max, args.best_cluster, args.silhouette)
 | 
| 
16
 | 
   404         
 | 
| 
 | 
   405 ##############################################################################
 | 
| 
0
 | 
   406 
 | 
| 
 | 
   407 if __name__ == "__main__":
 | 
| 
28
 | 
   408     main()
 |