Mercurial > repos > george-weingart > micropita
view src/breadcrumbs/src/KMedoids.py @ 0:2f4f6f08c8c4 draft
Uploaded
author | george-weingart |
---|---|
date | Tue, 13 May 2014 21:58:57 -0400 |
parents | |
children |
line wrap: on
line source
## Included from MLPY build 2.2.0 ## Attempts were made to contact Davide Albanese on 08-10-2012 and 09-19-2012 at albanese@fbk.eu ## This code is written by Davide Albanese, <albanese@fbk.eu> ## (C) 2009 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY. ## This program is free software: you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation, either version 3 of the License, or ## (at your option) any later version. ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## You should have received a copy of the GNU General Public License ## along with this program. If not, see <http://www.gnu.org/licenses/>. __all__= ['Kmedoids', 'Minkowski'] import numpy as np import matplotlib matplotlib.use( "Agg" ) import mlpy def kmedoids_core(x, med, oth, clust, cost, dist): """ * for each mediod m * for each non-mediod data point n Swap m and n and compute the total cost of the configuration Select the configuration with the lowest cost """ d = np.empty((oth.shape[0], med.shape[0]), dtype=float) med_n = np.empty_like(med) oth_n = np.empty_like(oth) idx = np.arange(oth.shape[0]) med_cur = med.copy() oth_cur = oth.copy() clust_cur = clust.copy() cost_cur = cost for i, m in enumerate(med): for j, n in enumerate(oth[clust == i]): med_n, oth_n = med.copy(), oth.copy() med_n[i] = n tmp = oth_n[clust == i] tmp[j] = m oth_n[clust == i] = tmp for ii, nn in enumerate(oth_n): for jj, mm in enumerate(med_n): d[ii, jj] = dist.compute(x[mm], x[nn]) clust_n = np.argmin(d, axis=1) # clusters cost_n = np.sum(d[idx, clust_n]) # total cost of configuration if cost_n <= cost_cur: med_cur = med_n.copy() oth_cur = oth_n.copy() clust_cur = clust_n.copy() cost_cur = cost_n return med_cur, oth_cur, clust_cur, cost_cur class Kmedoids: """k-medoids algorithm. """ def __init__(self, k, dist, maxloops=100, rs=0): """Initialize Kmedoids. :Parameters: k : int Number of clusters/medoids dist : class class with a .compute(x, y) method which returns a distance maxloops : int maximum number of loops rs : int random seed Example: >>> import numpy as np >>> import mlpy >>> x = np.array([[ 1. , 1.5], ... [ 1.1, 1.8], ... [ 2. , 2.8], ... [ 3.2, 3.1], ... [ 3.4, 3.2]]) >>> dtw = mlpy.Dtw(onlydist=True) >>> km = mlpy.Kmedoids(k=3, dist=dtw) >>> km.compute(x) (array([4, 0, 2]), array([3, 1]), array([0, 1]), 0.072499999999999981) Samples 4, 0, 2 are medoids and represent cluster 0, 1, 2 respectively. * cluster 0: samples 4 (medoid) and 3 * cluster 1: samples 0 (medoid) and 1 * cluster 2: sample 2 (medoid) """ self.__k = k self.__maxloops = maxloops self.__rs = rs self.__dist = dist np.random.seed(self.__rs) def compute(self, x): """Compute Kmedoids. :Parameters: x : ndarray An 2-dimensional vector (sample x features). :Returns: m : ndarray (1-dimensional vector) medoids indexes n : ndarray (1-dimensional vector) non-medoids indexes cl : ndarray 1-dimensional vector) cluster membership for non-medoids. Groups are in 0, ..., k-1 co : double total cost of configuration """ # randomly select k of the n data points as the mediods idx = np.arange(x.shape[0]) np.random.shuffle(idx) med = idx[0:self.__k] oth = idx[self.__k::] # compute distances d = np.empty((oth.shape[0], med.shape[0]), dtype=float) for i, n in enumerate(oth): for j, m in enumerate(med): d[i, j] = self.__dist.compute(x[m], x[n]) # associate each data point to the closest medoid clust = np.argmin(d, axis=1) # total cost of configuration cost = np.sum(d[np.arange(d.shape[0]), clust]) # repeat kmedoids_core until there is no change in the medoid for l in range(self.__maxloops): med_n, oth_n, clust_n, cost_n = kmedoids_core(x, med, oth, clust, cost, self.__dist) if (cost_n < cost): med, oth, clust, cost = med_n, oth_n, clust_n, cost_n else: break return med, oth, clust, cost class Minkowski: """ Computes the Minkowski distance between two vectors ``x`` and ``y``. .. math:: {||x-y||}_p = (\sum{|x_i - y_i|^p})^{1/p}. """ def __init__(self, p): """ Initialize Minkowski class. :Parameters: p : float The norm of the difference :math:`{||x-y||}_p` """ self.__p = p def compute(self, x, y): """ Compute Minkowski distance :Parameters: x : ndarray An 1-dimensional vector. y : ndarray An 1-dimensional vector. :Returns: d : float The Minkowski distance between vectors ``x`` and ``y`` """ return (abs(x - y)**self.__p).sum() ** (1.0 / self.__p)