From Data Smart by John Foreman
http://www.wiley.com/WileyCDA/WileyTitle/productCd-111866146X.html.

Spherical K-Means

First check if skmeans is available, install if not.

if('skmeans' %in% rownames(installed.packages()) == FALSE) {
 install.packages('skmeans', repos='http://cran.us.r-project.org', dependencies=TRUE)
}

library(skmeans)
## Warning: package 'skmeans' was built under R version 3.1.2

Now get crackin

#getwd()
winedata <- read.csv('data/WineKMC.csv')
ncol(winedata)
## [1] 107
nrow(winedata)
## [1] 32
winedata[is.na(winedata)] <- 0

winedata[1:5,1:10]
##   Offer Mth   Varietal MinQty Disc Origin PastPeak Adams Allen Anders
## 1     1 Jan     Malbec     72   56 France    FALSE     0     0      0
## 2     2 Jan Pinot Noir     72   17 France    FALSE     0     0      0
## 3     3 Feb  Espumante    144   32 Oregon     TRUE     0     0      0
## 4     4 Feb  Champagne     72   48 France     TRUE     0     0      0
## 5     5 Feb Cab. Sauv.    144   44     NZ     TRUE     0     0      0
winedata.transposed <- t(winedata[,8:107])
winedata.transposed[1:10, 1:10]
##         [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## Adams      0    0    0    0    0    0    0    0    0     0
## Allen      0    0    0    0    0    0    0    0    1     0
## Anders     0    0    0    0    0    0    0    0    0     0
## Bailey     0    0    0    0    0    0    1    0    0     0
## Baker      0    0    0    0    0    0    1    0    0     1
## Barnes     0    0    0    0    0    0    0    0    0     1
## Bell       0    1    0    0    0    0    0    0    0     0
## Bennett    0    0    0    0    0    0    0    1    0     0
## Brooks     0    0    1    0    0    0    0    1    0     0
## Brown      0    0    0    0    0    0    1    0    0     0

Call skmeans on datatset, specifying five means, and use of genetic algorithm.

winedata.clusters <- skmeans(winedata.transposed, 5, method='genetic')
winedata.clusters
## A hard spherical k-means partition of 100 objects into 5 classes.
## Class sizes: 17, 17, 34, 16, 16
## Call: skmeans(x = winedata.transposed, k = 5, method = "genetic")

Using str, see that actual cluster assignments are stored within ‘cluster’ list of object.

str(winedata.clusters)
## List of 7
##  $ prototypes: num [1:5, 1:32] 0.1295 0.09 0 0.0974 0.1458 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:5] "1" "2" "3" "4" ...
##   .. ..$ : NULL
##  $ membership: NULL
##  $ cluster   : Named int [1:100] 3 5 2 3 4 4 2 3 1 3 ...
##   ..- attr(*, "names")= chr [1:100] "Adams" "Allen" "Anders" "Bailey" ...
##  $ family    :List of 7
##   ..$ description: chr "spherical k-means"
##   ..$ D          :function (x, prototypes)  
##   ..$ C          :function (x, weights, control)  
##   ..$ init       :function (x, k)  
##   ..$ e          : num 1
##   ..$ .modify    : NULL
##   ..$ .subset    : NULL
##   ..- attr(*, "class")= chr "pclust_family"
##  $ m         : num 1
##  $ value     : num 38.3
##  $ call      : language skmeans(x = winedata.transposed, k = 5, method = "genetic")
##  - attr(*, "class")= chr [1:2] "skmeans" "pclust"
winedata.clusters$cluster[4]
## Bailey 
##      3
winedata.clusters$cluster[which(row.names(winedata.transposed) == 'Wright')]
## Wright 
##      5
aggregate(winedata.transposed, by=list(winedata.clusters$cluster), sum)
##   Group.1 V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18
## 1       1  3  3  4  3  0  3  2  2  0   0   8   3   0   1   1   0   0   0
## 2       2  2  7  0  0  0  0  0  0  0   1   0   1   0   0   0   1   7   0
## 3       3  0  0  1  0  0  1 15 16  0   2   0   1   6   0   0   0   0  13
## 4       4  2  0  1  3  0  5  2  2  2   4   2   0   0   2   2   1   0   1
## 5       5  3  0  0  6  4  3  0  0  8   0   3   0   0   6   3   3   0   0
##   V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32
## 1   2   1   1  13   0   0   4   0   3   4   0   4   1   1
## 2   0   0   0   0   1  12   0  12   1   0   0   0   0   0
## 3   0   0   1   0   0   0   0   0   1   1  16  16   0   0
## 4   3   2   1   6   1   0   0   0   1   1   1   1  16   2
## 5   0   3   1   2   3   0   2   3   3   0   0   1   0   1
winedata.clustercounts <- t(aggregate(winedata.transposed, 
                                      by=list(winedata.clusters$cluster), 
                                      sum) [,2:33])
winedata.clustercounts
##     [,1] [,2] [,3] [,4] [,5]
## V1     3    2    0    2    3
## V2     3    7    0    0    0
## V3     4    0    1    1    0
## V4     3    0    0    3    6
## V5     0    0    0    0    4
## V6     3    0    1    5    3
## V7     2    0   15    2    0
## V8     2    0   16    2    0
## V9     0    0    0    2    8
## V10    0    1    2    4    0
## V11    8    0    0    2    3
## V12    3    1    1    0    0
## V13    0    0    6    0    0
## V14    1    0    0    2    6
## V15    1    0    0    2    3
## V16    0    1    0    1    3
## V17    0    7    0    0    0
## V18    0    0   13    1    0
## V19    2    0    0    3    0
## V20    1    0    0    2    3
## V21    1    0    1    1    1
## V22   13    0    0    6    2
## V23    0    1    0    1    3
## V24    0   12    0    0    0
## V25    4    0    0    0    2
## V26    0   12    0    0    3
## V27    3    1    1    1    3
## V28    4    0    1    1    0
## V29    0    0   16    1    0
## V30    4    0   16    1    1
## V31    1    0    0   16    0
## V32    1    0    0    2    1
winedata.desc.plus.counts <- cbind(winedata[,1:7], winedata.clustercounts)
winedata.desc.plus.counts
##     Offer  Mth   Varietal MinQty Disc    Origin PastPeak  1  2  3  4 5
## V1      1  Jan     Malbec     72   56    France    FALSE  3  2  0  2 3
## V2      2  Jan Pinot Noir     72   17    France    FALSE  3  7  0  0 0
## V3      3  Feb  Espumante    144   32    Oregon     TRUE  4  0  1  1 0
## V4      4  Feb  Champagne     72   48    France     TRUE  3  0  0  3 6
## V5      5  Feb Cab. Sauv.    144   44        NZ     TRUE  0  0  0  0 4
## V6      6  Mar   Prosecco    144   86     Chile    FALSE  3  0  1  5 3
## V7      7  Mar   Prosecco      6   40 Australia     TRUE  2  0 15  2 0
## V8      8  Mar  Espumante      6   45 S. Africa    FALSE  2  0 16  2 0
## V9      9  Apr Chardonnay    144   57     Chile    FALSE  0  0  0  2 8
## V10    10  Apr   Prosecco     72   52        CA    FALSE  0  1  2  4 0
## V11    11  May  Champagne     72   85    France    FALSE  8  0  0  2 3
## V12    12  May   Prosecco     72   83 Australia    FALSE  3  1  1  0 0
## V13    13  May     Merlot      6   43     Chile    FALSE  0  0  6  0 0
## V14    14  Jun     Merlot     72   64     Chile    FALSE  1  0  0  2 6
## V15    15  Jun Cab. Sauv.    144   19     Italy    FALSE  1  0  0  2 3
## V16    16  Jun     Merlot     72   88        CA    FALSE  0  1  0  1 3
## V17    17  Jul Pinot Noir     12   47   Germany    FALSE  0  7  0  0 0
## V18    18  Jul  Espumante      6   50    Oregon    FALSE  0  0 13  1 0
## V19    19  Jul  Champagne     12   66   Germany    FALSE  2  0  0  3 0
## V20    20  Aug Cab. Sauv.     72   82     Italy    FALSE  1  0  0  2 3
## V21    21  Aug  Champagne     12   50        CA    FALSE  1  0  1  1 1
## V22    22  Aug  Champagne     72   63    France    FALSE 13  0  0  6 2
## V23    23 Sept Chardonnay    144   39 S. Africa    FALSE  0  1  0  1 3
## V24    24 Sept Pinot Noir      6   34     Italy    FALSE  0 12  0  0 0
## V25    25  Oct Cab. Sauv.     72   59    Oregon     TRUE  4  0  0  0 2
## V26    26  Oct Pinot Noir    144   83 Australia    FALSE  0 12  0  0 3
## V27    27  Oct  Champagne     72   88        NZ    FALSE  3  1  1  1 3
## V28    28  Nov Cab. Sauv.     12   56    France     TRUE  4  0  1  1 0
## V29    29  Nov  P. Grigio      6   87    France    FALSE  0  0 16  1 0
## V30    30  Dec     Malbec      6   54    France    FALSE  4  0 16  1 1
## V31    31  Dec  Champagne     72   89    France    FALSE  1  0  0 16 0
## V32    32  Dec Cab. Sauv.     72   45   Germany     TRUE  1  0  0  2 1
winedata.desc.plus.counts[order(-winedata.desc.plus.counts[,8]),]
##     Offer  Mth   Varietal MinQty Disc    Origin PastPeak  1  2  3  4 5
## V22    22  Aug  Champagne     72   63    France    FALSE 13  0  0  6 2
## V11    11  May  Champagne     72   85    France    FALSE  8  0  0  2 3
## V3      3  Feb  Espumante    144   32    Oregon     TRUE  4  0  1  1 0
## V25    25  Oct Cab. Sauv.     72   59    Oregon     TRUE  4  0  0  0 2
## V28    28  Nov Cab. Sauv.     12   56    France     TRUE  4  0  1  1 0
## V30    30  Dec     Malbec      6   54    France    FALSE  4  0 16  1 1
## V1      1  Jan     Malbec     72   56    France    FALSE  3  2  0  2 3
## V2      2  Jan Pinot Noir     72   17    France    FALSE  3  7  0  0 0
## V4      4  Feb  Champagne     72   48    France     TRUE  3  0  0  3 6
## V6      6  Mar   Prosecco    144   86     Chile    FALSE  3  0  1  5 3
## V12    12  May   Prosecco     72   83 Australia    FALSE  3  1  1  0 0
## V27    27  Oct  Champagne     72   88        NZ    FALSE  3  1  1  1 3
## V7      7  Mar   Prosecco      6   40 Australia     TRUE  2  0 15  2 0
## V8      8  Mar  Espumante      6   45 S. Africa    FALSE  2  0 16  2 0
## V19    19  Jul  Champagne     12   66   Germany    FALSE  2  0  0  3 0
## V14    14  Jun     Merlot     72   64     Chile    FALSE  1  0  0  2 6
## V15    15  Jun Cab. Sauv.    144   19     Italy    FALSE  1  0  0  2 3
## V20    20  Aug Cab. Sauv.     72   82     Italy    FALSE  1  0  0  2 3
## V21    21  Aug  Champagne     12   50        CA    FALSE  1  0  1  1 1
## V31    31  Dec  Champagne     72   89    France    FALSE  1  0  0 16 0
## V32    32  Dec Cab. Sauv.     72   45   Germany     TRUE  1  0  0  2 1
## V5      5  Feb Cab. Sauv.    144   44        NZ     TRUE  0  0  0  0 4
## V9      9  Apr Chardonnay    144   57     Chile    FALSE  0  0  0  2 8
## V10    10  Apr   Prosecco     72   52        CA    FALSE  0  1  2  4 0
## V13    13  May     Merlot      6   43     Chile    FALSE  0  0  6  0 0
## V16    16  Jun     Merlot     72   88        CA    FALSE  0  1  0  1 3
## V17    17  Jul Pinot Noir     12   47   Germany    FALSE  0  7  0  0 0
## V18    18  Jul  Espumante      6   50    Oregon    FALSE  0  0 13  1 0
## V23    23 Sept Chardonnay    144   39 S. Africa    FALSE  0  1  0  1 3
## V24    24 Sept Pinot Noir      6   34     Italy    FALSE  0 12  0  0 0
## V26    26  Oct Pinot Noir    144   83 Australia    FALSE  0 12  0  0 3
## V29    29  Nov  P. Grigio      6   87    France    FALSE  0  0 16  1 0