Tài liệu phục vụ Seminar TKUD26, chi tiết tại [tại đây] (https://sites.google.com/view/tkud/home?authuser=1)

library(tidyverse)  # data manipulation
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.3     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(cluster)    # clustering algorithms
library(factoextra) # clustering algorithms & visualization
## Warning: package 'factoextra' was built under R version 4.0.5
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
Arrest <- USArrests
Arrest <- na.omit(Arrest)
head(Arrest)
##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7
Arrest <- scale(Arrest)
head(Arrest)
##                Murder   Assault   UrbanPop         Rape
## Alabama    1.24256408 0.7828393 -0.5209066 -0.003416473
## Alaska     0.50786248 1.1068225 -1.2117642  2.484202941
## Arizona    0.07163341 1.4788032  0.9989801  1.042878388
## Arkansas   0.23234938 0.2308680 -1.0735927 -0.184916602
## California 0.27826823 1.2628144  1.7589234  2.067820292
## Colorado   0.02571456 0.3988593  0.8608085  1.864967207
distance <- factoextra::get_dist(Arrest)
head(distance)
## [1] 2.703754 2.293520 1.289810 3.263110 2.651067 3.215297
fviz_dist(distance, gradient = list(low = "green", mid = "yellow", high = "red"))

k2 <- kmeans(Arrest, centers = 2, nstart = 25)
str(k2)
## List of 9
##  $ cluster     : Named int [1:50] 2 2 2 1 2 2 1 1 2 2 ...
##   ..- attr(*, "names")= chr [1:50] "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ centers     : num [1:2, 1:4] -0.67 1.005 -0.676 1.014 -0.132 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "1" "2"
##   .. ..$ : chr [1:4] "Murder" "Assault" "UrbanPop" "Rape"
##  $ totss       : num 196
##  $ withinss    : num [1:2] 56.1 46.7
##  $ tot.withinss: num 103
##  $ betweenss   : num 93.1
##  $ size        : int [1:2] 30 20
##  $ iter        : int 1
##  $ ifault      : int 0
##  - attr(*, "class")= chr "kmeans"
k2
## K-means clustering with 2 clusters of sizes 30, 20
## 
## Cluster means:
##      Murder    Assault   UrbanPop       Rape
## 1 -0.669956 -0.6758849 -0.1317235 -0.5646433
## 2  1.004934  1.0138274  0.1975853  0.8469650
## 
## Clustering vector:
##        Alabama         Alaska        Arizona       Arkansas     California 
##              2              2              2              1              2 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              2              1              1              2              2 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              1              1              2              1              1 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              1              1              2              1              2 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              1              2              1              2              2 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              1              1              2              1              1 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              2              2              2              1              1 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              1              1              1              1              2 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              1              2              2              1              1 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              1              1              1              1              1 
## 
## Within cluster sum of squares by cluster:
## [1] 56.11445 46.74796
##  (between_SS / total_SS =  47.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
fviz_cluster(k2, data = Arrest)

k3 <- kmeans(Arrest, centers = 3, nstart = 25)
fviz_cluster(k3, data = Arrest)

k3
## K-means clustering with 3 clusters of sizes 13, 20, 17
## 
## Cluster means:
##       Murder    Assault   UrbanPop       Rape
## 1 -0.9615407 -1.1066010 -0.9301069 -0.9667633
## 2  1.0049340  1.0138274  0.1975853  0.8469650
## 3 -0.4469795 -0.3465138  0.4788049 -0.2571398
## 
## Clustering vector:
##        Alabama         Alaska        Arizona       Arkansas     California 
##              2              2              2              3              2 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              2              3              3              2              2 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              3              1              2              3              1 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              3              1              2              1              2 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              3              2              1              2              2 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              1              1              2              1              3 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              2              2              2              1              3 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              3              3              3              3              2 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              1              2              2              3              1 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              3              3              1              1              3 
## 
## Within cluster sum of squares by cluster:
## [1] 11.95246 46.74796 19.62285
##  (between_SS / total_SS =  60.0 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
Arrest %>%
  as_tibble() %>%
  mutate(cluster = k2$cluster,
         state = row.names(USArrests)) %>%
  ggplot(aes(UrbanPop, Murder, color = factor(cluster), label = state)) +
  geom_text()

k3 <- kmeans(Arrest, centers = 3, nstart = 25)
k4 <- kmeans(Arrest, centers = 4, nstart = 25)
k5 <- kmeans(Arrest, centers = 5, nstart = 25)

# plots to compare
p1 <- fviz_cluster(k2, geom = "point", data = Arrest) + ggtitle("k = 2")
p2 <- fviz_cluster(k3, geom = "point",  data = Arrest) + ggtitle("k = 3")
p3 <- fviz_cluster(k4, geom = "point",  data = Arrest) + ggtitle("k = 4")
p4 <- fviz_cluster(k5, geom = "point",  data = Arrest) + ggtitle("k = 5")

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
grid.arrange(p1, p2, p3, p4, nrow = 2)

set.seed(100)

# function to compute total within-cluster sum of square (Wss)
wss <- function(k) {
  kmeans(Arrest, k, nstart = 10 )$tot.withinss
}

# Compute and plot wss for k = 1 to k = 15
k.values <- 1:15

# extract wss for 2-15 clusters
wss_values <- map_dbl(k.values, wss)

plot(k.values, wss_values,
       type="b", pch = 19, frame = FALSE, 
       xlab="Number of clusters K",
       ylab="Total within-clusters sum of squares")

require(dplyr)
#install.packages("nFactors")
library(nFactors)
## Warning: package 'nFactors' was built under R version 4.0.5
## Loading required package: lattice
## 
## Attaching package: 'nFactors'
## The following object is masked from 'package:lattice':
## 
##     parallel
set.seed(10)
ev <- eigen(cor(Arrest)) # get eigenvalues
ev
## eigen() decomposition
## $values
## [1] 2.4802416 0.9897652 0.3565632 0.1734301
## 
## $vectors
##            [,1]       [,2]       [,3]        [,4]
## [1,] -0.5358995  0.4181809 -0.3412327  0.64922780
## [2,] -0.5831836  0.1879856 -0.2681484 -0.74340748
## [3,] -0.2781909 -0.8728062 -0.3780158  0.13387773
## [4,] -0.5434321 -0.1673186  0.8177779  0.08902432
ap <- parallel(subject=nrow(Arrest),var=ncol(Arrest),
               rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)

set.seed(100)

fviz_nbclust(Arrest, kmeans, method = "wss")

str(Arrest)
##  num [1:50, 1:4] 1.2426 0.5079 0.0716 0.2323 0.2783 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:50] "Alabama" "Alaska" "Arizona" "Arkansas" ...
##   ..$ : chr [1:4] "Murder" "Assault" "UrbanPop" "Rape"
##  - attr(*, "scaled:center")= Named num [1:4] 7.79 170.76 65.54 21.23
##   ..- attr(*, "names")= chr [1:4] "Murder" "Assault" "UrbanPop" "Rape"
##  - attr(*, "scaled:scale")= Named num [1:4] 4.36 83.34 14.47 9.37
##   ..- attr(*, "names")= chr [1:4] "Murder" "Assault" "UrbanPop" "Rape"
# function to compute average silhouette for k clusters
avg_sil <- function(k) {
  km.res <- kmeans(Arrest, centers = k, nstart = 25)
  ss <- silhouette(km.res$cluster, dist(Arrest))
  mean(ss[, 3])
}

# Compute and plot wss for k = 2 to k = 15
k.values <- 2:15

# extract avg silhouette for 2-15 clusters
avg_sil_values <- map_dbl(k.values, avg_sil)

plot(k.values, avg_sil_values,
       type = "b", pch = 19, frame = FALSE, 
       xlab = "Number of clusters K",
       ylab = "Average Silhouettes")

fviz_nbclust(Arrest, kmeans, method = "silhouette")

#fviz_nbclust(irisB, kmeans, method = "silhouette")
# compute gap statistic
set.seed(100)
gap_stat <- clusGap(Arrest, FUN = kmeans, nstart = 25,
                    K.max = 10, B = 50)
# Print the result
print(gap_stat, method = "firstmax")
## Clustering Gap statistic ["clusGap"] from call:
## clusGap(x = Arrest, FUNcluster = kmeans, K.max = 10, B = 50,     nstart = 25)
## B=50 simulated reference sets, k = 1..10; spaceH0="scaledPCA"
##  --> Number of clusters (method 'firstmax'): 4
##           logW   E.logW       gap     SE.sim
##  [1,] 3.458369 3.630860 0.1724909 0.03625722
##  [2,] 3.135112 3.359114 0.2240025 0.03543637
##  [3,] 2.977727 3.224930 0.2472034 0.03342621
##  [4,] 2.826221 3.109910 0.2836891 0.03615581
##  [5,] 2.738868 3.011982 0.2731140 0.03645021
##  [6,] 2.666967 2.926076 0.2591085 0.03602029
##  [7,] 2.612957 2.846616 0.2336583 0.03447295
##  [8,] 2.545027 2.774382 0.2293546 0.03397513
##  [9,] 2.468162 2.706808 0.2386457 0.03383600
## [10,] 2.397200 2.640321 0.2431213 0.03479945
fviz_gap_stat(gap_stat)

library(fpc)
## Warning: package 'fpc' was built under R version 4.0.5
library(cluster)
irisB <- iris
names(irisB)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"
summary(irisB$Species)
##     setosa versicolor  virginica 
##         50         50         50
str(irisB)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
irisB$Species <- NULL
kmeans.result <- kmeans(irisB, 3)
kmeans.result
## K-means clustering with 3 clusters of sizes 50, 62, 38
## 
## Cluster means:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.006000    3.428000     1.462000    0.246000
## 2     5.901613    2.748387     4.393548    1.433871
## 3     6.850000    3.073684     5.742105    2.071053
## 
## Clustering vector:
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##  [75] 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 3 3 3 3 2 3 3 3 3
## [112] 3 3 2 2 3 3 3 3 2 3 2 3 2 3 3 2 2 3 3 3 3 3 2 3 3 3 3 2 3 3 3 2 3 3 3 2 3
## [149] 3 2
## 
## Within cluster sum of squares by cluster:
## [1] 15.15100 39.82097 23.87947
##  (between_SS / total_SS =  88.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
gap_stat2<-clusGap(irisB,FUN = kmeans,K.max = 10, B = 50, nstart = 25)
fviz_gap_stat(gap_stat2)

set.seed(100)
final <- kmeans(Arrest, 4, nstart = 25)
print(final)
## K-means clustering with 4 clusters of sizes 8, 16, 13, 13
## 
## Cluster means:
##       Murder    Assault   UrbanPop        Rape
## 1  1.4118898  0.8743346 -0.8145211  0.01927104
## 2 -0.4894375 -0.3826001  0.5758298 -0.26165379
## 3 -0.9615407 -1.1066010 -0.9301069 -0.96676331
## 4  0.6950701  1.0394414  0.7226370  1.27693964
## 
## Clustering vector:
##        Alabama         Alaska        Arizona       Arkansas     California 
##              1              4              4              1              4 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              4              2              2              4              1 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              2              3              4              2              3 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              2              3              1              3              4 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              2              4              3              1              4 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              3              3              4              3              2 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              4              4              1              3              2 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              2              2              2              2              1 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              3              1              4              2              3 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              2              2              3              3              2 
## 
## Within cluster sum of squares by cluster:
## [1]  8.316061 16.212213 11.952463 19.922437
##  (between_SS / total_SS =  71.2 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
fviz_cluster(final, data = Arrest)

USArrests %>%
  mutate(Cluster = final$cluster) %>%
  group_by(Cluster) %>%
  summarise_all("mean")
## # A tibble: 4 x 5
##   Cluster Murder Assault UrbanPop  Rape
##     <int>  <dbl>   <dbl>    <dbl> <dbl>
## 1       1  13.9    244.      53.8  21.4
## 2       2   5.66   139.      73.9  18.8
## 3       3   3.6     78.5     52.1  12.2
## 4       4  10.8    257.      76    33.2

##K-Medoids with US Arrest

#install.packages(c("cluster", "factoextra"))
library(cluster)
library(fpc)
library(factoextra)
fviz_nbclust(Arrest, kmeans, method = "silhouette")

pam2 <- pam(Arrest, 2,metric = "euclidean", stand = FALSE)
print(pam2)
## Medoids:
##            ID     Murder    Assault   UrbanPop       Rape
## New Mexico 31  0.8292944  1.3708088  0.3081225  1.1603196
## Nebraska   27 -0.8008247 -0.8250772 -0.2445636 -0.5052109
## Clustering vector:
##        Alabama         Alaska        Arizona       Arkansas     California 
##              1              1              1              2              1 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              1              2              2              1              1 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              2              2              1              2              2 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              2              2              1              2              1 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              2              1              2              1              1 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              2              2              1              2              2 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              1              1              1              2              2 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              2              2              2              2              1 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              2              1              1              2              2 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              2              2              2              2              2 
## Objective function:
##    build     swap 
## 1.441358 1.368969 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"
dd <- cbind(Arrest, cluster = pam2$cluster)
head(dd, n = 10)
##                  Murder    Assault   UrbanPop         Rape cluster
## Alabama      1.24256408  0.7828393 -0.5209066 -0.003416473       1
## Alaska       0.50786248  1.1068225 -1.2117642  2.484202941       1
## Arizona      0.07163341  1.4788032  0.9989801  1.042878388       1
## Arkansas     0.23234938  0.2308680 -1.0735927 -0.184916602       2
## California   0.27826823  1.2628144  1.7589234  2.067820292       1
## Colorado     0.02571456  0.3988593  0.8608085  1.864967207       1
## Connecticut -1.03041900 -0.7290821  0.7917228 -1.081740768       2
## Delaware    -0.43347395  0.8068381  0.4462940 -0.579946294       2
## Florida      1.74767144  1.9707777  0.9989801  1.138966691       1
## Georgia      2.20685994  0.4828549 -0.3827351  0.487701523       1
pam2$medoids
##                Murder    Assault   UrbanPop       Rape
## New Mexico  0.8292944  1.3708088  0.3081225  1.1603196
## Nebraska   -0.8008247 -0.8250772 -0.2445636 -0.5052109
# Cluster numbers
head(pam2$clustering)
##    Alabama     Alaska    Arizona   Arkansas California   Colorado 
##          1          1          1          2          1          1
pam4 <- pam(Arrest, 4,metric = "euclidean", stand = FALSE)
print(pam4)
## Medoids:
##               ID     Murder    Assault   UrbanPop         Rape
## Alabama        1  1.2425641  0.7828393 -0.5209066 -0.003416473
## Michigan      22  0.9900104  1.0108275  0.5844655  1.480613993
## Oklahoma      36 -0.2727580 -0.2371077  0.1699510 -0.131534211
## New Hampshire 29 -1.3059321 -1.3650491 -0.6590781 -1.252564419
## Clustering vector:
##        Alabama         Alaska        Arizona       Arkansas     California 
##              1              2              2              1              2 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##              2              3              3              2              1 
##         Hawaii          Idaho       Illinois        Indiana           Iowa 
##              3              4              2              3              4 
##         Kansas       Kentucky      Louisiana          Maine       Maryland 
##              3              3              1              4              2 
##  Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
##              3              2              4              1              3 
##        Montana       Nebraska         Nevada  New Hampshire     New Jersey 
##              3              3              2              4              3 
##     New Mexico       New York North Carolina   North Dakota           Ohio 
##              2              2              1              4              3 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
##              3              3              3              3              1 
##   South Dakota      Tennessee          Texas           Utah        Vermont 
##              4              1              2              3              4 
##       Virginia     Washington  West Virginia      Wisconsin        Wyoming 
##              3              3              4              4              3 
## Objective function:
##    build     swap 
## 1.035116 1.027102 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"
pam4$medoids
##                   Murder    Assault   UrbanPop         Rape
## Alabama        1.2425641  0.7828393 -0.5209066 -0.003416473
## Michigan       0.9900104  1.0108275  0.5844655  1.480613993
## Oklahoma      -0.2727580 -0.2371077  0.1699510 -0.131534211
## New Hampshire -1.3059321 -1.3650491 -0.6590781 -1.252564419
head(pam4$clustering)
##    Alabama     Alaska    Arizona   Arkansas California   Colorado 
##          1          2          2          1          2          2
## import data
USA_clara <- USArrests

## run CLARA
clarax <- clara(USA_clara[1:4], 3)

## print components of clarax
print(clarax)
## Call:     clara(x = USA_clara[1:4], k = 3) 
## Medoids:
##          Murder Assault UrbanPop Rape
## Michigan   12.1     255       74 35.1
## Missouri    9.0     178       70 28.2
## Nebraska    4.3     102       62 16.5
## Objective function:   29.31019
## Clustering vector:    Named int [1:50] 1 1 1 2 1 2 3 1 1 2 3 3 1 3 3 3 3 1 ...
##  - attr(*, "names")= chr [1:50] "Alabama" "Alaska" "Arizona" "Arkansas" "California" "Colorado" "Connecticut" ...
## Cluster sizes:            16 14 20 
## Best sample:
##  [1] Alabama        Alaska         Arizona        Arkansas       California    
##  [6] Colorado       Delaware       Florida        Georgia        Idaho         
## [11] Illinois       Indiana        Iowa           Kansas         Kentucky      
## [16] Louisiana      Maine          Maryland       Massachusetts  Michigan      
## [21] Minnesota      Mississippi    Missouri       Montana        Nebraska      
## [26] Nevada         New Hampshire  New York       North Carolina North Dakota  
## [31] Ohio           Oklahoma       Oregon         Pennsylvania   Rhode Island  
## [36] South Carolina South Dakota   Tennessee      Texas          Utah          
## [41] Vermont        Virginia       Washington     West Virginia  Wisconsin     
## [46] Wyoming       
## 
## Available components:
##  [1] "sample"     "medoids"    "i.med"      "clustering" "objective" 
##  [6] "clusinfo"   "diss"       "call"       "silinfo"    "data"
## plot clusters
plot(USA_clara, col = clarax$cluster)
## plot centers
points(clarax$centers, col = 1:2, pch = 8)

LS0tDQp0aXRsZTogJ0NsdXN0ZXIgay1tZWFuIGstbWVkb2lkIF8gVVMgQXJyZXN0Jw0KYXV0aG9yOiAiRGFvIFRoaSBUaGFuaCBCaW5oIC0gSEFOVSINCmRhdGU6ICIyNCBOb3YgMjAyMSINCm91dHB1dDogDQogIGh0bWxfZG9jdW1lbnQ6IA0KICAgIGNvZGVfZG93bmxvYWQ6IHRydWUNCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUNCiAgICBoaWdobGlnaHQ6IHB5Z21lbnRzDQogICAgIyBudW1iZXJfc2VjdGlvbnM6IHllcw0KICAgIHRoZW1lOiAiZmxhdGx5Ig0KICAgIHRvYzogVFJVRQ0KICAgIHRvY19mbG9hdDogVFJVRQ0KLS0tDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQ0KYGBgDQoNClTDoGkgbGnhu4d1IHBo4bulYyB24bulIFNlbWluYXIgVEtVRDI2LCBjaGkgdGnhur90IHThuqFpIFt04bqhaSDEkcOieV0NCihodHRwczovL3NpdGVzLmdvb2dsZS5jb20vdmlldy90a3VkL2hvbWU/YXV0aHVzZXI9MSkNCg0KYGBge3J9DQpsaWJyYXJ5KHRpZHl2ZXJzZSkgICMgZGF0YSBtYW5pcHVsYXRpb24NCmxpYnJhcnkoY2x1c3RlcikgICAgIyBjbHVzdGVyaW5nIGFsZ29yaXRobXMNCmxpYnJhcnkoZmFjdG9leHRyYSkgIyBjbHVzdGVyaW5nIGFsZ29yaXRobXMgJiB2aXN1YWxpemF0aW9uDQpgYGANCg0KYGBge3J9DQpBcnJlc3QgPC0gVVNBcnJlc3RzDQpBcnJlc3QgPC0gbmEub21pdChBcnJlc3QpDQpoZWFkKEFycmVzdCkNCmBgYA0KDQoNCmBgYHtyfQ0KDQpBcnJlc3QgPC0gc2NhbGUoQXJyZXN0KQ0KaGVhZChBcnJlc3QpDQpgYGANCg0KYGBge3J9DQpkaXN0YW5jZSA8LSBmYWN0b2V4dHJhOjpnZXRfZGlzdChBcnJlc3QpDQpoZWFkKGRpc3RhbmNlKQ0KZnZpel9kaXN0KGRpc3RhbmNlLCBncmFkaWVudCA9IGxpc3QobG93ID0gImdyZWVuIiwgbWlkID0gInllbGxvdyIsIGhpZ2ggPSAicmVkIikpDQpgYGANCg0KYGBge3J9DQprMiA8LSBrbWVhbnMoQXJyZXN0LCBjZW50ZXJzID0gMiwgbnN0YXJ0ID0gMjUpDQpzdHIoazIpDQpgYGANCg0KDQpgYGB7cn0NCmsyDQpgYGANCg0KYGBge3J9DQpmdml6X2NsdXN0ZXIoazIsIGRhdGEgPSBBcnJlc3QpDQoNCmBgYA0KDQpgYGB7cn0NCmszIDwtIGttZWFucyhBcnJlc3QsIGNlbnRlcnMgPSAzLCBuc3RhcnQgPSAyNSkNCmZ2aXpfY2x1c3RlcihrMywgZGF0YSA9IEFycmVzdCkNCmszDQpgYGANCg0KDQpgYGB7cn0NCkFycmVzdCAlPiUNCiAgYXNfdGliYmxlKCkgJT4lDQogIG11dGF0ZShjbHVzdGVyID0gazIkY2x1c3RlciwNCiAgICAgICAgIHN0YXRlID0gcm93Lm5hbWVzKFVTQXJyZXN0cykpICU+JQ0KICBnZ3Bsb3QoYWVzKFVyYmFuUG9wLCBNdXJkZXIsIGNvbG9yID0gZmFjdG9yKGNsdXN0ZXIpLCBsYWJlbCA9IHN0YXRlKSkgKw0KICBnZW9tX3RleHQoKQ0KYGBgDQoNCmBgYHtyfQ0KazMgPC0ga21lYW5zKEFycmVzdCwgY2VudGVycyA9IDMsIG5zdGFydCA9IDI1KQ0KazQgPC0ga21lYW5zKEFycmVzdCwgY2VudGVycyA9IDQsIG5zdGFydCA9IDI1KQ0KazUgPC0ga21lYW5zKEFycmVzdCwgY2VudGVycyA9IDUsIG5zdGFydCA9IDI1KQ0KDQojIHBsb3RzIHRvIGNvbXBhcmUNCnAxIDwtIGZ2aXpfY2x1c3RlcihrMiwgZ2VvbSA9ICJwb2ludCIsIGRhdGEgPSBBcnJlc3QpICsgZ2d0aXRsZSgiayA9IDIiKQ0KcDIgPC0gZnZpel9jbHVzdGVyKGszLCBnZW9tID0gInBvaW50IiwgIGRhdGEgPSBBcnJlc3QpICsgZ2d0aXRsZSgiayA9IDMiKQ0KcDMgPC0gZnZpel9jbHVzdGVyKGs0LCBnZW9tID0gInBvaW50IiwgIGRhdGEgPSBBcnJlc3QpICsgZ2d0aXRsZSgiayA9IDQiKQ0KcDQgPC0gZnZpel9jbHVzdGVyKGs1LCBnZW9tID0gInBvaW50IiwgIGRhdGEgPSBBcnJlc3QpICsgZ2d0aXRsZSgiayA9IDUiKQ0KDQpsaWJyYXJ5KGdyaWRFeHRyYSkNCmdyaWQuYXJyYW5nZShwMSwgcDIsIHAzLCBwNCwgbnJvdyA9IDIpDQpgYGANCmBgYHtyfQ0Kc2V0LnNlZWQoMTAwKQ0KDQojIGZ1bmN0aW9uIHRvIGNvbXB1dGUgdG90YWwgd2l0aGluLWNsdXN0ZXIgc3VtIG9mIHNxdWFyZSAoV3NzKQ0Kd3NzIDwtIGZ1bmN0aW9uKGspIHsNCiAga21lYW5zKEFycmVzdCwgaywgbnN0YXJ0ID0gMTAgKSR0b3Qud2l0aGluc3MNCn0NCg0KIyBDb21wdXRlIGFuZCBwbG90IHdzcyBmb3IgayA9IDEgdG8gayA9IDE1DQprLnZhbHVlcyA8LSAxOjE1DQoNCiMgZXh0cmFjdCB3c3MgZm9yIDItMTUgY2x1c3RlcnMNCndzc192YWx1ZXMgPC0gbWFwX2RibChrLnZhbHVlcywgd3NzKQ0KDQpwbG90KGsudmFsdWVzLCB3c3NfdmFsdWVzLA0KICAgICAgIHR5cGU9ImIiLCBwY2ggPSAxOSwgZnJhbWUgPSBGQUxTRSwgDQogICAgICAgeGxhYj0iTnVtYmVyIG9mIGNsdXN0ZXJzIEsiLA0KICAgICAgIHlsYWI9IlRvdGFsIHdpdGhpbi1jbHVzdGVycyBzdW0gb2Ygc3F1YXJlcyIpDQpgYGANCg0KYGBge3J9DQpyZXF1aXJlKGRwbHlyKQ0KI2luc3RhbGwucGFja2FnZXMoIm5GYWN0b3JzIikNCmxpYnJhcnkobkZhY3RvcnMpDQpzZXQuc2VlZCgxMCkNCmV2IDwtIGVpZ2VuKGNvcihBcnJlc3QpKSAjIGdldCBlaWdlbnZhbHVlcw0KZXYNCg0KYXAgPC0gcGFyYWxsZWwoc3ViamVjdD1ucm93KEFycmVzdCksdmFyPW5jb2woQXJyZXN0KSwNCiAgICAgICAgICAgICAgIHJlcD0xMDAsY2VudD0uMDUpDQpuUyA8LSBuU2NyZWUoeD1ldiR2YWx1ZXMsIGFwYXJhbGxlbD1hcCRlaWdlbiRxZXZwZWEpDQpwbG90blNjcmVlKG5TKQ0KDQoNCmBgYA0KDQpgYGB7cn0NCnNldC5zZWVkKDEwMCkNCg0KZnZpel9uYmNsdXN0KEFycmVzdCwga21lYW5zLCBtZXRob2QgPSAid3NzIikNCnN0cihBcnJlc3QpDQpgYGANCg0KDQoNCmBgYHtyfQ0KIyBmdW5jdGlvbiB0byBjb21wdXRlIGF2ZXJhZ2Ugc2lsaG91ZXR0ZSBmb3IgayBjbHVzdGVycw0KYXZnX3NpbCA8LSBmdW5jdGlvbihrKSB7DQogIGttLnJlcyA8LSBrbWVhbnMoQXJyZXN0LCBjZW50ZXJzID0gaywgbnN0YXJ0ID0gMjUpDQogIHNzIDwtIHNpbGhvdWV0dGUoa20ucmVzJGNsdXN0ZXIsIGRpc3QoQXJyZXN0KSkNCiAgbWVhbihzc1ssIDNdKQ0KfQ0KDQojIENvbXB1dGUgYW5kIHBsb3Qgd3NzIGZvciBrID0gMiB0byBrID0gMTUNCmsudmFsdWVzIDwtIDI6MTUNCg0KIyBleHRyYWN0IGF2ZyBzaWxob3VldHRlIGZvciAyLTE1IGNsdXN0ZXJzDQphdmdfc2lsX3ZhbHVlcyA8LSBtYXBfZGJsKGsudmFsdWVzLCBhdmdfc2lsKQ0KDQpwbG90KGsudmFsdWVzLCBhdmdfc2lsX3ZhbHVlcywNCiAgICAgICB0eXBlID0gImIiLCBwY2ggPSAxOSwgZnJhbWUgPSBGQUxTRSwgDQogICAgICAgeGxhYiA9ICJOdW1iZXIgb2YgY2x1c3RlcnMgSyIsDQogICAgICAgeWxhYiA9ICJBdmVyYWdlIFNpbGhvdWV0dGVzIikNCg0KYGBgDQpgYGB7cn0NCmZ2aXpfbmJjbHVzdChBcnJlc3QsIGttZWFucywgbWV0aG9kID0gInNpbGhvdWV0dGUiKQ0KYGBgDQoNCg0KYGBge3J9DQojZnZpel9uYmNsdXN0KGlyaXNCLCBrbWVhbnMsIG1ldGhvZCA9ICJzaWxob3VldHRlIikNCmBgYA0KDQpgYGB7cn0NCiMgY29tcHV0ZSBnYXAgc3RhdGlzdGljDQpzZXQuc2VlZCgxMDApDQpnYXBfc3RhdCA8LSBjbHVzR2FwKEFycmVzdCwgRlVOID0ga21lYW5zLCBuc3RhcnQgPSAyNSwNCiAgICAgICAgICAgICAgICAgICAgSy5tYXggPSAxMCwgQiA9IDUwKQ0KIyBQcmludCB0aGUgcmVzdWx0DQpwcmludChnYXBfc3RhdCwgbWV0aG9kID0gImZpcnN0bWF4IikNCmBgYA0KDQoNCmBgYHtyfQ0KZnZpel9nYXBfc3RhdChnYXBfc3RhdCkNCmBgYA0KDQoNCmBgYHtyfQ0KbGlicmFyeShmcGMpDQpsaWJyYXJ5KGNsdXN0ZXIpDQppcmlzQiA8LSBpcmlzDQpuYW1lcyhpcmlzQikNCg0Kc3VtbWFyeShpcmlzQiRTcGVjaWVzKQ0Kc3RyKGlyaXNCKQ0KaXJpc0IkU3BlY2llcyA8LSBOVUxMDQpgYGANCg0KDQpgYGB7cn0NCmttZWFucy5yZXN1bHQgPC0ga21lYW5zKGlyaXNCLCAzKQ0Ka21lYW5zLnJlc3VsdA0KDQpnYXBfc3RhdDI8LWNsdXNHYXAoaXJpc0IsRlVOID0ga21lYW5zLEsubWF4ID0gMTAsIEIgPSA1MCwgbnN0YXJ0ID0gMjUpDQpgYGANCg0KYGBge3J9DQpmdml6X2dhcF9zdGF0KGdhcF9zdGF0MikNCmBgYA0KDQpgYGB7cn0NCnNldC5zZWVkKDEwMCkNCmZpbmFsIDwtIGttZWFucyhBcnJlc3QsIDQsIG5zdGFydCA9IDI1KQ0KcHJpbnQoZmluYWwpDQpgYGANCg0KDQpgYGB7cn0NCmZ2aXpfY2x1c3RlcihmaW5hbCwgZGF0YSA9IEFycmVzdCkNCmBgYA0KDQpgYGB7cn0NClVTQXJyZXN0cyAlPiUNCiAgbXV0YXRlKENsdXN0ZXIgPSBmaW5hbCRjbHVzdGVyKSAlPiUNCiAgZ3JvdXBfYnkoQ2x1c3RlcikgJT4lDQogIHN1bW1hcmlzZV9hbGwoIm1lYW4iKQ0KDQpgYGANCg0KIyNLLU1lZG9pZHMgd2l0aCBVUyBBcnJlc3QNCg0KYGBge3J9DQoNCiNpbnN0YWxsLnBhY2thZ2VzKGMoImNsdXN0ZXIiLCAiZmFjdG9leHRyYSIpKQ0KbGlicmFyeShjbHVzdGVyKQ0KbGlicmFyeShmcGMpDQpsaWJyYXJ5KGZhY3RvZXh0cmEpDQpgYGANCg0KDQpgYGB7cn0NCmZ2aXpfbmJjbHVzdChBcnJlc3QsIGttZWFucywgbWV0aG9kID0gInNpbGhvdWV0dGUiKQ0KcGFtMiA8LSBwYW0oQXJyZXN0LCAyLG1ldHJpYyA9ICJldWNsaWRlYW4iLCBzdGFuZCA9IEZBTFNFKQ0KcHJpbnQocGFtMikNCmBgYA0KDQoNCmBgYHtyfQ0KYGBgDQoNCmBgYHtyfQ0KZGQgPC0gY2JpbmQoQXJyZXN0LCBjbHVzdGVyID0gcGFtMiRjbHVzdGVyKQ0KaGVhZChkZCwgbiA9IDEwKQ0KYGBgDQoNCmBgYHtyfQ0KDQpwYW0yJG1lZG9pZHMNCg0KYGBgDQpgYGB7cn0NCiMgQ2x1c3RlciBudW1iZXJzDQpoZWFkKHBhbTIkY2x1c3RlcmluZykNCmBgYA0KYGBge3J9DQoNCnBhbTQgPC0gcGFtKEFycmVzdCwgNCxtZXRyaWMgPSAiZXVjbGlkZWFuIiwgc3RhbmQgPSBGQUxTRSkNCnByaW50KHBhbTQpDQpwYW00JG1lZG9pZHMNCmhlYWQocGFtNCRjbHVzdGVyaW5nKQ0KDQpgYGANCg0KYGBge3J9DQojIyBpbXBvcnQgZGF0YQ0KVVNBX2NsYXJhIDwtIFVTQXJyZXN0cw0KDQojIyBydW4gQ0xBUkENCmNsYXJheCA8LSBjbGFyYShVU0FfY2xhcmFbMTo0XSwgMykNCg0KIyMgcHJpbnQgY29tcG9uZW50cyBvZiBjbGFyYXgNCnByaW50KGNsYXJheCkNCg0KIyMgcGxvdCBjbHVzdGVycw0KcGxvdChVU0FfY2xhcmEsIGNvbCA9IGNsYXJheCRjbHVzdGVyKQ0KIyMgcGxvdCBjZW50ZXJzDQpwb2ludHMoY2xhcmF4JGNlbnRlcnMsIGNvbCA9IDE6MiwgcGNoID0gOCkNCmBgYA0KDQo=