library(cluster) #conduct cluster analysis
library(compareGroups) #build descriptive statistic tables
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
## Loading required package: gdata
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
## The following object is masked from 'package:Hmisc':
##
## combine
## The following object is masked from 'package:stats':
##
## nobs
## The following object is masked from 'package:utils':
##
## object.size
## The following object is masked from 'package:base':
##
## startsWith
## Loading required package: xtable
##
## Attaching package: 'xtable'
## The following objects are masked from 'package:Hmisc':
##
## label, label<-
## Loading required package: SNPassoc
## Loading required package: haplo.stats
## Loading required package: mvtnorm
## Loading required package: parallel
library(mlbench) #contains the dataset
library(NbClust) #cluster validity measures
library(sparcl) #colored dendrogram
# Sonar, Mines vs. Rocks
# Load the Sonar data set (mlbench is attached above as the package that
# "contains the dataset") and look at its dimensions and first rows.
data(Sonar)
dim(Sonar)
## [1] 208 61
# Show the first rows, including the trailing Class column.
head(Sonar)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
## 1 0.0200 0.0371 0.0428 0.0207 0.0954 0.0986 0.1539 0.1601 0.3109 0.2111
## 2 0.0453 0.0523 0.0843 0.0689 0.1183 0.2583 0.2156 0.3481 0.3337 0.2872
## 3 0.0262 0.0582 0.1099 0.1083 0.0974 0.2280 0.2431 0.3771 0.5598 0.6194
## 4 0.0100 0.0171 0.0623 0.0205 0.0205 0.0368 0.1098 0.1276 0.0598 0.1264
## 5 0.0762 0.0666 0.0481 0.0394 0.0590 0.0649 0.1209 0.2467 0.3564 0.4459
## 6 0.0286 0.0453 0.0277 0.0174 0.0384 0.0990 0.1201 0.1833 0.2105 0.3039
## V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 0.1609 0.1582 0.2238 0.0645 0.0660 0.2273 0.3100 0.2999 0.5078 0.4797
## 2 0.4918 0.6552 0.6919 0.7797 0.7464 0.9444 1.0000 0.8874 0.8024 0.7818
## 3 0.6333 0.7060 0.5544 0.5320 0.6479 0.6931 0.6759 0.7551 0.8929 0.8619
## 4 0.0881 0.1992 0.0184 0.2261 0.1729 0.2131 0.0693 0.2281 0.4060 0.3973
## 5 0.4152 0.3952 0.4256 0.4135 0.4528 0.5326 0.7306 0.6193 0.2032 0.4636
## 6 0.2988 0.4250 0.6343 0.8198 1.0000 0.9988 0.9508 0.9025 0.7234 0.5122
## V21 V22 V23 V24 V25 V26 V27 V28 V29 V30
## 1 0.5783 0.5071 0.4328 0.5550 0.6711 0.6415 0.7104 0.8080 0.6791 0.3857
## 2 0.5212 0.4052 0.3957 0.3914 0.3250 0.3200 0.3271 0.2767 0.4423 0.2028
## 3 0.7974 0.6737 0.4293 0.3648 0.5331 0.2413 0.5070 0.8533 0.6036 0.8514
## 4 0.2741 0.3690 0.5556 0.4846 0.3140 0.5334 0.5256 0.2520 0.2090 0.3559
## 5 0.4148 0.4292 0.5730 0.5399 0.3161 0.2285 0.6995 1.0000 0.7262 0.4724
## 6 0.2074 0.3985 0.5890 0.2872 0.2043 0.5782 0.5389 0.3750 0.3411 0.5067
## V31 V32 V33 V34 V35 V36 V37 V38 V39 V40
## 1 0.1307 0.2604 0.5121 0.7547 0.8537 0.8507 0.6692 0.6097 0.4943 0.2744
## 2 0.3788 0.2947 0.1984 0.2341 0.1306 0.4182 0.3835 0.1057 0.1840 0.1970
## 3 0.8512 0.5045 0.1862 0.2709 0.4232 0.3043 0.6116 0.6756 0.5375 0.4719
## 4 0.6260 0.7340 0.6120 0.3497 0.3953 0.3012 0.5408 0.8814 0.9857 0.9167
## 5 0.5103 0.5459 0.2881 0.0981 0.1951 0.4181 0.4604 0.3217 0.2828 0.2430
## 6 0.5580 0.4778 0.3299 0.2198 0.1407 0.2856 0.3807 0.4158 0.4054 0.3296
## V41 V42 V43 V44 V45 V46 V47 V48 V49 V50
## 1 0.0510 0.2834 0.2825 0.4256 0.2641 0.1386 0.1051 0.1343 0.0383 0.0324
## 2 0.1674 0.0583 0.1401 0.1628 0.0621 0.0203 0.0530 0.0742 0.0409 0.0061
## 3 0.4647 0.2587 0.2129 0.2222 0.2111 0.0176 0.1348 0.0744 0.0130 0.0106
## 4 0.6121 0.5006 0.3210 0.3202 0.4295 0.3654 0.2655 0.1576 0.0681 0.0294
## 5 0.1979 0.2444 0.1847 0.0841 0.0692 0.0528 0.0357 0.0085 0.0230 0.0046
## 6 0.2707 0.2650 0.0723 0.1238 0.1192 0.1089 0.0623 0.0494 0.0264 0.0081
## V51 V52 V53 V54 V55 V56 V57 V58 V59 V60
## 1 0.0232 0.0027 0.0065 0.0159 0.0072 0.0167 0.0180 0.0084 0.0090 0.0032
## 2 0.0125 0.0084 0.0089 0.0048 0.0094 0.0191 0.0140 0.0049 0.0052 0.0044
## 3 0.0033 0.0232 0.0166 0.0095 0.0180 0.0244 0.0316 0.0164 0.0095 0.0078
## 4 0.0241 0.0121 0.0036 0.0150 0.0085 0.0073 0.0050 0.0044 0.0040 0.0117
## 5 0.0156 0.0031 0.0054 0.0105 0.0110 0.0015 0.0072 0.0048 0.0107 0.0094
## 6 0.0104 0.0045 0.0014 0.0038 0.0013 0.0089 0.0057 0.0027 0.0051 0.0062
## Class
## 1 R
## 2 R
## 3 R
## 4 R
## 5 R
## 6 R
# Remove the Class label column so only the numeric readings remain, then
# print the structure of what is left.
Sonar[["Class"]] <- NULL
str(Sonar)
## 'data.frame': 208 obs. of 60 variables:
## $ V1 : num 0.02 0.0453 0.0262 0.01 0.0762 0.0286 0.0317 0.0519 0.0223 0.0164 ...
## $ V2 : num 0.0371 0.0523 0.0582 0.0171 0.0666 0.0453 0.0956 0.0548 0.0375 0.0173 ...
## $ V3 : num 0.0428 0.0843 0.1099 0.0623 0.0481 ...
## $ V4 : num 0.0207 0.0689 0.1083 0.0205 0.0394 ...
## $ V5 : num 0.0954 0.1183 0.0974 0.0205 0.059 ...
## $ V6 : num 0.0986 0.2583 0.228 0.0368 0.0649 ...
## $ V7 : num 0.154 0.216 0.243 0.11 0.121 ...
## $ V8 : num 0.16 0.348 0.377 0.128 0.247 ...
## $ V9 : num 0.3109 0.3337 0.5598 0.0598 0.3564 ...
## $ V10: num 0.211 0.287 0.619 0.126 0.446 ...
## $ V11: num 0.1609 0.4918 0.6333 0.0881 0.4152 ...
## $ V12: num 0.158 0.655 0.706 0.199 0.395 ...
## $ V13: num 0.2238 0.6919 0.5544 0.0184 0.4256 ...
## $ V14: num 0.0645 0.7797 0.532 0.2261 0.4135 ...
## $ V15: num 0.066 0.746 0.648 0.173 0.453 ...
## $ V16: num 0.227 0.944 0.693 0.213 0.533 ...
## $ V17: num 0.31 1 0.6759 0.0693 0.7306 ...
## $ V18: num 0.3 0.887 0.755 0.228 0.619 ...
## $ V19: num 0.508 0.802 0.893 0.406 0.203 ...
## $ V20: num 0.48 0.782 0.862 0.397 0.464 ...
## $ V21: num 0.578 0.521 0.797 0.274 0.415 ...
## $ V22: num 0.507 0.405 0.674 0.369 0.429 ...
## $ V23: num 0.433 0.396 0.429 0.556 0.573 ...
## $ V24: num 0.555 0.391 0.365 0.485 0.54 ...
## $ V25: num 0.671 0.325 0.533 0.314 0.316 ...
## $ V26: num 0.641 0.32 0.241 0.533 0.229 ...
## $ V27: num 0.71 0.327 0.507 0.526 0.7 ...
## $ V28: num 0.808 0.277 0.853 0.252 1 ...
## $ V29: num 0.679 0.442 0.604 0.209 0.726 ...
## $ V30: num 0.386 0.203 0.851 0.356 0.472 ...
## $ V31: num 0.131 0.379 0.851 0.626 0.51 ...
## $ V32: num 0.26 0.295 0.504 0.734 0.546 ...
## $ V33: num 0.512 0.198 0.186 0.612 0.288 ...
## $ V34: num 0.7547 0.2341 0.2709 0.3497 0.0981 ...
## $ V35: num 0.854 0.131 0.423 0.395 0.195 ...
## $ V36: num 0.851 0.418 0.304 0.301 0.418 ...
## $ V37: num 0.669 0.384 0.612 0.541 0.46 ...
## $ V38: num 0.61 0.106 0.676 0.881 0.322 ...
## $ V39: num 0.494 0.184 0.537 0.986 0.283 ...
## $ V40: num 0.274 0.197 0.472 0.917 0.243 ...
## $ V41: num 0.051 0.167 0.465 0.612 0.198 ...
## $ V42: num 0.2834 0.0583 0.2587 0.5006 0.2444 ...
## $ V43: num 0.282 0.14 0.213 0.321 0.185 ...
## $ V44: num 0.4256 0.1628 0.2222 0.3202 0.0841 ...
## $ V45: num 0.2641 0.0621 0.2111 0.4295 0.0692 ...
## $ V46: num 0.1386 0.0203 0.0176 0.3654 0.0528 ...
## $ V47: num 0.1051 0.053 0.1348 0.2655 0.0357 ...
## $ V48: num 0.1343 0.0742 0.0744 0.1576 0.0085 ...
## $ V49: num 0.0383 0.0409 0.013 0.0681 0.023 0.0264 0.0507 0.0285 0.0777 0.0092 ...
## $ V50: num 0.0324 0.0061 0.0106 0.0294 0.0046 0.0081 0.0159 0.0178 0.0439 0.0198 ...
## $ V51: num 0.0232 0.0125 0.0033 0.0241 0.0156 0.0104 0.0195 0.0052 0.0061 0.0118 ...
## $ V52: num 0.0027 0.0084 0.0232 0.0121 0.0031 0.0045 0.0201 0.0081 0.0145 0.009 ...
## $ V53: num 0.0065 0.0089 0.0166 0.0036 0.0054 0.0014 0.0248 0.012 0.0128 0.0223 ...
## $ V54: num 0.0159 0.0048 0.0095 0.015 0.0105 0.0038 0.0131 0.0045 0.0145 0.0179 ...
## $ V55: num 0.0072 0.0094 0.018 0.0085 0.011 0.0013 0.007 0.0121 0.0058 0.0084 ...
## $ V56: num 0.0167 0.0191 0.0244 0.0073 0.0015 0.0089 0.0138 0.0097 0.0049 0.0068 ...
## $ V57: num 0.018 0.014 0.0316 0.005 0.0072 0.0057 0.0092 0.0085 0.0065 0.0032 ...
## $ V58: num 0.0084 0.0049 0.0164 0.0044 0.0048 0.0027 0.0143 0.0047 0.0093 0.0035 ...
## $ V59: num 0.009 0.0052 0.0095 0.004 0.0107 0.0051 0.0036 0.0048 0.0059 0.0056 ...
## $ V60: num 0.0032 0.0044 0.0078 0.0117 0.0094 0.0062 0.0103 0.0053 0.0022 0.004 ...
# Per-column summary statistics (min, quartiles, mean, max) of the readings.
summary(Sonar)
## V1 V2 V3 V4
## Min. :0.00150 Min. :0.00060 Min. :0.00150 Min. :0.00580
## 1st Qu.:0.01335 1st Qu.:0.01645 1st Qu.:0.01895 1st Qu.:0.02438
## Median :0.02280 Median :0.03080 Median :0.03430 Median :0.04405
## Mean :0.02916 Mean :0.03844 Mean :0.04383 Mean :0.05389
## 3rd Qu.:0.03555 3rd Qu.:0.04795 3rd Qu.:0.05795 3rd Qu.:0.06450
## Max. :0.13710 Max. :0.23390 Max. :0.30590 Max. :0.42640
## V5 V6 V7 V8
## Min. :0.00670 Min. :0.01020 Min. :0.0033 Min. :0.00550
## 1st Qu.:0.03805 1st Qu.:0.06703 1st Qu.:0.0809 1st Qu.:0.08042
## Median :0.06250 Median :0.09215 Median :0.1070 Median :0.11210
## Mean :0.07520 Mean :0.10457 Mean :0.1217 Mean :0.13480
## 3rd Qu.:0.10028 3rd Qu.:0.13412 3rd Qu.:0.1540 3rd Qu.:0.16960
## Max. :0.40100 Max. :0.38230 Max. :0.3729 Max. :0.45900
## V9 V10 V11 V12
## Min. :0.00750 Min. :0.0113 Min. :0.0289 Min. :0.0236
## 1st Qu.:0.09703 1st Qu.:0.1113 1st Qu.:0.1293 1st Qu.:0.1335
## Median :0.15225 Median :0.1824 Median :0.2248 Median :0.2490
## Mean :0.17800 Mean :0.2083 Mean :0.2360 Mean :0.2502
## 3rd Qu.:0.23342 3rd Qu.:0.2687 3rd Qu.:0.3016 3rd Qu.:0.3312
## Max. :0.68280 Max. :0.7106 Max. :0.7342 Max. :0.7060
## V13 V14 V15 V16
## Min. :0.0184 Min. :0.0273 Min. :0.0031 Min. :0.0162
## 1st Qu.:0.1661 1st Qu.:0.1752 1st Qu.:0.1646 1st Qu.:0.1963
## Median :0.2640 Median :0.2811 Median :0.2817 Median :0.3047
## Mean :0.2733 Mean :0.2966 Mean :0.3202 Mean :0.3785
## 3rd Qu.:0.3513 3rd Qu.:0.3862 3rd Qu.:0.4529 3rd Qu.:0.5357
## Max. :0.7131 Max. :0.9970 Max. :1.0000 Max. :0.9988
## V17 V18 V19 V20
## Min. :0.0349 Min. :0.0375 Min. :0.0494 Min. :0.0656
## 1st Qu.:0.2059 1st Qu.:0.2421 1st Qu.:0.2991 1st Qu.:0.3506
## Median :0.3084 Median :0.3683 Median :0.4350 Median :0.5425
## Mean :0.4160 Mean :0.4523 Mean :0.5048 Mean :0.5630
## 3rd Qu.:0.6594 3rd Qu.:0.6791 3rd Qu.:0.7314 3rd Qu.:0.8093
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V21 V22 V23 V24
## Min. :0.0512 Min. :0.0219 Min. :0.0563 Min. :0.0239
## 1st Qu.:0.3997 1st Qu.:0.4069 1st Qu.:0.4502 1st Qu.:0.5407
## Median :0.6177 Median :0.6649 Median :0.6997 Median :0.6985
## Mean :0.6091 Mean :0.6243 Mean :0.6470 Mean :0.6727
## 3rd Qu.:0.8170 3rd Qu.:0.8320 3rd Qu.:0.8486 3rd Qu.:0.8722
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V25 V26 V27 V28
## Min. :0.0240 Min. :0.0921 Min. :0.0481 Min. :0.0284
## 1st Qu.:0.5258 1st Qu.:0.5442 1st Qu.:0.5319 1st Qu.:0.5348
## Median :0.7211 Median :0.7545 Median :0.7456 Median :0.7319
## Mean :0.6754 Mean :0.6999 Mean :0.7022 Mean :0.6940
## 3rd Qu.:0.8737 3rd Qu.:0.8938 3rd Qu.:0.9171 3rd Qu.:0.9003
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V29 V30 V31 V32
## Min. :0.0144 Min. :0.0613 Min. :0.0482 Min. :0.0404
## 1st Qu.:0.4637 1st Qu.:0.4114 1st Qu.:0.3456 1st Qu.:0.2814
## Median :0.6808 Median :0.6071 Median :0.4904 Median :0.4296
## Mean :0.6421 Mean :0.5809 Mean :0.5045 Mean :0.4390
## 3rd Qu.:0.8521 3rd Qu.:0.7352 3rd Qu.:0.6420 3rd Qu.:0.5803
## Max. :1.0000 Max. :1.0000 Max. :0.9657 Max. :0.9306
## V33 V34 V35 V36
## Min. :0.0477 Min. :0.0212 Min. :0.0223 Min. :0.0080
## 1st Qu.:0.2579 1st Qu.:0.2176 1st Qu.:0.1794 1st Qu.:0.1543
## Median :0.3912 Median :0.3510 Median :0.3127 Median :0.3211
## Mean :0.4172 Mean :0.4032 Mean :0.3926 Mean :0.3848
## 3rd Qu.:0.5561 3rd Qu.:0.5961 3rd Qu.:0.5934 3rd Qu.:0.5565
## Max. :1.0000 Max. :0.9647 Max. :1.0000 Max. :1.0000
## V37 V38 V39 V40
## Min. :0.0351 Min. :0.0383 Min. :0.0371 Min. :0.0117
## 1st Qu.:0.1601 1st Qu.:0.1743 1st Qu.:0.1740 1st Qu.:0.1865
## Median :0.3063 Median :0.3127 Median :0.2835 Median :0.2781
## Mean :0.3638 Mean :0.3397 Mean :0.3258 Mean :0.3112
## 3rd Qu.:0.5189 3rd Qu.:0.4405 3rd Qu.:0.4349 3rd Qu.:0.4244
## Max. :0.9497 Max. :1.0000 Max. :0.9857 Max. :0.9297
## V41 V42 V43 V44
## Min. :0.0360 Min. :0.0056 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.1631 1st Qu.:0.1589 1st Qu.:0.1552 1st Qu.:0.1269
## Median :0.2595 Median :0.2451 Median :0.2225 Median :0.1777
## Mean :0.2893 Mean :0.2783 Mean :0.2465 Mean :0.2141
## 3rd Qu.:0.3875 3rd Qu.:0.3842 3rd Qu.:0.3245 3rd Qu.:0.2717
## Max. :0.8995 Max. :0.8246 Max. :0.7733 Max. :0.7762
## V45 V46 V47 V48
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.09448 1st Qu.:0.06855 1st Qu.:0.06425 1st Qu.:0.04512
## Median :0.14800 Median :0.12135 Median :0.10165 Median :0.07810
## Mean :0.19723 Mean :0.16063 Mean :0.12245 Mean :0.09142
## 3rd Qu.:0.23155 3rd Qu.:0.20037 3rd Qu.:0.15443 3rd Qu.:0.12010
## Max. :0.70340 Max. :0.72920 Max. :0.55220 Max. :0.33390
## V49 V50 V51 V52
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.000800
## 1st Qu.:0.02635 1st Qu.:0.01155 1st Qu.:0.008425 1st Qu.:0.007275
## Median :0.04470 Median :0.01790 Median :0.013900 Median :0.011400
## Mean :0.05193 Mean :0.02042 Mean :0.016069 Mean :0.013420
## 3rd Qu.:0.06853 3rd Qu.:0.02527 3rd Qu.:0.020825 3rd Qu.:0.016725
## Max. :0.19810 Max. :0.08250 Max. :0.100400 Max. :0.070900
## V53 V54 V55
## Min. :0.000500 Min. :0.001000 Min. :0.00060
## 1st Qu.:0.005075 1st Qu.:0.005375 1st Qu.:0.00415
## Median :0.009550 Median :0.009300 Median :0.00750
## Mean :0.010709 Mean :0.010941 Mean :0.00929
## 3rd Qu.:0.014900 3rd Qu.:0.014500 3rd Qu.:0.01210
## Max. :0.039000 Max. :0.035200 Max. :0.04470
## V56 V57 V58
## Min. :0.000400 Min. :0.00030 Min. :0.000300
## 1st Qu.:0.004400 1st Qu.:0.00370 1st Qu.:0.003600
## Median :0.006850 Median :0.00595 Median :0.005800
## Mean :0.008222 Mean :0.00782 Mean :0.007949
## 3rd Qu.:0.010575 3rd Qu.:0.01043 3rd Qu.:0.010350
## Max. :0.039400 Max. :0.03550 Max. :0.044000
## V59 V60
## Min. :0.000100 Min. :0.000600
## 1st Qu.:0.003675 1st Qu.:0.003100
## Median :0.006400 Median :0.005300
## Mean :0.007941 Mean :0.006507
## 3rd Qu.:0.010325 3rd Qu.:0.008525
## Max. :0.036400 Max. :0.043900
# Standardise every column (centre and scale, spelled out explicitly) and turn
# the resulting matrix back into a data frame for the clustering steps below.
df <- as.data.frame(scale(Sonar, center = TRUE, scale = TRUE))
str(df)
## 'data.frame': 208 obs. of 60 variables:
## $ V1 : num -0.399 0.702 -0.129 -0.834 2.046 ...
## $ V2 : num -0.0406 0.4206 0.5996 -0.6473 0.8545 ...
## $ V3 : num -0.0269 1.0531 1.7193 0.4806 0.1111 ...
## $ V4 : num -0.713 0.323 1.169 -0.718 -0.311 ...
## $ V5 : num 0.364 0.776 0.4 -0.985 -0.292 ...
## $ V6 : num -0.101 2.601 2.088 -1.147 -0.671 ...
## $ V7 : num 0.5204 1.519 1.964 -0.1933 -0.0137 ...
## $ V8 : num 0.2971 2.5049 2.8455 -0.0845 1.3141 ...
## $ V9 : num 1.123 1.315 3.225 -0.998 1.507 ...
## $ V10: num 0.0211 0.5873 3.0587 -0.609 1.768 ...
## $ V11: num -0.566 1.927 2.994 -1.115 1.35 ...
## $ V12: num -0.657 2.891 3.254 -0.364 1.035 ...
## $ V13: num -0.351 2.97 1.994 -1.808 1.08 ...
## $ V14: num -1.411 2.937 1.431 -0.428 0.711 ...
## $ V15: num -1.237 2.075 1.595 -0.717 0.645 ...
## $ V16: num -0.65 2.432 1.352 -0.711 0.662 ...
## $ V17: num -0.402 2.215 0.986 -1.315 1.193 ...
## $ V18: num -0.583 1.664 1.158 -0.857 0.638 ...
## $ V19: num 0.0116 1.1535 1.5043 -0.383 -1.1691 ...
## $ V20: num -0.317 0.833 1.138 -0.631 -0.379 ...
## $ V21: num -0.119 -0.341 0.731 -1.299 -0.753 ...
## $ V22: num -0.458 -0.856 0.193 -0.998 -0.762 ...
## $ V23: num -0.856 -1.004 -0.87 -0.365 -0.296 ...
## $ V24: num -0.492 -1.176 -1.287 -0.786 -0.555 ...
## $ V25: num -0.0177 -1.4307 -0.5811 -1.4756 -1.4671 ...
## $ V26: num -0.246 -1.601 -1.933 -0.702 -1.987 ...
## $ V27: num 0.0336 -1.5267 -0.7944 -0.7187 -0.0108 ...
## $ V28: num 0.481 -1.759 0.672 -1.864 1.29 ...
## $ V29: num 0.154 -0.832 -0.16 -1.803 0.35 ...
## $ V30: num -0.884 -1.713 1.225 -1.019 -0.492 ...
## $ V31: num -1.7467 -0.5873 1.6203 0.5679 0.0272 ...
## $ V32: num -0.838 -0.677 0.307 1.383 0.501 ...
## $ V33: num 0.459 -1.06 -1.119 0.943 -0.625 ...
## $ V34: num 1.52 -0.731 -0.572 -0.232 -1.32 ...
## $ V35: num 1.7795 -1.011 0.1182 0.0105 -0.762 ...
## $ V36: num 1.764 0.126 -0.305 -0.317 0.126 ...
## $ V37: num 1.2729 0.0821 1.0328 0.7377 0.4026 ...
## $ V38: num 1.268 -1.0985 1.5774 2.5437 -0.0843 ...
## $ V39: num 0.846 -0.712 1.063 3.315 -0.216 ...
## $ V40: num -0.206 -0.639 0.899 3.389 -0.382 ...
## $ V41: num -1.392 -0.712 1.025 1.887 -0.534 ...
## $ V42: num 0.0303 -1.3038 -0.1161 1.3175 -0.2009 ...
## $ V43: num 0.259 -0.766 -0.242 0.536 -0.445 ...
## $ V44: num 1.587 -0.385 0.061 0.796 -0.975 ...
## $ V45: num 0.441 -0.8912 0.0915 1.5318 -0.8444 ...
## $ V46: num -0.164 -1.048 -1.068 1.529 -0.805 ...
## $ V47: num -0.2 -0.799 0.142 1.645 -0.998 ...
## $ V48: num 0.687 -0.276 -0.273 1.06 -1.329 ...
## $ V49: num -0.379 -0.307 -1.083 0.45 -0.805 ...
## $ V50: num 0.876 -1.048 -0.719 0.657 -1.158 ...
## $ V51: num 0.594 -0.297 -1.063 0.669 -0.039 ...
## $ V52: num -1.113 -0.521 1.015 -0.137 -1.071 ...
## $ V53: num -0.596 -0.256 0.834 -1.007 -0.752 ...
## $ V54: num 0.6793 -0.8411 -0.1974 0.556 -0.0604 ...
## $ V55: num -0.2949 0.0155 1.2288 -0.1115 0.2412 ...
## $ V56: num 1.478 1.896 2.82 -0.161 -1.172 ...
## $ V57: num 1.76 1.068 4.11 -0.487 -0.107 ...
## $ V58: num 0.0697 -0.4713 1.3062 -0.5486 -0.4867 ...
## $ V59: num 0.171 -0.443 0.252 -0.638 0.446 ...
## $ V60: num -0.657 -0.419 0.257 1.032 0.575 ...
# Evaluate 5 to 10 clusters on the standardised Sonar data with NbClust, using
# Euclidean distance, complete linkage and every available validity index.
numComplete <- NbClust(
  data = df,
  distance = "euclidean",
  min.nc = 5,
  max.nc = 10,
  method = "complete",
  index = "all"
)

## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##

## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 4 proposed 5 as the best number of clusters
## * 2 proposed 6 as the best number of clusters
## * 11 proposed 8 as the best number of clusters
## * 6 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 8
##
##
## *******************************************************************
# Print, per validity index, the proposed number of clusters and index value.
numComplete[["Best.nc"]]
## KL CH Hartigan CCC Scott Marriot
## Number_clusters 8.000 5.0000 8.0000 8.000 8.0000 6.000000e+00
## Value_Index 4.344 20.2751 7.7286 -5.804 373.6376 3.433609e+100
## TrCovW TraceW Friedman Rubin Cindex DB
## Number_clusters 8.000 8.0000 8.0000 8.0000 5.000 10.0000
## Value_Index 7130.026 304.5389 38.5919 -0.0561 0.454 1.4647
## Silhouette Duda PseudoT2 Beale Ratkowsky Ball
## Number_clusters 10.000 8.0000 8.0000 8.0000 5.0000 6.0000
## Value_Index 0.141 1.1488 -0.1295 -2.7705 0.2317 354.4423
## PtBiserial Frey McClain Dunn Hubert SDindex Dindex
## Number_clusters 10.0000 4 5.0000 10.0000 0 10.00 0
## Value_Index 0.5436 NA 1.2119 0.2721 0 0.49 0
## SDbw
## Number_clusters 10.0000
## Value_Index 0.6152
# Complete-linkage hierarchical clustering of the standardised Sonar data.
dis <- dist(df, method = "euclidean")
hc <- hclust(dis, method = "complete")

# Plain dendrogram (leaf labels suppressed), followed by a version whose
# leaves are coloured by the 8-cluster cut of the tree.
plot(hc, hang = -1, labels = FALSE, main = "Complete-Linkage")
comp8 <- cutree(hc, k = 8)
ColorDendrogram(hc, y = comp8, main = "Complete", branchlength = 50)

# Cluster sizes for the 8-cluster solution.
table(comp8)
## comp8
## 1 2 3 4 5 6 7 8
## 111 50 5 18 6 9 6 3
#Glass Identification Database
# Load the Glass data set and inspect its size, the levels of the Type factor,
# the first rows and the column structure.
data(Glass)
dim(Glass)
## [1] 214 10
# Type has six levels; level "4" does not occur in the output below.
levels(Glass$Type)
## [1] "1" "2" "3" "5" "6" "7"
head(Glass)
## RI Na Mg Al Si K Ca Ba Fe Type
## 1 1.52101 13.64 4.49 1.10 71.78 0.06 8.75 0 0.00 1
## 2 1.51761 13.89 3.60 1.36 72.73 0.48 7.83 0 0.00 1
## 3 1.51618 13.53 3.55 1.54 72.99 0.39 7.78 0 0.00 1
## 4 1.51766 13.21 3.69 1.29 72.61 0.57 8.22 0 0.00 1
## 5 1.51742 13.27 3.62 1.24 73.08 0.55 8.07 0 0.00 1
## 6 1.51596 12.79 3.61 1.62 72.97 0.64 8.07 0 0.26 1
str(Glass)
## 'data.frame': 214 obs. of 10 variables:
## $ RI : num 1.52 1.52 1.52 1.52 1.52 ...
## $ Na : num 13.6 13.9 13.5 13.2 13.3 ...
## $ Mg : num 4.49 3.6 3.55 3.69 3.62 3.61 3.6 3.61 3.58 3.6 ...
## $ Al : num 1.1 1.36 1.54 1.29 1.24 1.62 1.14 1.05 1.37 1.36 ...
## $ Si : num 71.8 72.7 73 72.6 73.1 ...
## $ K : num 0.06 0.48 0.39 0.57 0.55 0.64 0.58 0.57 0.56 0.57 ...
## $ Ca : num 8.75 7.83 7.78 8.22 8.07 8.07 8.17 8.24 8.3 8.4 ...
## $ Ba : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Fe : num 0 0 0 0 0 0.26 0 0 0 0.11 ...
## $ Type: Factor w/ 6 levels "1","2","3","5",..: 1 1 1 1 1 1 1 1 1 1 ...
# Standardise the measurement columns of Glass. The Type label is excluded by
# name rather than by position, so the selection does not depend on Type
# being the 10th column.
df2 <- as.data.frame(scale(Glass[, setdiff(names(Glass), "Type")]))
str(df2)
## 'data.frame': 214 obs. of 9 variables:
## $ RI: num 0.871 -0.249 -0.72 -0.232 -0.311 ...
## $ Na: num 0.284 0.59 0.15 -0.242 -0.169 ...
## $ Mg: num 1.252 0.635 0.6 0.697 0.649 ...
## $ Al: num -0.691 -0.17 0.19 -0.31 -0.41 ...
## $ Si: num -1.1244 0.1021 0.4378 -0.0528 0.554 ...
## $ K : num -0.6701 -0.0262 -0.1641 0.1118 0.0812 ...
## $ Ca: num -0.145 -0.792 -0.827 -0.518 -0.623 ...
## $ Ba: num -0.352 -0.352 -0.352 -0.352 -0.352 ...
## $ Fe: num -0.585 -0.585 -0.585 -0.585 -0.585 ...
# Number of observations per known glass type, for later comparison with the
# cluster assignments.
table(Glass$Type)
##
## 1 2 3 5 6 7
## 70 76 17 13 9 29
#hierarchical clustering
# Evaluate 2 to 6 clusters on the standardised Glass measurements with
# NbClust (Euclidean distance, complete linkage, all validity indices).
# This overwrites the numComplete result computed for Sonar.
numComplete <- NbClust(
  data = df2,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 6,
  method = "complete",
  index = "all"
)

## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##

## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 6 proposed 2 as the best number of clusters
## * 9 proposed 3 as the best number of clusters
## * 2 proposed 4 as the best number of clusters
## * 1 proposed 5 as the best number of clusters
## * 5 proposed 6 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
# Print, per validity index, the proposed number of clusters and index value.
numComplete[["Best.nc"]]
## KL CH Hartigan CCC Scott Marriot
## Number_clusters 3.0000 3.0000 3.0000 2.0000 4.000 4.000000e+00
## Value_Index 6.1495 33.0364 15.3975 -6.9247 301.587 9.787967e+16
## TrCovW TraceW Friedman Rubin Cindex DB
## Number_clusters 3.00 3.0000 3.0000 3.0000 2.0000 6.0000
## Value_Index 13673.73 113.0336 60.6448 -0.0703 0.2421 0.8198
## Silhouette Duda PseudoT2 Beale Ratkowsky Ball
## Number_clusters 5.0000 2.0000 2.0000 2.0000 3.0000 3.0000
## Value_Index 0.4095 0.8541 33.4887 1.0208 0.2641 350.3589
## PtBiserial Frey McClain Dunn Hubert SDindex Dindex
## Number_clusters 6.0000 1 2.000 6.0000 0 6.0000 0
## Value_Index 0.5954 NA 0.096 0.1573 0 0.8283 0
## SDbw
## Number_clusters 6.0000
## Value_Index 0.5434
# Complete-linkage hierarchical clustering of the standardised Glass data.
# dis and hc are reused from the Sonar section and overwritten here.
dis <- dist(df2, method = "euclidean")
hc <- hclust(dis, method = "complete")

# Plain dendrogram, then one with leaves coloured by the 3-cluster cut.
plot(hc, hang = -1, labels = FALSE, main = "Complete-Linkage")
comp3 <- cutree(hc, k = 3)
ColorDendrogram(hc, y = comp3, main = "Complete", branchlength = 30)

# Cluster sizes, then clusters cross-tabulated against the known glass type.
table(comp3)
## comp3
## 1 2 3
## 192 16 6
table(comp3, Glass$Type)
##
## comp3 1 2 3 5 6 7
## 1 70 64 17 6 9 26
## 2 0 12 0 4 0 0
## 3 0 0 0 3 0 3
# Repeat the cluster-count search with Ward's method ("ward.D2"). The result
# is not stored, so the complete NbClust object is printed.
NbClust(
  data = df2,
  diss = NULL,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 6,
  method = "ward.D2",
  index = "all"
)

## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##

## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 3 proposed 2 as the best number of clusters
## * 3 proposed 3 as the best number of clusters
## * 8 proposed 4 as the best number of clusters
## * 1 proposed 5 as the best number of clusters
## * 6 proposed 6 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 4
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW
## 2 1.1170 54.1540 36.9793 -0.9405 375.2761 9.566690e+16 55386.41
## 3 0.7489 50.0525 38.3118 -2.1169 544.6034 9.756835e+16 36071.60
## 4 1.1922 51.9503 32.7478 0.3654 928.3092 2.887279e+16 27154.77
## 5 0.9506 52.9722 34.2352 3.1077 1216.6658 1.172503e+16 22534.94
## 6 2.0614 55.8977 20.2704 7.4254 1357.4165 8.746518e+15 15016.14
## TraceW Friedman Rubin Cindex DB Silhouette Duda Pseudot2
## 2 1526.9503 143.1121 1.2554 0.2195 1.5776 0.4499 0.7967 46.7061
## 3 1300.1623 295.6671 1.4744 0.1954 1.3824 0.4522 0.5146 25.4717
## 4 1100.3660 345.5944 1.7421 0.2253 0.9734 0.4523 0.8050 41.9052
## 5 951.9214 372.3038 2.0138 0.2024 1.2483 0.2590 0.7049 51.0711
## 6 817.9390 411.5637 2.3437 0.1772 1.1955 0.2822 0.7056 38.3794
## Beale Ratkowsky Ball Ptbiserial Frey McClain Dunn Hubert
## 2 1.5243 0.2694 763.4752 0.5201 -0.2385 0.1638 0.1457 0.0016
## 3 5.4627 0.2961 433.3874 0.6562 -1.3430 0.2119 0.1105 0.0018
## 4 1.4462 0.3098 275.0915 0.6714 4.7399 0.2107 0.1287 0.0018
## 5 2.4933 0.3108 190.3843 0.4812 0.8382 0.7687 0.0673 0.0018
## 6 2.4781 0.3075 136.3232 0.4516 0.2068 1.2365 0.0361 0.0019
## SDindex Dindex SDbw
## 2 2.2887 2.2087 1.5473
## 3 2.2862 2.0601 1.5639
## 4 1.4944 1.9451 0.8433
## 5 1.9771 1.7571 0.7711
## 6 1.9133 1.5899 0.7484
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.8235 39.2304 0.1338
## 3 0.6621 13.7821 0.0000
## 4 0.8205 37.8498 0.1630
## 5 0.8000 30.4920 0.0080
## 6 0.7808 25.8208 0.0086
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot
## Number_clusters 6.0000 6.0000 6.0000 6.0000 4.0000 4.00000e+00
## Value_Index 2.0614 55.8977 13.9648 7.4254 383.7058 5.15478e+16
## TrCovW TraceW Friedman Rubin Cindex DB Silhouette
## Number_clusters 3.00 4.0000 3.0000 4.000 6.0000 4.0000 4.0000
## Value_Index 19314.81 51.3517 152.5551 0.004 0.1772 0.9734 0.4523
## Duda PseudoT2 Beale Ratkowsky Ball PtBiserial Frey
## Number_clusters NA NA 2.0000 5.0000 3.0000 4.0000 1
## Value_Index NA NA 1.5243 0.3108 330.0877 0.6714 NA
## McClain Dunn Hubert SDindex Dindex SDbw
## Number_clusters 2.0000 2.0000 0 4.0000 0 6.0000
## Value_Index 0.1638 0.1457 0 1.4944 0 0.7484
##
## $Best.partition
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## 2 1 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 1 3 1 1 1 1 1 1 2 4 4 2 1 1 1 1 1 1
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## 1 1 1 1 3 3 3 1 1 3 1 3 3 3 3 3 3 3
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
## 3 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3
# Ward-linkage tree built on the same Euclidean distances, cut into 4 clusters.
hcWard <- hclust(dis, method = "ward.D2")
plot(hcWard, labels = FALSE, main = "Ward's-Linkage")
ward4 <- cutree(hcWard, k = 4)

# Ward clusters against the known glass type.
table(ward4, Glass$Type)
##
## ward4 1 2 3 5 6 7
## 1 70 68 17 8 8 4
## 2 0 8 0 2 0 0
## 3 0 0 0 1 1 25
## 4 0 0 0 2 0 0
# Agreement between the complete-linkage and Ward assignments.
table(comp3, ward4)
## ward4
## comp3 1 2 3 4
## 1 169 0 23 0
## 2 6 10 0 0
## 3 0 0 4 2
# Mean of each raw (unscaled) measurement within the complete-linkage
# clusters. The Type label is excluded by name instead of by column position.
aggregate(Glass[, setdiff(names(Glass), "Type")], list(comp3), mean)
## Group.1 RI Na Mg Al Si K Ca
## 1 1 1.517975 13.44609 2.899375 1.414167 72.74359 0.4296875 8.754062
## 2 2 1.524340 12.82500 0.541875 1.392500 72.03375 0.3212500 12.410000
## 3 3 1.514918 13.73833 1.523333 2.568333 71.33167 3.1216667 6.241667
## Ba Fe
## 1 0.1348958 0.05130208
## 2 0.2287500 0.14687500
## 3 1.3166667 0.00000000
# Same summary for the Ward clusters.
aggregate(Glass[, setdiff(names(Glass), "Type")], list(ward4), mean)
## Group.1 RI Na Mg Al Si K
## 1 1 1.518276 13.26034 3.2143429 1.325886 72.66789 0.4849714
## 2 2 1.526123 12.66200 0.1010000 1.199000 71.88500 0.2030000
## 3 3 1.516459 14.66963 0.4062963 2.190000 72.97741 0.2611111
## 4 4 1.513185 13.01000 0.0000000 3.030000 70.59000 6.2100000
## Ca Ba Fe
## 1 8.834000 0.01548571 0.06348571
## 2 13.437000 0.31500000 0.07000000
## 3 8.243704 1.17037037 0.01444444
## 4 6.945000 0.00000000 0.00000000
# Lay out the next plots in one row of two panels.
par(mfrow = c(1, 2))

# Store both hierarchical assignments on the raw Glass data so they can be
# used as grouping variables, then compare RI across clusters per method.
Glass[["comp_cluster"]] <- comp3
Glass[["ward_cluster"]] <- ward4
boxplot(RI ~ comp_cluster, data = Glass, main = "RI by Complete Linkage")
boxplot(RI ~ ward_cluster, data = Glass, main = "RI by Ward's Linkage")

#k means clustering
# Search 2 to 4 clusters with k-means as the partitioning method; distance and
# index arguments are left at NbClust's defaults. The result is printed, not
# stored.
NbClust(data = df2, min.nc = 2, max.nc = 4, method = "kmeans")

## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##

## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 3 proposed 2 as the best number of clusters
## * 10 proposed 3 as the best number of clusters
## * 10 proposed 4 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW
## 2 0.6241 42.5981 31.4956 -3.8304 241.7117 1.785743e+17 55872.41 1596.257
## 3 0.3291 40.0215 53.8473 -5.5533 506.3398 1.166707e+17 45133.01 1389.785
## 4 2.0350 51.1953 31.7592 0.0528 853.9172 4.087528e+16 28322.72 1107.221
## Friedman Rubin Cindex DB Silhouette Duda Pseudot2 Beale
## 2 124.4273 1.2009 0.2278 1.9306 0.3033 1.1084 -4.5004 -0.5741
## 3 176.0639 1.3794 0.2608 1.4944 0.3227 6.0797 -53.4732 -2.5086
## 4 294.5052 1.7314 0.1928 1.5092 0.2804 2.8459 -50.5917 -3.7867
## Ratkowsky Ball Ptbiserial Frey McClain Dunn Hubert SDindex
## 2 0.2234 798.1285 0.3338 -1.3753 0.3362 0.0584 0.0011 1.4670
## 3 0.2527 463.2615 0.4520 0.9951 0.3243 0.0718 0.0017 2.2238
## 4 0.3156 276.8053 0.4453 0.5656 1.0597 0.0504 0.0014 1.3994
## Dindex SDbw
## 2 2.2814 1.0414
## 3 2.1995 1.9700
## 4 1.7433 0.8216
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.7148 18.3576 1
## 3 0.0985 585.8822 1
## 4 0.6927 34.6028 1
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot
## Number_clusters 4.000 4.0000 3.0000 4.0000 4.0000 3.00000e+00
## Value_Index 2.035 51.1953 22.3517 0.0528 347.5775 -1.38918e+16
## TrCovW TraceW Friedman Rubin Cindex DB Silhouette
## Number_clusters 4.00 3.000 4.0000 3.0000 4.0000 3.0000 3.0000
## Value_Index 16810.29 -76.091 118.4413 0.1736 0.1928 1.4944 0.3227
## Duda PseudoT2 Beale Ratkowsky Ball PtBiserial Frey
## Number_clusters 2.0000 2.0000 2.0000 4.0000 3.000 3.000 1
## Value_Index 1.1084 -4.5004 -0.5741 0.3156 334.867 0.452 NA
## McClain Dunn Hubert SDindex Dindex SDbw
## Number_clusters 3.0000 3.0000 0 4.0000 0 4.0000
## Value_Index 0.3243 0.0718 0 1.3994 0 0.8216
##
## $Best.partition
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## 2 2 3 3 2 2 2 3 2 2 2 3 3 2 3 2 2 2
## 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 2 2
## 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## 3 2 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## 2 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## 2 2 2 2 2 2 2 3 2 2 2 2 2 3 2 2 2 2
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 3 1 2 3 3 2 2 3 3 1 1 3 3 3 2 2 2 2
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## 2 2 2 3 2 2 2 3 3 3 2 2 2 2 2 2 2 2
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
## 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
# Fit k-means with 3 centres on the standardised Glass data. The seed fixes
# the random starts; nstart = 25 requests 25 initial configurations.
set.seed(123)
km <- kmeans(df2, centers = 3, nstart = 25)

# Cluster sizes.
table(km$cluster)
##
## 1 2 3
## 45 32 137
# Cluster centres, expressed in standardised units because df2 is scaled.
km$centers
## RI Na Mg Al Si K
## 1 1.4606888 -0.09043613 -0.3972366 -0.6970535 -0.7718375 -0.38937157
## 2 -0.6459528 1.38028977 -1.5680171 1.5380586 0.2049626 0.17078016
## 3 -0.3289088 -0.29269815 0.4967313 -0.1302954 0.2056488 0.08800552
## Ca Ba Fe
## 1 1.3701447 -0.1884750 0.25647565
## 2 -0.4535000 1.6817596 -0.46000045
## 3 -0.3441205 -0.3309119 0.02320153
# Attach the k-means assignment to the raw Glass data so it can be used as a
# grouping variable.
Glass$km_cluster <- km$cluster

# Compare sodium content per cluster for k-means vs. Ward's linkage. The
# two-panel layout is requested here rather than relying on graphics state
# set much earlier in the script, and the previous settings are restored once
# both panels are drawn.
old_par <- par(mfrow = c(1, 2))
boxplot(Na ~ km_cluster, data = Glass, main = "Na Content, K-Means")
boxplot(Na ~ ward_cluster, data = Glass, main = "Na Content, Ward's")
par(old_par)

# k-means clusters cross-tabulated against the known glass type.
table(km$cluster, Glass$Type)
##
## 1 2 3 5 6 7
## 1 16 15 3 8 1 2
## 2 0 0 0 3 3 26
## 3 54 61 14 2 5 1
# Clustering with mixed data ----
# Add two categorical columns to df2: the glass Type taken from Glass, and
# a High/Low flag for whether Si is above zero.
df2$Type <- as.factor(Glass$Type)
df2$sandy <- as.factor(
  ifelse(df2$Si > 0, "High", "Low")
)
# Inspect the resulting column types.
str(df2)
## 'data.frame': 214 obs. of 11 variables:
## $ RI : num 0.871 -0.249 -0.72 -0.232 -0.311 ...
## $ Na : num 0.284 0.59 0.15 -0.242 -0.169 ...
## $ Mg : num 1.252 0.635 0.6 0.697 0.649 ...
## $ Al : num -0.691 -0.17 0.19 -0.31 -0.41 ...
## $ Si : num -1.1244 0.1021 0.4378 -0.0528 0.554 ...
## $ K : num -0.6701 -0.0262 -0.1641 0.1118 0.0812 ...
## $ Ca : num -0.145 -0.792 -0.827 -0.518 -0.623 ...
## $ Ba : num -0.352 -0.352 -0.352 -0.352 -0.352 ...
## $ Fe : num -0.585 -0.585 -0.585 -0.585 -0.585 ...
## $ Type : Factor w/ 6 levels "1","2","3","5",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ sandy: Factor w/ 2 levels "High","Low": 2 1 1 2 1 1 1 1 2 1 ...
# Gower dissimilarities over all df2 columns, followed by PAM with k = 3
# run on that dissimilarity object.
disMat <- daisy(df2, metric = "gower")
set.seed(123)
pamFit <- pam(disMat, k = 3)
# Number of observations in each PAM cluster.
table(pamFit$clustering)
##
## 1 2 3
## 84 94 36
# PAM cluster (rows) against glass Type (columns).
table(pamFit$clustering, Glass$Type)
##
## 1 2 3 5 6 7
## 1 26 35 10 6 3 4
## 2 44 37 7 3 3 0
## 3 0 4 0 4 3 25
# Attach the PAM assignment to df2 and describe every other column by
# cluster with compareGroups; evaluating clustab prints the table.
df2$cluster <- pamFit$clustering
group <- compareGroups(cluster ~ ., data = df2)
clustab <- createTable(group)
clustab
##
## --------Summary descriptives table by 'cluster'---------
##
## _________________________________________________________
## 1 2 3 p.overall
## N=84 N=94 N=36
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## RI 0.58 (1.15) -0.41 (0.42) -0.28 (1.07) <0.001
## Na 0.19 (0.84) -0.50 (0.56) 0.88 (1.42) <0.001
## Mg 0.23 (0.86) 0.47 (0.30) -1.76 (0.42) <0.001
## Al -0.21 (1.09) -0.14 (0.45) 0.86 (1.34) <0.001
## Si -0.91 (0.85) 0.47 (0.36) 0.91 (0.77) <0.001
## K 0.09 (1.44) 0.08 (0.23) -0.41 (0.89) 0.027
## Ca 0.23 (1.17) -0.33 (0.43) 0.31 (1.34) <0.001
## Ba -0.11 (0.98) -0.34 (0.05) 1.15 (1.45) <0.001
## Fe 0.18 (1.17) -0.02 (0.94) -0.37 (0.55) 0.020
## Type: <0.001
## 1 26 (31.0%) 44 (46.8%) 0 (0.00%)
## 2 35 (41.7%) 37 (39.4%) 4 (11.1%)
## 3 10 (11.9%) 7 (7.45%) 0 (0.00%)
## 5 6 (7.14%) 3 (3.19%) 4 (11.1%)
## 6 3 (3.57%) 3 (3.19%) 3 (8.33%)
## 7 4 (4.76%) 0 (0.00%) 25 (69.4%)
## sandy: <0.001
## High 1 (1.19%) 93 (98.9%) 35 (97.2%)
## Low 83 (98.8%) 1 (1.06%) 1 (2.78%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Write the descriptive table to Glass_clusters.csv.
export2csv(clustab, file = "Glass_clusters.csv")