To perform principal component analysis (PCA) on the wine data, then cluster the wines on the leading principal-component scores using both hierarchical clustering and K-means, and summarise the resulting groups.
library(cluster)
library(factoextra)
## Warning: package 'factoextra' was built under R version 3.5.1
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.1
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
# Load the wine data from a CSV file picked in an interactive file dialog
mydata <- read.csv(file = file.choose())
View(mydata)
# Open the princomp help page to review its interface
help("princomp")
## starting httpd help server ...
## done
# The first column of mydata holds the wine Type, so drop it and keep
# only the numeric measurements for PCA. List-style indexing ([-1])
# always returns a data frame.
data <- mydata[-1]
View(data)
# No attach(): columns are always reached through the data frame, which
# avoids masking other objects on the search path.
# Pairwise correlations between the numeric variables
cor(data)
## Alcohol Malic Ash Alcalinity
## Alcohol 1.00000000 0.09439694 0.211544596 -0.31023514
## Malic 0.09439694 1.00000000 0.164045470 0.28850040
## Ash 0.21154460 0.16404547 1.000000000 0.44336719
## Alcalinity -0.31023514 0.28850040 0.443367187 1.00000000
## Magnesium 0.27079823 -0.05457510 0.286586691 -0.08333309
## Phenols 0.28910112 -0.33516700 0.128979538 -0.32111332
## Flavanoids 0.23681493 -0.41100659 0.115077279 -0.35136986
## Nonflavanoids -0.15592947 0.29297713 0.186230446 0.36192172
## Proanthocyanins 0.13669791 -0.22074619 0.009651935 -0.19732684
## Color 0.54636420 0.24898534 0.258887259 0.01873198
## Hue -0.07174720 -0.56129569 -0.074666889 -0.27395522
## Dilution 0.07234319 -0.36871043 0.003911231 -0.27676855
## Proline 0.64372004 -0.19201056 0.223626264 -0.44059693
## Magnesium Phenols Flavanoids Nonflavanoids
## Alcohol 0.27079823 0.28910112 0.2368149 -0.1559295
## Malic -0.05457510 -0.33516700 -0.4110066 0.2929771
## Ash 0.28658669 0.12897954 0.1150773 0.1862304
## Alcalinity -0.08333309 -0.32111332 -0.3513699 0.3619217
## Magnesium 1.00000000 0.21440123 0.1957838 -0.2562940
## Phenols 0.21440123 1.00000000 0.8645635 -0.4499353
## Flavanoids 0.19578377 0.86456350 1.0000000 -0.5378996
## Nonflavanoids -0.25629405 -0.44993530 -0.5378996 1.0000000
## Proanthocyanins 0.23644061 0.61241308 0.6526918 -0.3658451
## Color 0.19995001 -0.05513642 -0.1723794 0.1390570
## Hue 0.05539820 0.43368134 0.5434786 -0.2626396
## Dilution 0.06600394 0.69994936 0.7871939 -0.5032696
## Proline 0.39335085 0.49811488 0.4941931 -0.3113852
## Proanthocyanins Color Hue Dilution
## Alcohol 0.136697912 0.54636420 -0.07174720 0.072343187
## Malic -0.220746187 0.24898534 -0.56129569 -0.368710428
## Ash 0.009651935 0.25888726 -0.07466689 0.003911231
## Alcalinity -0.197326836 0.01873198 -0.27395522 -0.276768549
## Magnesium 0.236440610 0.19995001 0.05539820 0.066003936
## Phenols 0.612413084 -0.05513642 0.43368134 0.699949365
## Flavanoids 0.652691769 -0.17237940 0.54347857 0.787193902
## Nonflavanoids -0.365845099 0.13905701 -0.26263963 -0.503269596
## Proanthocyanins 1.000000000 -0.02524993 0.29554425 0.519067096
## Color -0.025249931 1.00000000 -0.52181319 -0.428814942
## Hue 0.295544253 -0.52181319 1.00000000 0.565468293
## Dilution 0.519067096 -0.42881494 0.56546829 1.000000000
## Proline 0.330416700 0.31610011 0.23618345 0.312761075
## Proline
## Alcohol 0.6437200
## Malic -0.1920106
## Ash 0.2236263
## Alcalinity -0.4405969
## Magnesium 0.3933508
## Phenols 0.4981149
## Flavanoids 0.4941931
## Nonflavanoids -0.3113852
## Proanthocyanins 0.3304167
## Color 0.3161001
## Hue 0.2361834
## Dilution 0.3127611
## Proline 1.0000000
# Open the princomp help page; cor = TRUE runs the PCA on the correlation
# matrix rather than the covariance matrix
help("princomp")
pcaObj <- princomp(
  x = data,
  cor = TRUE,
  scores = TRUE,
  covmat = NULL
)
str(pcaObj)
## List of 7
## $ sdev : Named num [1:13] 2.169 1.58 1.203 0.959 0.924 ...
## ..- attr(*, "names")= chr [1:13] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ loadings: 'loadings' num [1:13, 1:13] 0.14433 -0.24519 -0.00205 -0.23932 0.14199 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:13] "Alcohol" "Malic" "Ash" "Alcalinity" ...
## .. ..$ : chr [1:13] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ center : Named num [1:13] 13 2.34 2.37 19.49 99.74 ...
## ..- attr(*, "names")= chr [1:13] "Alcohol" "Malic" "Ash" "Alcalinity" ...
## $ scale : Named num [1:13] 0.81 1.114 0.274 3.33 14.242 ...
## ..- attr(*, "names")= chr [1:13] "Alcohol" "Malic" "Ash" "Alcalinity" ...
## $ n.obs : int 178
## $ scores : num [1:178, 1:13] 3.32 2.21 2.52 3.76 1.01 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:13] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ call : language princomp(x = data, cor = TRUE, scores = TRUE, covmat = NULL)
## - attr(*, "class")= chr "princomp"
# Note: princomp(mydata, cor = TRUE) is similar to, but not identical to,
# prcomp(mydata, scale = TRUE)
# Importance of each component: standard deviation, proportion of variance
# and cumulative proportion
summary(pcaObj)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 2.1692972 1.5801816 1.2025273 0.9586313 0.92370351
## Proportion of Variance 0.3619885 0.1920749 0.1112363 0.0706903 0.06563294
## Cumulative Proportion 0.3619885 0.5540634 0.6652997 0.7359900 0.80162293
## Comp.6 Comp.7 Comp.8 Comp.9
## Standard deviation 0.80103498 0.74231281 0.59033665 0.53747553
## Proportion of Variance 0.04935823 0.04238679 0.02680749 0.02222153
## Cumulative Proportion 0.85098116 0.89336795 0.92017544 0.94239698
## Comp.10 Comp.11 Comp.12 Comp.13
## Standard deviation 0.50090167 0.47517222 0.41081655 0.321524394
## Proportion of Variance 0.01930019 0.01736836 0.01298233 0.007952149
## Cumulative Proportion 0.96169717 0.97906553 0.99204785 1.000000000
# Inspect the structure of the fitted princomp object again
str(pcaObj)
## List of 7
## $ sdev : Named num [1:13] 2.169 1.58 1.203 0.959 0.924 ...
## ..- attr(*, "names")= chr [1:13] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ loadings: 'loadings' num [1:13, 1:13] 0.14433 -0.24519 -0.00205 -0.23932 0.14199 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:13] "Alcohol" "Malic" "Ash" "Alcalinity" ...
## .. ..$ : chr [1:13] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ center : Named num [1:13] 13 2.34 2.37 19.49 99.74 ...
## ..- attr(*, "names")= chr [1:13] "Alcohol" "Malic" "Ash" "Alcalinity" ...
## $ scale : Named num [1:13] 0.81 1.114 0.274 3.33 14.242 ...
## ..- attr(*, "names")= chr [1:13] "Alcohol" "Malic" "Ash" "Alcalinity" ...
## $ n.obs : int 178
## $ scores : num [1:178, 1:13] 3.32 2.21 2.52 3.76 1.01 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:13] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ call : language princomp(x = data, cor = TRUE, scores = TRUE, covmat = NULL)
## - attr(*, "class")= chr "princomp"
# Print the loadings matrix (original variables in rows, components in
# columns)
loadings(pcaObj)
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## Alcohol 0.144 0.484 0.207 0.266 0.214 0.396
## Malic -0.245 0.225 -0.537 0.537 -0.421
## Ash 0.316 -0.626 0.214 0.143 0.154 0.149 -0.170
## Alcalinity -0.239 -0.612 -0.101 0.287 0.428
## Magnesium 0.142 0.300 -0.131 0.352 -0.727 -0.323 -0.156
## Phenols 0.395 -0.146 -0.198 0.149 -0.406
## Flavanoids 0.423 -0.151 -0.152 0.109 -0.187
## Nonflavanoids -0.299 -0.170 0.203 0.501 -0.259 -0.595 -0.233
## Proanthocyanins 0.313 -0.149 -0.399 -0.137 -0.534 -0.372 0.368
## Color 0.530 0.137 -0.419 0.228
## Hue 0.297 -0.279 0.428 0.174 0.106 -0.232 0.437
## Dilution 0.376 -0.164 -0.166 -0.184 0.101 0.266
## Proline 0.287 0.365 0.127 0.232 0.158 0.120 0.120
## Comp.9 Comp.10 Comp.11 Comp.12 Comp.13
## Alcohol 0.509 0.212 0.226 0.266
## Malic -0.309 -0.122
## Ash -0.308 0.499 -0.141
## Alcalinity 0.200 -0.479
## Magnesium 0.271
## Phenols 0.286 -0.320 -0.304 0.304 -0.464
## Flavanoids -0.163 0.832
## Nonflavanoids 0.196 0.216 -0.117 0.114
## Proanthocyanins -0.209 0.134 0.237 -0.117
## Color -0.291 -0.604
## Hue -0.522 -0.259
## Dilution 0.137 0.524 -0.601 -0.157
## Proline -0.576 0.162 -0.539
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.077 0.077 0.077 0.077 0.077 0.077 0.077 0.077
## Cumulative Var 0.077 0.154 0.231 0.308 0.385 0.462 0.538 0.615
## Comp.9 Comp.10 Comp.11 Comp.12 Comp.13
## SS loadings 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.077 0.077 0.077 0.077 0.077
## Cumulative Var 0.692 0.769 0.846 0.923 1.000
# Scree plot: variance captured by each principal component
plot(pcaObj)

# In the plot above Comp.1 has the highest importance (highest variance)
# Biplot of the observations and the variable loadings
biplot(pcaObj)

# Cumulative percentage of variance explained as components are added;
# this helps in choosing how many principal components to keep
pc_var <- pcaObj$sdev^2
plot(cumsum(pc_var) * 100 / sum(pc_var), type = "b",
     xlab = "Number of principal components",
     ylab = "Cumulative variance explained (%)")

# pcaObj$loadings
# Scores on the first three principal components, used below as a reduced
# representation of the data (about 66.5% of the total variance according
# to the summary above)
pcaObj$scores[, paste0("Comp.", 1:3)]
## Comp.1 Comp.2 Comp.3
## [1,] 3.31675081 1.44346263 0.165739045
## [2,] 2.20946492 -0.33339289 2.026457374
## [3,] 2.51674015 1.03115130 -0.982818670
## [4,] 3.75706561 2.75637191 0.176191842
## [5,] 1.00890849 0.86983082 -2.026688219
## [6,] 3.05025392 2.12240111 0.629395827
## [7,] 2.44908967 1.17485013 0.977094891
## [8,] 2.05943687 1.60896307 -0.146281883
## [9,] 2.51087430 0.91807096 1.770969027
## [10,] 2.75362819 0.78943767 0.984247490
## [11,] 3.47973668 1.30233324 0.422735217
## [12,] 1.75475290 0.61197723 1.190878320
## [13,] 2.11346234 0.67570634 0.865086426
## [14,] 3.45815682 1.13062988 1.204276353
## [15,] 4.31278391 2.09597558 1.263912752
## [16,] 2.30518820 1.66255173 -0.217902616
## [17,] 2.17195527 2.32730534 -0.831729866
## [18,] 1.89897118 1.63136888 -0.794913792
## [19,] 3.54198508 2.51834367 0.485458508
## [20,] 2.08452220 1.06113799 0.164746678
## [21,] 3.12440254 0.78689711 0.364887083
## [22,] 1.08657007 0.24174355 -0.936961600
## [23,] 2.53522408 -0.09184062 0.311932659
## [24,] 1.64498834 -0.51627893 -0.143885095
## [25,] 1.76157587 -0.31714893 -0.890285647
## [26,] 0.99007910 0.94066734 -3.820908008
## [27,] 1.77527763 0.68617513 0.086700406
## [28,] 1.23542396 -0.08980704 1.386896545
## [29,] 2.18840633 0.68956962 -1.394566881
## [30,] 2.25610898 0.19146194 1.092657258
## [31,] 2.50022003 1.24083383 -1.386017855
## [32,] 2.67741105 1.47187365 0.332261728
## [33,] 1.62857912 0.05270445 0.167128706
## [34,] 1.90269086 1.63306043 -1.172082119
## [35,] 1.41038853 0.69793432 -0.479743025
## [36,] 1.90382623 0.17671095 -0.450835040
## [37,] 1.38486223 0.65863985 -0.458438581
## [38,] 1.12220741 0.11410976 0.039107277
## [39,] 1.50219450 -0.76943201 1.426177346
## [40,] 2.52980109 1.80300198 0.343152389
## [41,] 2.58809543 0.77961630 0.118477466
## [42,] 0.66848199 0.16996094 0.783362548
## [43,] 3.07080699 1.15591896 0.312758084
## [44,] 0.46220914 0.33074213 0.201476496
## [45,] 2.10135193 -0.07100892 0.655849415
## [46,] 1.13616618 1.77710739 -0.028705736
## [47,] 2.72660096 1.19133469 0.539773261
## [48,] 2.82133927 0.64625860 1.155552411
## [49,] 2.00985085 1.24702946 0.057293988
## [50,] 2.70749130 1.75196741 0.643113612
## [51,] 3.21491747 0.16699199 1.973571680
## [52,] 2.85895983 0.74527880 -0.004719502
## [53,] 3.50560436 1.61273386 0.520774530
## [54,] 2.22479138 1.87516800 -0.339549850
## [55,] 2.14698782 1.01675154 0.957762762
## [56,] 2.46932948 1.32900831 -0.513437453
## [57,] 2.74151791 1.43654878 0.612473396
## [58,] 2.17374092 1.21219984 -0.261779593
## [59,] 3.13938015 1.73157912 0.285661413
## [60,] -0.92858197 -3.07348616 4.585064007
## [61,] -1.54248014 -1.38144351 0.874683112
## [62,] -1.83624976 -0.82998412 1.605702186
## [63,] 0.03060683 -1.26278614 1.784408010
## [64,] 2.05026161 -1.92503260 0.007368777
## [65,] -0.60968083 -1.90805881 -0.679357938
## [66,] 0.90022784 -0.76391147 -0.573361302
## [67,] 2.24850719 -1.88459248 2.031840193
## [68,] 0.18338403 -2.42714611 1.069745560
## [69,] -0.81280503 -0.22051399 0.707005396
## [70,] 1.97562050 -1.40328323 1.238276220
## [71,] -1.57221622 -0.88498314 0.628997950
## [72,] 1.65768181 -0.95671220 -1.952584217
## [73,] -0.72537239 -1.06364540 -0.080332229
## [74,] 2.56222717 0.26019855 -3.374393962
## [75,] 1.83256757 -1.28787820 -0.458280027
## [76,] -0.86799290 -2.44410119 1.563333179
## [77,] 0.37001440 -2.15390698 2.449386348
## [78,] -1.45737704 -1.38335177 0.227306902
## [79,] 1.26293085 -0.77084953 1.184224517
## [80,] 0.37615037 -1.02704340 -1.794466295
## [81,] 0.76206390 -3.37505381 0.357470056
## [82,] 1.03457797 -1.45070974 0.363011773
## [83,] -0.49487676 -2.38124353 -1.335743176
## [84,] -2.53897708 -0.08744336 -0.474251393
## [85,] 0.83532015 -1.47367055 -0.610093576
## [86,] 0.78790461 -2.02662652 0.254723404
## [87,] -0.80683216 -2.23383039 -0.772855797
## [88,] -0.55804262 -2.37298543 -2.307611404
## [89,] -1.11511104 -1.80224719 -0.959253308
## [90,] -0.55572283 -2.65754004 -0.849126898
## [91,] -1.34928528 -2.11800147 0.047652321
## [92,] -1.56448261 -1.85221452 -0.781067031
## [93,] -1.93255561 -1.55949546 0.089274676
## [94,] 0.74666594 -2.31293171 -0.114679769
## [95,] 0.95745536 -2.22352843 -0.142444774
## [96,] 2.54386518 0.16927402 -0.788696991
## [97,] -0.54395259 -0.36892655 -1.308895932
## [98,] 1.03104975 -2.56556935 1.086390174
## [99,] 2.25190942 -1.43274138 0.230208244
## [100,] 1.41021602 -2.16619177 -0.748896411
## [101,] 0.79771979 -2.37694880 1.568112531
## [102,] -0.54953173 -2.29312864 1.498935323
## [103,] -0.16117374 -1.16448332 -1.003713103
## [104,] -0.65979494 -2.67996119 0.764920868
## [105,] 0.39235441 -2.09873171 0.471850008
## [106,] -1.77249908 -1.71728847 -0.947033174
## [107,] -0.36626736 -2.16935330 0.481324235
## [108,] -1.62067257 -1.35558339 -0.287159001
## [109,] 0.08253578 -2.30623459 0.463574989
## [110,] 1.57827507 -1.46203429 -1.779645955
## [111,] 1.42056925 -1.41820664 -0.139275829
## [112,] -0.27870275 -1.93056809 -0.078670553
## [113,] -1.30314497 -0.76317231 -1.999596510
## [114,] -0.45707187 -2.26941561 -1.061338968
## [115,] -0.49418585 -1.93904505 -1.323938072
## [116,] 0.48207441 -3.87178385 -1.344271223
## [117,] -0.25288888 -2.82149237 0.302639785
## [118,] -0.10722764 -1.92892204 -0.690148243
## [119,] -2.43301260 -1.25714104 1.903027404
## [120,] -0.55108954 -2.22216155 0.356228830
## [121,] 0.73962193 -1.40895667 -1.125345492
## [122,] 1.33632173 0.25333693 -5.345388179
## [123,] -1.17708700 -0.66396684 -3.010221888
## [124,] -0.46233501 -0.61828818 -0.483442366
## [125,] 0.97847408 -1.44557050 -1.481236975
## [126,] -0.09680973 -2.10999799 -0.434826116
## [127,] 0.03848715 -1.26676211 -0.687577913
## [128,] -1.59715850 -1.20814357 -3.361175555
## [129,] -0.47956492 -1.93884066 -1.296507519
## [130,] -1.79283347 -1.15028810 -0.782800173
## [131,] -1.32710166 0.17038923 1.180013355
## [132,] -2.38450083 0.37458261 0.723822595
## [133,] -2.93694010 0.26386183 0.167639816
## [134,] -2.14681113 0.36825495 0.453301301
## [135,] -2.36986949 -0.45963481 1.101399789
## [136,] -3.06384157 0.35341284 1.099124104
## [137,] -3.91575378 0.15458252 -0.221827800
## [138,] -3.93646339 0.65968723 -1.712215419
## [139,] -3.09427612 0.34884276 1.026831413
## [140,] -2.37447163 0.29198035 -1.241914333
## [141,] -2.77881295 0.28680487 -0.609670124
## [142,] -2.28656128 0.37250784 0.971643032
## [143,] -2.98563349 0.48921791 -0.946952932
## [144,] -2.37519470 0.48233372 0.252883994
## [145,] -2.20986553 1.16005250 1.245125226
## [146,] -2.62562100 0.56316076 0.855961082
## [147,] -4.28063878 0.64967096 1.458196962
## [148,] -3.58264137 1.27270275 0.110784038
## [149,] -2.80706372 1.57053379 0.472527935
## [150,] -2.89965933 2.04105701 0.495959810
## [151,] -2.32073698 2.35636608 -0.437681744
## [152,] -2.54983095 2.04528309 0.312267999
## [153,] -1.81254128 1.52764595 -1.362589782
## [154,] -2.76014464 2.13893235 0.964628688
## [155,] -2.73715050 0.40988627 1.190404684
## [156,] -3.60486887 1.80238422 0.094036861
## [157,] -2.88982600 1.92521861 0.782322556
## [158,] -3.39215608 1.31187639 -1.602025969
## [159,] -1.04818190 3.51508969 -1.160038566
## [160,] -1.60991228 2.40663816 -0.548559697
## [161,] -3.14313097 0.73816104 0.090998724
## [162,] -2.24015690 1.17546529 0.101376932
## [163,] -2.84767378 0.55604397 -0.804215218
## [164,] -2.59749706 0.69796554 0.884939521
## [165,] -2.94929937 1.55530896 0.983400727
## [166,] -3.53003227 0.88252680 0.466029128
## [167,] -2.40611054 2.59235618 -0.428226211
## [168,] -2.92908473 1.27444695 1.213358272
## [169,] -2.18141278 2.07753731 -0.763782552
## [170,] -2.38092779 2.58866743 -1.418044029
## [171,] -3.21161722 -0.25124910 0.847129152
## [172,] -3.67791872 0.84774784 1.339420231
## [173,] -2.46555580 2.19379830 0.918780960
## [174,] -3.37052415 2.21628914 0.342569512
## [175,] -2.60195585 1.75722935 -0.207581355
## [176,] -2.67783946 2.76089913 0.940941877
## [177,] -2.38701709 2.29734668 0.550696197
## [178,] -3.20875816 2.76891957 -1.013913664
# Append the first three principal-component score columns to mydata
# (cbind joins its arguments column-wise)
mydata <- cbind(mydata, pcaObj$scores[, paste0("Comp.", 1:3)])
View(mydata)
# Hierarchical Clustering
# Prepare the data for clustering: use only the three PCA score columns.
# They are selected by name because positions 8:10 of mydata hold
# Flavanoids, Nonflavanoids and Proanthocyanins, not the scores.
clus_data <- mydata[, c("Comp.1", "Comp.2", "Comp.3")]
# Standardise each column (zero mean, unit variance)
norm_clus <- scale(clus_data)
# Pairwise Euclidean distances between observations
dist1 <- dist(norm_clus, method = "euclidean")
# Agglomerative hierarchical clustering with complete linkage
fit1 <- hclust(dist1, method = "complete")
plot(fit1) # display the dendrogram
rect.hclust(fit1, k = 7, border = "red") # outline the 7-cluster cut

groups <- cutree(fit1, k = 7) # cut the dendrogram into 7 clusters
membership_1 <- as.matrix(groups) # cluster number of each observation
View(membership_1)
final1 <- cbind(membership_1, mydata) # bind cluster labels to the data
View(final1)
# Per-cluster means of the original measurements; Type and the PCA score
# columns are left out. Inferences about the wine clusters can be drawn
# from this table.
summary_cols <- setdiff(
  names(final1),
  c("Type", "Comp.1", "Comp.2", "Comp.3")
)
View(aggregate(final1[, summary_cols], by = list(membership_1), FUN = mean))
# write.csv() always writes the header row and ignores col.names (with a
# warning), so only row.names is set
write.csv(final1, file = "wine_cluster.csv", row.names = FALSE)
getwd() # directory the CSV was written to
## [1] "C:/Users/Thiru/Desktop/Assignments In Progress/PCA/PCA_Wine"
# K-Means Clustering :
library(plyr)
## Warning: package 'plyr' was built under R version 3.5.1
# Re-load the data through an interactive file dialog; the structure
# printed below (membership_1 first, Comp.1-Comp.3 last) matches the
# wine_cluster.csv file written earlier in this script
mydata <- read.csv(file.choose())
str(mydata)
## 'data.frame': 178 obs. of 18 variables:
## $ membership_1 : int 1 2 3 1 1 1 1 2 1 1 ...
## $ Type : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Alcohol : num 14.2 13.2 13.2 14.4 13.2 ...
## $ Malic : num 1.71 1.78 2.36 1.95 2.59 1.76 1.87 2.15 1.64 1.35 ...
## $ Ash : num 2.43 2.14 2.67 2.5 2.87 2.45 2.45 2.61 2.17 2.27 ...
## $ Alcalinity : num 15.6 11.2 18.6 16.8 21 15.2 14.6 17.6 14 16 ...
## $ Magnesium : int 127 100 101 113 118 112 96 121 97 98 ...
## $ Phenols : num 2.8 2.65 2.8 3.85 2.8 3.27 2.5 2.6 2.8 2.98 ...
## $ Flavanoids : num 3.06 2.76 3.24 3.49 2.69 3.39 2.52 2.51 2.98 3.15 ...
## $ Nonflavanoids : num 0.28 0.26 0.3 0.24 0.39 0.34 0.3 0.31 0.29 0.22 ...
## $ Proanthocyanins: num 2.29 1.28 2.81 2.18 1.82 1.97 1.98 1.25 1.98 1.85 ...
## $ Color : num 5.64 4.38 5.68 7.8 4.32 6.75 5.25 5.05 5.2 7.22 ...
## $ Hue : num 1.04 1.05 1.03 0.86 1.04 1.05 1.02 1.06 1.08 1.01 ...
## $ Dilution : num 3.92 3.4 3.17 3.45 2.93 2.85 3.58 3.58 2.85 3.55 ...
## $ Proline : int 1065 1050 1185 1480 735 1450 1290 1295 1045 1045 ...
## $ Comp.1 : num 3.32 2.21 2.52 3.76 1.01 ...
## $ Comp.2 : num 1.443 -0.333 1.031 2.756 0.87 ...
## $ Comp.3 : num 0.166 2.026 -0.983 0.176 -2.027 ...
View(mydata)
# Standardise the three PCA score columns for K-means. They are selected
# by name: the re-loaded data has membership_1 as its first column, so
# positions 15:17 would be Proline, Comp.1 and Comp.2.
normalized_data <- scale(mydata[, c("Comp.1", "Comp.2", "Comp.3")])
# Scree plot to choose the number of clusters: total within-cluster sum
# of squares for k = 1..max_k, using several random starts per k so the
# curve does not depend on one unlucky initialisation
max_k <- 7L
wss <- vapply(
  seq_len(max_k),
  function(k) kmeans(normalized_data, centers = k, nstart = 25)$tot.withinss,
  numeric(1)
)
plot(seq_len(max_k), wss, type = "b", xlab = "Number of Clusters",
     ylab = "Within groups sum of squares") # look for an "elbow"
title(sub = "K-Means Clustering Scree-Plot")

# 7-cluster K-means solution (25 random starts); graph = FALSE because the
# plot is drawn separately below
fit <- eclust(normalized_data, "kmeans", k = 7, nstart = 25, graph = FALSE)
# ellipse.type replaces the deprecated frame.type argument
fviz_cluster(fit, geom = "point", ellipse.type = "norm")

final2 <- data.frame(fit$cluster, mydata) # append cluster membership
View(final2)
# Per-cluster means of every column except the earlier hierarchical labels
# (membership_1); selecting by name keeps Comp.3 in the summary
kmeans_cols <- setdiff(names(mydata), "membership_1")
aggregate(mydata[, kmeans_cols], by = list(fit$cluster), FUN = mean)
## Group.1 Type Alcohol Malic Ash Alcalinity Magnesium
## 1 1 3.000000 13.38000 3.375417 2.492500 22.06250 102.87500
## 2 2 1.950000 12.36450 1.933000 2.241000 19.52500 99.15000
## 3 3 2.000000 12.12833 1.639667 2.152667 19.81333 87.83333
## 4 4 2.960000 12.93240 3.314800 2.379200 20.84000 95.32000
## 5 5 1.000000 13.93615 1.784615 2.515000 17.00385 107.73077
## 6 6 2.000000 12.38842 2.436842 2.376316 20.94737 95.36842
## 7 7 1.058824 13.55353 2.101176 2.411765 17.48824 107.97059
## Phenols Flavanoids Nonflavanoids Proanthocyanins Color Hue
## 1 1.782500 0.8845833 0.4370833 1.395417 9.132083 0.641250
## 2 2.688000 2.6505000 0.2775000 2.049000 3.660500 1.047500
## 3 2.202667 1.9826667 0.3576667 1.515000 2.656333 1.120667
## 4 1.578000 0.7148000 0.4640000 0.940000 5.626000 0.728800
## 5 2.980769 3.1888462 0.2976923 2.018077 6.328846 1.101538
## 6 1.880526 1.6384211 0.4589474 1.269474 3.150526 0.944000
## 7 2.742059 2.8111765 0.2817647 1.857941 4.800294 1.049118
## Dilution Proline Comp.1 Comp.2
## 1 1.644583 664.1667 -2.570976 2.0441393
## 2 3.046000 606.2500 1.208095 -1.2632790
## 3 2.824000 434.0000 -0.120410 -2.3395956
## 4 1.734000 592.4000 -2.909965 0.4174399
## 5 3.063077 1302.5769 2.746798 1.5328082
## 6 2.472632 525.4211 -1.338027 -1.1181664
## 7 3.229412 976.5294 1.997315 0.5103017
# Number of observations assigned to each K-means cluster
table(fit$cluster)
##
## 1 2 3 4 5 6 7
## 24 20 30 25 26 19 34