cereal <- read.csv("T11-9.DAT.txt", sep="", header=FALSE)
colnames(cereal) = c("Brand","Manufacturer","Calories","Protein","Fat","Sodium","Fiber","Carbohydrates", "Sugar", "Potassium", "Group")
cereal <- cereal[,2:10]
cereal$Carbohydrates <- as.numeric(cereal$Carbohydrates)
head(cereal)
## Manufacturer Calories Protein Fat Sodium Fiber Carbohydrates Sugar
## 1 G 110 2 2 180 1.5 4 10
## 2 G 110 6 2 290 2.0 13 1
## 3 G 110 1 1 180 0.0 7 13
## 4 G 110 1 1 180 0.0 7 13
## 5 G 110 1 1 280 0.0 11 9
## 6 G 110 3 1 250 1.5 6 10
## Potassium
## 1 70
## 2 105
## 3 55
## 4 65
## 5 45
## 6 90
The E(AER) is the total number of misclassified observations/total number of observations.
From our cross validation below, we see that
\(E(AER) = \frac{3+2+4+1+3+0}{43} = \frac{13}{43} = 30.23\)
#######
#Holdout
########
library(MASS)
## Warning: package 'MASS' was built under R version 3.5.2
set.seed(1234)
holdout.class=NULL
z <- lda(Manufacturer ~ ., data=cereal, prior = c(1,1,1)/3, CV = T)
z
## $class
## [1] Q G G G G G G G G G Q G K K G K G K K K K Q K K K K K G K G K K K G G
## [36] K K G G G Q Q Q
## Levels: G K Q
##
## $posterior
## G K Q
## 1 0.278140838 2.889560e-02 6.929636e-01
## 2 0.730719531 1.254803e-01 1.438002e-01
## 3 0.892517863 1.471458e-02 9.276756e-02
## 4 0.908670975 5.893725e-03 8.543530e-02
## 5 0.946025582 2.357346e-02 3.040096e-02
## 6 0.834693817 1.243000e-01 4.100620e-02
## 7 0.863002362 8.328412e-02 5.371351e-02
## 8 0.898741508 1.815321e-02 8.310529e-02
## 9 0.611557678 2.686117e-01 1.198306e-01
## 10 0.885693026 2.967013e-02 8.463685e-02
## 11 0.112184389 2.796065e-04 8.875360e-01
## 12 0.680369344 1.867973e-01 1.328334e-01
## 13 0.089987322 9.099946e-01 1.804610e-05
## 14 0.120934789 8.165935e-01 6.247169e-02
## 15 0.598145848 2.015665e-01 2.002876e-01
## 16 0.104519662 8.519051e-01 4.357519e-02
## 17 0.584428213 3.637771e-01 5.179466e-02
## 18 0.475810090 5.231557e-01 1.034206e-03
## 19 0.026391108 9.707286e-01 2.880285e-03
## 20 0.169418260 8.262758e-01 4.305896e-03
## 21 0.006311571 9.914992e-01 2.189212e-03
## 22 0.004888582 4.913226e-07 9.951109e-01
## 23 0.019651176 9.795082e-01 8.406046e-04
## 24 0.201829875 6.770748e-01 1.210954e-01
## 25 0.021316051 9.778924e-01 7.915461e-04
## 26 0.002358525 9.912760e-01 6.365446e-03
## 27 0.001825402 9.981740e-01 6.170877e-07
## 28 0.581189584 3.172359e-01 1.015745e-01
## 29 0.161801169 8.377511e-01 4.477056e-04
## 30 0.829202368 1.232868e-01 4.751087e-02
## 31 0.128971096 8.686543e-01 2.374617e-03
## 32 0.013130587 9.833586e-01 3.510799e-03
## 33 0.375295542 6.218472e-01 2.857262e-03
## 34 0.803257106 1.946352e-01 2.107715e-03
## 35 0.776510431 2.191418e-01 4.347771e-03
## 36 0.075498413 9.237816e-01 7.199928e-04
## 37 0.338119094 6.603473e-01 1.533593e-03
## 38 0.967229996 1.787118e-02 1.489882e-02
## 39 0.719405356 2.741577e-01 6.436896e-03
## 40 0.633077966 3.485389e-02 3.320681e-01
## 41 0.040617057 3.060885e-04 9.590769e-01
## 42 0.014304551 1.343497e-04 9.855611e-01
## 43 0.040795598 1.730986e-02 9.418945e-01
##
## $terms
## Manufacturer ~ Calories + Protein + Fat + Sodium + Fiber + Carbohydrates +
## Sugar + Potassium
## attr(,"variables")
## list(Manufacturer, Calories, Protein, Fat, Sodium, Fiber, Carbohydrates,
## Sugar, Potassium)
## attr(,"factors")
## Calories Protein Fat Sodium Fiber Carbohydrates Sugar
## Manufacturer 0 0 0 0 0 0 0
## Calories 1 0 0 0 0 0 0
## Protein 0 1 0 0 0 0 0
## Fat 0 0 1 0 0 0 0
## Sodium 0 0 0 1 0 0 0
## Fiber 0 0 0 0 1 0 0
## Carbohydrates 0 0 0 0 0 1 0
## Sugar 0 0 0 0 0 0 1
## Potassium 0 0 0 0 0 0 0
## Potassium
## Manufacturer 0
## Calories 0
## Protein 0
## Fat 0
## Sodium 0
## Fiber 0
## Carbohydrates 0
## Sugar 0
## Potassium 1
## attr(,"term.labels")
## [1] "Calories" "Protein" "Fat" "Sodium"
## [5] "Fiber" "Carbohydrates" "Sugar" "Potassium"
## attr(,"order")
## [1] 1 1 1 1 1 1 1 1
## attr(,"intercept")
## [1] 1
## attr(,"response")
## [1] 1
## attr(,".Environment")
## <environment: R_GlobalEnv>
## attr(,"predvars")
## list(Manufacturer, Calories, Protein, Fat, Sodium, Fiber, Carbohydrates,
## Sugar, Potassium)
## attr(,"dataClasses")
## Manufacturer Calories Protein Fat Sodium
## "factor" "numeric" "numeric" "numeric" "numeric"
## Fiber Carbohydrates Sugar Potassium
## "numeric" "numeric" "numeric" "numeric"
##
## $call
## lda(formula = Manufacturer ~ ., data = cereal, prior = c(1, 1,
## 1)/3, CV = T)
##
## $xlevels
## named list()
holdout.class=z$class
table(cereal$Manufacturer,holdout.class)
## holdout.class
## G K Q
## G 12 3 2
## K 4 15 1
## Q 3 0 3
From the LDA coefficient 1, we see that Fat and Fiber have the highest loading. When looking at the group means, we notice that the fat levels for Kellogs cereal is much less than the other two brands. Additionally, the fiber is quite different between the groups. There are 3 different levels of fiber, with Kellogs being the highest. From LDA, I would argue that Kelloggs is the healthiest brand.
z <- lda(Manufacturer ~ ., data=cereal, prior = c(1,1,1)/3)
z
## Call:
## lda(Manufacturer ~ ., data = cereal, prior = c(1, 1, 1)/3)
##
## Prior probabilities of groups:
## G K Q
## 0.3333333 0.3333333 0.3333333
##
## Group means:
## Calories Protein Fat Sodium Fiber Carbohydrates Sugar
## G 110.5882 2.352941 1.235294 203.52941 1.294118 9.705882 8.117647
## K 111.0000 2.600000 0.650000 185.50000 2.250000 12.050000 7.950000
## Q 90.0000 2.333333 1.333333 98.33333 1.116667 5.500000 5.000000
## Potassium
## G 85.00000
## K 91.75000
## Q 58.33333
##
## Coefficients of linear discriminants:
## LD1 LD2
## Calories -0.042394995 -0.022385273
## Protein -0.192736441 0.043708862
## Fat 1.030238269 0.230557790
## Sodium -0.002097074 0.008271173
## Fiber -0.938811912 -1.424824908
## Carbohydrates -0.112847256 0.016979986
## Sugar -0.103510958 0.070253284
## Potassium 0.019779960 0.035706831
##
## Proportion of trace:
## LD1 LD2
## 0.8095 0.1905
Below, we can see the different clusters that exist. We see that all three brands have their own clusters.
plot(z)
Euclidean distance matrix is below.
library(robustHD)
## Loading required package: ggplot2
## Loading required package: perry
## Loading required package: parallel
## Loading required package: robustbase
cereal <- read.csv("T11-9.DAT.txt", sep="", header=FALSE)
colnames(cereal) = c("Brand","Manufacturer","Calories","Protein","Fat","Sodium","Fiber","Carbohydrates", "Sugar", "Potassium", "Group")
cereal <- cereal[,1:10]
cereal$Carbohydrates <- as.numeric(cereal$Carbohydrates)
cereal$Fiber <- as.numeric(cereal$Fiber)
cereal <- as.data.frame(cereal)
std = standardize(cereal[3:10])
rownames(std) = cereal$Brand
std$Manufacturer <- cereal$Manufacturer
cereal <- std
d=dist(cereal,method = "euclidean");d # distance matrix
## Warning in dist(cereal, method = "euclidean"): NAs introduced by coercion
## ACCheerios Cheerios CocoaPuffs CountChocula
## Cheerios 4.8395420
## CocoaPuffs 2.0788869 5.8827377
## CountChocula 2.0664677 5.8630142 0.1604395
## GoldenGrahams 2.8169231 5.1614788 1.8855412 1.9059087
## HoneyNutCheerios 1.9240801 4.0041958 2.3528874 2.3198349
## Kix 3.7879494 4.1414714 3.4335717 3.4485328
## LuckyCharms 1.8151598 5.1542845 0.8990474 0.9132508
## MultiGrainCheerios 2.4607348 4.0793019 2.5688929 2.5386544
## OatmealRaisinCrisp 2.0031230 4.0062413 2.9345402 2.8814295
## RaisinNutBran 1.8428539 4.5528547 3.4800349 3.4203494
## TotalCornFlakes 3.6546522 4.3146748 3.2800497 3.2996106
## TotalRaisinBran 4.1967807 5.2922293 4.4951255 4.3966993
## TotalWholeGrain 3.1952659 3.3053677 3.7574902 3.7230800
## Trix 2.2750180 5.9686791 0.7918774 0.8984741
## Cheaties 3.3354923 3.2972088 3.8355312 3.8018274
## WheatiesHoneyGold 2.3668926 4.3431622 1.9730066 1.9730066
## AllBran 7.6469762 6.4091900 8.3170784 8.2362170
## AppleJacks 2.9959472 6.1989783 1.9560170 1.9951059
## CornFlakes 4.5985231 4.6394901 4.0516857 4.0675375
## CornPops 3.3200728 6.5716879 1.9937690 2.0447595
## CracklinOatBran 2.7558597 4.7001921 4.6406146 4.5848105
## Crispix 4.2693894 4.7372295 3.6236442 3.6448927
## FrootLoops 1.8271521 5.6396862 1.4212052 1.4745403
## FrostedFlakes 3.2300886 5.9367847 1.7623557 1.8127557
## FrostedMiniWheats 4.1624719 5.6556479 4.1218081 4.0967518
## FruitfulBran 4.3404259 5.1610680 4.4319666 4.3558083
## JustRightCrunchyNuggets 2.6683735 4.3158584 2.3965832 2.3965832
## MueslixCrispyBlend 4.0563728 5.2069243 4.5441050 4.4871008
## NutNHoneyCrunch 2.4062287 4.7017903 1.6966933 1.7267691
## NutriGrainAlmondRaisin 3.7164966 3.6655756 4.3328823 4.2910951
## NutriGrainWheat 4.3357755 4.2585203 4.3280571 4.3101779
## Product19 4.5859791 3.9963663 4.2285460 4.2376673
## RaisinBran 4.1028175 4.9912741 4.6395336 4.5385683
## RiceKrispies 4.6963214 4.7744937 3.9536620 3.9699052
## Smacks 4.2103303 6.0478606 3.3621941 3.3774713
## SpecialK 5.0777354 2.9858072 5.3104961 5.3129191
## CapNCrunch 1.7759102 5.5890302 1.5838805 1.6240018
## HoneyGrahamOhs 1.4630121 5.3583639 1.7155068 1.7378683
## Life 2.2573877 3.1895146 3.6727050 3.6480921
## PuffedRice 5.7196404 7.5536294 5.3405653 5.3622109
## PuffedWheat 5.4572823 7.2087553 5.4795420 5.4842377
## QuakerOatmeal 4.3277625 4.9069806 5.7493226 5.7268929
## GoldenGrahams HoneyNutCheerios Kix
## Cheerios
## CocoaPuffs
## CountChocula
## GoldenGrahams
## HoneyNutCheerios 2.4249460
## Kix 2.0363262 3.2035500
## LuckyCharms 1.9814624 1.7325936 3.1618146
## MultiGrainCheerios 2.0285480 1.8804215 2.1146688
## OatmealRaisinCrisp 3.2689463 2.2059345 3.5994145
## RaisinNutBran 4.1839947 2.5110149 4.6979490
## TotalCornFlakes 2.2894683 3.2906179 0.8073886
## TotalRaisinBran 4.7976346 3.5913830 5.2211504
## TotalWholeGrain 3.2728604 2.5027713 2.6386432
## Trix 2.1433356 2.7363199 3.3569653
## Cheaties 3.2974572 2.6389715 2.5661937
## WheatiesHoneyGold 1.5540343 1.9179079 1.8233389
## AllBran 7.9363273 6.9326776 7.5352258
## AppleJacks 3.2440745 2.6923319 4.3041352
## CornFlakes 2.6897085 3.5855316 1.6230053
## CornPops 3.1159403 3.3373810 4.0016201
## CracklinOatBran 5.1151851 3.6858458 5.4393730
## Crispix 2.6212285 3.4272181 1.5529363
## FrootLoops 2.8775827 2.2615157 3.9529741
## FrostedFlakes 1.9031476 2.7028148 2.9990267
## FrostedMiniWheats 4.8284181 3.9282711 4.6696827
## FruitfulBran 4.4733538 3.1612737 4.8719856
## JustRightCrunchyNuggets 2.0089646 2.3945653 1.6969269
## MueslixCrispyBlend 4.8202389 4.0664538 5.0276571
## NutNHoneyCrunch 1.5890145 2.1303842 2.1227452
## NutriGrainAlmondRaisin 3.8442375 3.4350622 3.4195412
## NutriGrainWheat 3.8727844 3.4464556 3.0412532
## Product19 2.9228477 3.2547034 1.9674967
## RaisinBran 4.8194379 3.4247162 5.1832315
## RiceKrispies 2.5409090 3.6809752 1.4046970
## Smacks 3.7986813 4.2147016 3.8957828
## SpecialK 4.8410088 3.6913554 3.9035963
## CapNCrunch 2.0222036 2.6595281 3.4500434
## HoneyGrahamOhs 2.0341378 2.4406298 3.3646562
## Life 3.9069397 2.3733073 3.7778537
## PuffedRice 5.6752768 5.9279035 5.4755710
## PuffedWheat 6.0039557 5.6051292 5.9242671
## QuakerOatmeal 6.3784777 4.7605236 6.0910273
## LuckyCharms MultiGrainCheerios OatmealRaisinCrisp
## Cheerios
## CocoaPuffs
## CountChocula
## GoldenGrahams
## HoneyNutCheerios
## Kix
## LuckyCharms
## MultiGrainCheerios 2.2660288
## OatmealRaisinCrisp 2.4652237 2.6788328
## RaisinNutBran 3.0590465 2.9941773 2.5068259
## TotalCornFlakes 2.9943919 2.0956661 3.4463407
## TotalRaisinBran 4.2555039 3.9814340 2.9176366
## TotalWholeGrain 3.2892486 1.3497011 2.9492695
## Trix 1.1515278 2.6728689 3.0912068
## Cheaties 3.3781243 1.4082976 3.0125203
## WheatiesHoneyGold 1.6269942 1.1118023 2.3736532
## AllBran 8.0365206 6.2812439 7.1264959
## AppleJacks 1.7989986 3.2240681 3.6444368
## CornFlakes 3.8097866 2.4449223 4.6087536
## CornPops 2.1619578 3.1479402 3.9181821
## CracklinOatBran 4.3212366 3.9208932 2.8598289
## Crispix 3.3672621 2.2753290 4.1169026
## FrootLoops 1.1492944 2.7969732 2.7649059
## FrostedFlakes 1.9223695 2.3969103 3.7351142
## FrostedMiniWheats 3.7586014 3.4153333 4.0536755
## FruitfulBran 4.1625484 3.3986897 3.7400451
## JustRightCrunchyNuggets 2.0686437 1.2472135 2.5506853
## MueslixCrispyBlend 4.2944978 4.3752687 2.3309748
## NutNHoneyCrunch 1.3200238 1.9843382 2.3640958
## NutriGrainAlmondRaisin 3.9889411 3.1094469 2.1679839
## NutriGrainWheat 3.9144812 2.2300370 4.2270507
## Product19 3.8185108 2.5535685 4.4729217
## RaisinBran 4.3828933 3.6004724 3.2537880
## RiceKrispies 3.7200940 2.7463964 4.5122322
## Smacks 3.2899973 3.6350541 3.7465604
## SpecialK 4.5144318 3.9177413 4.4746833
## CapNCrunch 1.7909841 2.9588010 2.6174737
## HoneyGrahamOhs 1.8795133 2.6178674 2.3622674
## Life 3.0016615 2.5520271 2.2309957
## PuffedRice 5.2828504 4.9285302 6.6598568
## PuffedWheat 5.2825201 4.9417671 6.4580693
## QuakerOatmeal 5.1499504 4.9232722 4.4059097
## RaisinNutBran TotalCornFlakes TotalRaisinBran
## Cheerios
## CocoaPuffs
## CountChocula
## GoldenGrahams
## HoneyNutCheerios
## Kix
## LuckyCharms
## MultiGrainCheerios
## OatmealRaisinCrisp
## RaisinNutBran
## TotalCornFlakes 4.5164416
## TotalRaisinBran 4.0498078 5.1860712
## TotalWholeGrain 3.0772080 2.5502192 3.9678456
## Trix 3.6699821 3.0497077 4.8329933
## Cheaties 3.2558368 2.4751829 3.9881585
## WheatiesHoneyGold 3.3584962 1.6544004 4.0217136
## AllBran 6.7235024 7.6257621 5.8985991
## AppleJacks 3.8029094 4.0307853 4.6303148
## CornFlakes 5.2858335 1.9795437 5.6067309
## CornPops 4.2525259 3.5994248 4.9553639
## CracklinOatBran 1.7485873 5.2882682 4.2019162
## Crispix 5.0041481 1.4755197 5.1832579
## FrootLoops 2.9336370 3.6534443 4.4433069
## FrostedFlakes 4.3708584 2.8838419 4.6998869
## FrostedMiniWheats 3.8106643 4.1253405 4.5449985
## FruitfulBran 4.1457674 4.9339577 2.1171503
## JustRightCrunchyNuggets 3.4850193 1.2831974 4.2332413
## MueslixCrispyBlend 4.3987476 4.8858453 2.2733263
## NutNHoneyCrunch 3.6562425 1.9109930 4.2688969
## NutriGrainAlmondRaisin 4.1080372 3.4228961 2.9765780
## NutriGrainWheat 4.2350698 2.8449038 4.8363250
## Product19 5.1371894 2.4138501 5.4035659
## RaisinBran 3.5984788 5.1919983 1.4029245
## RiceKrispies 5.5320483 1.8048880 5.5690549
## Smacks 5.0298549 3.4265444 4.6980531
## SpecialK 4.9948837 3.9093619 5.4142885
## CapNCrunch 3.5558213 3.4177796 4.7546355
## HoneyGrahamOhs 3.2181305 3.3354265 4.4123988
## Life 1.7161277 3.5635673 4.2704574
## PuffedRice 5.7431220 4.9975574 8.0761154
## PuffedWheat 5.0507591 5.4936996 7.7417351
## QuakerOatmeal 3.0833089 5.6867260 5.9296891
## TotalWholeGrain Trix Cheaties
## Cheerios
## CocoaPuffs
## CountChocula
## GoldenGrahams
## HoneyNutCheerios
## Kix
## LuckyCharms
## MultiGrainCheerios
## OatmealRaisinCrisp
## RaisinNutBran
## TotalCornFlakes
## TotalRaisinBran
## TotalWholeGrain
## Trix 3.7749197
## Cheaties 0.2320985 3.8385995
## WheatiesHoneyGold 2.1134221 1.9489778 2.1261285
## AllBran 5.4984340 8.5429068 5.4442823
## AppleJacks 4.0147698 1.8981322 4.1141733
## CornFlakes 2.7770256 4.0174234 2.7082805
## CornPops 4.0016526 1.5980193 4.0617792
## CracklinOatBran 3.7674040 4.7942925 3.9008790
## Crispix 2.6055401 3.4470899 2.5321436
## FrootLoops 3.6357669 1.2989949 3.7452432
## FrostedFlakes 3.4702691 1.7362596 3.5088625
## FrostedMiniWheats 3.1684403 3.8406189 3.2106638
## FruitfulBran 3.3121306 4.7730950 3.3525450
## JustRightCrunchyNuggets 1.9511702 2.2119353 1.9373166
## MueslixCrispyBlend 4.4294852 4.6712849 4.4234002
## NutNHoneyCrunch 2.9039652 1.5966481 2.9316588
## NutriGrainAlmondRaisin 2.9437375 4.4167079 2.8789760
## NutriGrainWheat 1.6120341 4.2189290 1.5611037
## Product19 2.7010455 4.2902159 2.6507166
## RaisinBran 3.4135839 5.0007311 3.4528109
## RiceKrispies 3.1925220 3.9187468 3.1156651
## Smacks 4.1544732 3.0079582 4.0693170
## SpecialK 3.3431642 5.2833230 3.3512112
## CapNCrunch 4.0544785 1.8141783 4.1269071
## HoneyGrahamOhs 3.6621772 1.9418121 3.7422064
## Life 2.2798104 3.6793939 2.4062631
## PuffedRice 5.1535343 4.9416605 5.2003599
## PuffedWheat 4.9664537 5.1964416 5.0684502
## QuakerOatmeal 4.3250124 5.5931944 4.4659529
## WheatiesHoneyGold AllBran AppleJacks CornFlakes
## Cheerios
## CocoaPuffs
## CountChocula
## GoldenGrahams
## HoneyNutCheerios
## Kix
## LuckyCharms
## MultiGrainCheerios
## OatmealRaisinCrisp
## RaisinNutBran
## TotalCornFlakes
## TotalRaisinBran
## TotalWholeGrain
## Trix
## Cheaties
## WheatiesHoneyGold
## AllBran 7.1228334
## AppleJacks 2.7567024 8.3938374
## CornFlakes 2.5509477 7.1830933 4.4261664
## CornPops 2.6120893 8.4965149 1.3047962 4.1485443
## CracklinOatBran 4.3040427 6.5544395 5.1893668 6.2035553
## Crispix 2.0696747 7.3741266 3.8407513 1.1190600
## FrootLoops 2.2905308 8.2325944 1.3439414 4.4816349
## FrostedFlakes 1.8782858 8.0149199 1.8992958 2.9852704
## FrostedMiniWheats 3.4729475 6.7877832 3.2472366 4.6492408
## FruitfulBran 3.7556680 5.1410444 4.1415516 4.6479077
## JustRightCrunchyNuggets 0.6587180 7.1071535 3.0484042 2.4832179
## MueslixCrispyBlend 4.0193123 7.3448129 4.9931016 5.9196142
## NutNHoneyCrunch 0.9431881 7.9719745 2.5497008 3.0289589
## NutriGrainAlmondRaisin 2.9931223 6.3521283 5.0351694 4.3267529
## NutriGrainWheat 2.7789269 5.7174181 4.1133854 2.4560516
## Product19 2.7035260 6.9551832 4.4658370 1.0243326
## RaisinBran 4.0092929 4.7278481 4.7529231 5.3560370
## RiceKrispies 2.5348805 7.6979261 4.4280212 0.8767621
## Smacks 2.9071272 7.9013404 3.5962108 4.5246265
## SpecialK 3.9171308 7.2390974 4.8425276 3.7464018
## CapNCrunch 2.3705175 8.6539133 3.2370513 4.4795971
## HoneyGrahamOhs 2.1877390 8.1749947 3.2335481 4.3250786
## Life 2.7848011 6.5853336 3.9490034 4.4773767
## PuffedRice 5.1149254 8.9861697 5.1397252 5.2692950
## PuffedWheat 5.2978644 8.5222680 5.0091275 5.6811643
## QuakerOatmeal 5.1829672 7.6828471 5.4456822 6.5950249
## CornPops CracklinOatBran Crispix FrootLoops
## Cheerios
## CocoaPuffs
## CountChocula
## GoldenGrahams
## HoneyNutCheerios
## Kix
## LuckyCharms
## MultiGrainCheerios
## OatmealRaisinCrisp
## RaisinNutBran
## TotalCornFlakes
## TotalRaisinBran
## TotalWholeGrain
## Trix
## Cheaties
## WheatiesHoneyGold
## AllBran
## AppleJacks
## CornFlakes
## CornPops
## CracklinOatBran 5.4874812
## Crispix 3.4183714 5.9263003
## FrootLoops 1.8138336 4.1786692 3.9185218
## FrostedFlakes 1.5639639 5.5458612 2.5039862 2.2551018
## FrostedMiniWheats 3.0657815 4.8481031 3.8998988 3.4037253
## FruitfulBran 4.5360457 4.7386360 4.4334197 4.3289662
## JustRightCrunchyNuggets 2.7164842 4.3662600 1.8461460 2.5925300
## MueslixCrispyBlend 5.1302419 4.1579863 5.3053751 4.4296577
## NutNHoneyCrunch 2.4572359 4.6317979 2.4317974 2.0633557
## NutriGrainAlmondRaisin 5.0183255 3.9237947 3.9258765 4.4031463
## NutriGrainWheat 3.9332175 5.1642435 2.2448490 4.1730135
## Product19 4.4766383 6.0867802 1.7236497 4.5328956
## RaisinBran 5.0886579 3.7680896 5.1102825 4.5469506
## RiceKrispies 4.1379106 6.4471987 1.1341609 4.4956439
## Smacks 3.1128873 5.7237973 3.7641122 3.3682686
## SpecialK 5.2877195 5.9331232 3.6191972 4.9044384
## CapNCrunch 3.2875493 4.2469694 4.1494532 2.2494368
## HoneyGrahamOhs 3.2585449 3.8071574 3.9990695 2.2199005
## Life 4.3086079 2.4959685 4.1835649 3.0854585
## PuffedRice 4.5754636 6.9674891 5.0032542 5.1664493
## PuffedWheat 4.7867483 6.3410133 5.4436021 5.0365446
## QuakerOatmeal 5.7595799 3.4651268 6.1457224 4.7993940
## FrostedFlakes FrostedMiniWheats FruitfulBran
## Cheerios
## CocoaPuffs
## CountChocula
## GoldenGrahams
## HoneyNutCheerios
## Kix
## LuckyCharms
## MultiGrainCheerios
## OatmealRaisinCrisp
## RaisinNutBran
## TotalCornFlakes
## TotalRaisinBran
## TotalWholeGrain
## Trix
## Cheaties
## WheatiesHoneyGold
## AllBran
## AppleJacks
## CornFlakes
## CornPops
## CracklinOatBran
## Crispix
## FrootLoops
## FrostedFlakes
## FrostedMiniWheats 3.7697934
## FruitfulBran 4.0294662 4.0520982
## JustRightCrunchyNuggets 2.1984958 3.2156808 4.0074234
## MueslixCrispyBlend 5.0070272 5.0671378 3.9524938
## NutNHoneyCrunch 1.8744844 3.8150843 4.2400954
## NutriGrainAlmondRaisin 4.4663435 4.7809391 3.7351257
## NutriGrainWheat 3.4920248 2.7836187 3.7397265
## Product19 3.2950689 4.7696258 4.3755158
## RaisinBran 4.7549902 4.3255983 1.5988714
## RiceKrispies 2.9525556 4.8860911 4.8429429
## Smacks 3.2910778 3.7249561 4.9200003
## SpecialK 4.7903849 4.4250496 4.6912831
## CapNCrunch 2.8815205 5.1005298 5.0446758
## HoneyGrahamOhs 2.8245732 4.8453061 4.6479789
## Life 4.2438016 3.5618941 4.2855722
## PuffedRice 5.0627631 4.3258365 7.3021263
## PuffedWheat 5.3476130 3.9902882 6.8933968
## QuakerOatmeal 6.2643928 4.0508119 6.0052004
## JustRightCrunchyNuggets MueslixCrispyBlend
## Cheerios
## CocoaPuffs
## CountChocula
## GoldenGrahams
## HoneyNutCheerios
## Kix
## LuckyCharms
## MultiGrainCheerios
## OatmealRaisinCrisp
## RaisinNutBran
## TotalCornFlakes
## TotalRaisinBran
## TotalWholeGrain
## Trix
## Cheaties
## WheatiesHoneyGold
## AllBran
## AppleJacks
## CornFlakes
## CornPops
## CracklinOatBran
## Crispix
## FrootLoops
## FrostedFlakes
## FrostedMiniWheats
## FruitfulBran
## JustRightCrunchyNuggets
## MueslixCrispyBlend 4.1275381
## NutNHoneyCrunch 1.2419173 3.9647353
## NutriGrainAlmondRaisin 2.9930253 2.1940417
## NutriGrainWheat 2.5111886 5.5071730
## Product19 2.7599329 5.8241231
## RaisinBran 4.2078628 3.3130992
## RiceKrispies 2.4890955 5.7069207
## Smacks 2.8746169 4.3271318
## SpecialK 3.8739059 5.7010208
## CapNCrunch 2.7716104 4.2472614
## HoneyGrahamOhs 2.5748774 3.9809501
## Life 2.7843307 4.2500624
## PuffedRice 4.8089941 8.4269451
## PuffedWheat 5.0566813 8.2920395
## QuakerOatmeal 4.9799564 5.8829582
## NutNHoneyCrunch NutriGrainAlmondRaisin
## Cheerios
## CocoaPuffs
## CountChocula
## GoldenGrahams
## HoneyNutCheerios
## Kix
## LuckyCharms
## MultiGrainCheerios
## OatmealRaisinCrisp
## RaisinNutBran
## TotalCornFlakes
## TotalRaisinBran
## TotalWholeGrain
## Trix
## Cheaties
## WheatiesHoneyGold
## AllBran
## AppleJacks
## CornFlakes
## CornPops
## CracklinOatBran
## Crispix
## FrootLoops
## FrostedFlakes
## FrostedMiniWheats
## FruitfulBran
## JustRightCrunchyNuggets
## MueslixCrispyBlend
## NutNHoneyCrunch
## NutriGrainAlmondRaisin 3.2703362
## NutriGrainWheat 3.5107777 4.1551370
## Product19 3.1574941 4.2402895
## RaisinBran 4.5116560 3.2910215
## RiceKrispies 2.7876393 4.2227260
## Smacks 2.9423991 4.1002306
## SpecialK 4.1032070 4.6038159
## CapNCrunch 2.0067049 3.9301339
## HoneyGrahamOhs 2.0250961 3.5711474
## Life 3.1346659 3.4228416
## PuffedRice 5.4196322 7.5287995
## PuffedWheat 5.6386160 7.5222327
## QuakerOatmeal 5.3793586 5.5603840
## NutriGrainWheat Product19 RaisinBran RiceKrispies
## Cheerios
## CocoaPuffs
## CountChocula
## GoldenGrahams
## HoneyNutCheerios
## Kix
## LuckyCharms
## MultiGrainCheerios
## OatmealRaisinCrisp
## RaisinNutBran
## TotalCornFlakes
## TotalRaisinBran
## TotalWholeGrain
## Trix
## Cheaties
## WheatiesHoneyGold
## AllBran
## AppleJacks
## CornFlakes
## CornPops
## CracklinOatBran
## Crispix
## FrootLoops
## FrostedFlakes
## FrostedMiniWheats
## FruitfulBran
## JustRightCrunchyNuggets
## MueslixCrispyBlend
## NutNHoneyCrunch
## NutriGrainAlmondRaisin
## NutriGrainWheat
## Product19 2.5231860
## RaisinBran 4.2852086 5.1278199
## RiceKrispies 3.0052824 1.3477272 5.5261364
## Smacks 4.2624139 4.7711823 5.0291303 4.3440737
## SpecialK 3.2640698 3.0096565 5.2850702 3.8079535
## CapNCrunch 5.0294664 4.6166268 4.9773278 4.3319821
## HoneyGrahamOhs 4.7182520 4.4733227 4.5465743 4.2673085
## Life 3.4658776 4.2275074 3.9234809 4.6949875
## PuffedRice 4.4645376 5.7359671 7.6026522 5.6609637
## PuffedWheat 4.4132311 5.9317249 7.1482348 6.1473866
## QuakerOatmeal 4.9807149 6.4225135 5.5758667 6.8680414
## Smacks SpecialK CapNCrunch HoneyGrahamOhs
## Cheerios
## CocoaPuffs
## CountChocula
## GoldenGrahams
## HoneyNutCheerios
## Kix
## LuckyCharms
## MultiGrainCheerios
## OatmealRaisinCrisp
## RaisinNutBran
## TotalCornFlakes
## TotalRaisinBran
## TotalWholeGrain
## Trix
## Cheaties
## WheatiesHoneyGold
## AllBran
## AppleJacks
## CornFlakes
## CornPops
## CracklinOatBran
## Crispix
## FrootLoops
## FrostedFlakes
## FrostedMiniWheats
## FruitfulBran
## JustRightCrunchyNuggets
## MueslixCrispyBlend
## NutNHoneyCrunch
## NutriGrainAlmondRaisin
## NutriGrainWheat
## Product19
## RaisinBran
## RiceKrispies
## Smacks
## SpecialK 5.3823011
## CapNCrunch 3.9312855 5.6923203
## HoneyGrahamOhs 3.9357586 5.5721092 0.6541480
## Life 4.4201531 3.7521658 3.6327039 3.3570111
## PuffedRice 5.8281390 6.4571154 6.2477120 6.1848769
## PuffedWheat 6.3248209 6.1368075 6.3951560 6.2643142
## QuakerOatmeal 6.3371836 5.1065222 5.8755173 5.6110845
## Life PuffedRice PuffedWheat
## Cheerios
## CocoaPuffs
## CountChocula
## GoldenGrahams
## HoneyNutCheerios
## Kix
## LuckyCharms
## MultiGrainCheerios
## OatmealRaisinCrisp
## RaisinNutBran
## TotalCornFlakes
## TotalRaisinBran
## TotalWholeGrain
## Trix
## Cheaties
## WheatiesHoneyGold
## AllBran
## AppleJacks
## CornFlakes
## CornPops
## CracklinOatBran
## Crispix
## FrootLoops
## FrostedFlakes
## FrostedMiniWheats
## FruitfulBran
## JustRightCrunchyNuggets
## MueslixCrispyBlend
## NutNHoneyCrunch
## NutriGrainAlmondRaisin
## NutriGrainWheat
## Product19
## RaisinBran
## RiceKrispies
## Smacks
## SpecialK
## CapNCrunch
## HoneyGrahamOhs
## Life
## PuffedRice 5.5406508
## PuffedWheat 5.0566903 1.6622826
## QuakerOatmeal 2.8847652 5.8678267 4.8791908
The different dendrograms are plotted using average linkage, single linkage, and complete linkage.
#Average Linkage, Euclidean
d=dist(cereal,method = "euclidean") # distance matrix
## Warning in dist(cereal, method = "euclidean"): NAs introduced by coercion
fit=hclust(d, method="ave")
plot(fit,cex=0.5, labels=cereal$Brand, main = "Average Linkage, Euclidean") # display dendogram
groups=cutree(fit, k=3) # cut tree into 3 clusters
rect.hclust(fit, k=3, border="red")
#Single Linkage, Euclidean
d=dist(cereal,method = "euclidean") # distance matrix
## Warning in dist(cereal, method = "euclidean"): NAs introduced by coercion
fit=hclust(d, method="single")
plot(fit,cex=0.5, labels= cereal$Brand, main = "Single Linkage, Euclidean") # display dendogram
groups=cutree(fit, k=3) # cut tree into 3 clusters
rect.hclust(fit, k=3, border="red")
#Complete Linkage, Euclidean
d=dist(cereal,method = "euclidean") # distance matrix
## Warning in dist(cereal, method = "euclidean"): NAs introduced by coercion
fit=hclust(d, method="complete")
plot(fit,cex=0.5, labels =cereal$Brand, main = "Complete Linkage, Euclidean") # display dendogram
groups=cutree(fit, k=3) # cut tree into 3 clusters
rect.hclust(fit, k=3, border="red")
In comparison to part E, we notice that from the Puffed Rice, Puffed Wheat and All Bran all form separate clusters (when K = 4 and with average linkage). After inspecting them further, they seem to be the healthiest cereals which clusters them together.
Additionally, with K-Means we can see that all the types of clustering include one cluster that is generally “unhealthy” brands of cereal. With K = 2, the blue is generally unhealthier. With K = 3, the green is the unhealthiest. With K = 4, the blue is the unhealthiest. We notice with complete linkage, all the cereals like Cocoa Puffs, Trix, Lucky Charms, etc. are very close to each other. This is expected.
library(factoextra)
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
cereal <- read.csv("T11-9.DAT.txt", sep="", header=FALSE)
colnames(cereal) = c("Brand","Manufacturer","Calories","Protein","Fat","Sodium","Fiber","Carbohydrates", "Sugar", "Potassium", "Group")
cereal$Carbohydrates <- as.numeric(cereal$Carbohydrates)
df <- na.omit(cereal)
df <- scale(df[,3:10])
head(df)
## Calories Protein Fat Sodium Fiber Carbohydrates
## 1 0.1103426 -0.3806804 1.2767742 -0.005871679 -0.1189104 -1.3587478
## 2 0.1103426 2.8931707 1.2767742 1.382780515 0.1589780 0.6106732
## 3 0.1103426 -1.1991431 0.0290176 -0.005871679 -0.9525758 -0.7022742
## 4 0.1103426 -1.1991431 0.0290176 -0.005871679 -0.9525758 -0.7022742
## 5 0.1103426 -1.1991431 0.0290176 1.256539406 -0.9525758 0.1730241
## 6 0.1103426 0.4377824 0.0290176 0.877816080 -0.1189104 -0.9210987
## Sugar Potassium
## 1 0.5280395 -0.21810133
## 2 -1.4559536 0.31132205
## 3 1.1893705 -0.44499706
## 4 1.1893705 -0.29373324
## 5 0.3075958 -0.59626088
## 6 0.5280395 0.08442632
rownames(df) <- cereal$Brand
head(df)
## Calories Protein Fat Sodium Fiber
## ACCheerios 0.1103426 -0.3806804 1.2767742 -0.005871679 -0.1189104
## Cheerios 0.1103426 2.8931707 1.2767742 1.382780515 0.1589780
## CocoaPuffs 0.1103426 -1.1991431 0.0290176 -0.005871679 -0.9525758
## CountChocula 0.1103426 -1.1991431 0.0290176 -0.005871679 -0.9525758
## GoldenGrahams 0.1103426 -1.1991431 0.0290176 1.256539406 -0.9525758
## HoneyNutCheerios 0.1103426 0.4377824 0.0290176 0.877816080 -0.1189104
## Carbohydrates Sugar Potassium
## ACCheerios -1.3587478 0.5280395 -0.21810133
## Cheerios 0.6106732 -1.4559536 0.31132205
## CocoaPuffs -0.7022742 1.1893705 -0.44499706
## CountChocula -0.7022742 1.1893705 -0.29373324
## GoldenGrahams 0.1730241 0.3075958 -0.59626088
## HoneyNutCheerios -0.9210987 0.5280395 0.08442632
set.seed(123)
k2 <- kmeans(df, centers = 2, nstart = 25)
k3 <- kmeans(df, centers = 3, nstart = 25)
k4 <- kmeans(df, centers = 4, nstart = 25)
fviz_cluster(k2, data = df)
fviz_cluster(k3, data = df)
fviz_cluster(k4, data = df)