Scotch Whisky
The dataset contains flavour scores for 86 Scotch malt whiskies (one row per distillery) across 12 taste characteristics, from Body to Floral.
#Load library
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Read the whisky flavour-profile data into a tibble
data_set <- readr::read_csv(file = "whisky.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## RowID = col_double(),
## Distillery = col_character(),
## Body = col_double(),
## Sweetness = col_double(),
## Smoky = col_double(),
## Medicinal = col_double(),
## Tobacco = col_double(),
## Honey = col_double(),
## Spicy = col_double(),
## Winey = col_double(),
## Nutty = col_double(),
## Malty = col_double(),
## Fruity = col_double(),
## Floral = col_double(),
## Postcode = col_character(),
## Latitude = col_double(),
## Longitude = col_double()
## )
head(data_set, 10)
## # A tibble: 10 x 17
## RowID Distillery Body Sweetness Smoky Medicinal Tobacco Honey Spicy Winey
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Aberfeldy 2 2 2 0 0 2 1 2
## 2 2 Aberlour 3 3 1 0 0 4 3 2
## 3 3 AnCnoc 1 3 2 0 0 2 0 0
## 4 4 Ardbeg 4 1 4 4 0 0 2 0
## 5 5 Ardmore 2 2 2 0 0 1 1 1
## 6 6 ArranIsle~ 2 3 1 1 0 1 1 1
## 7 7 Auchentos~ 0 2 0 0 0 1 1 0
## 8 8 Auchroisk 2 3 1 0 0 2 1 2
## 9 9 Aultmore 2 2 1 0 0 1 0 0
## 10 10 Balblair 2 3 2 1 0 0 2 0
## # ... with 7 more variables: Nutty <dbl>, Malty <dbl>, Fruity <dbl>,
## # Floral <dbl>, Postcode <chr>, Latitude <dbl>, Longitude <dbl>
dim(data_set)
## [1] 86 17
str(data_set)
## tibble [86 x 17] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ RowID : num [1:86] 1 2 3 4 5 6 7 8 9 10 ...
## $ Distillery: chr [1:86] "Aberfeldy" "Aberlour" "AnCnoc" "Ardbeg" ...
## $ Body : num [1:86] 2 3 1 4 2 2 0 2 2 2 ...
## $ Sweetness : num [1:86] 2 3 3 1 2 3 2 3 2 3 ...
## $ Smoky : num [1:86] 2 1 2 4 2 1 0 1 1 2 ...
## $ Medicinal : num [1:86] 0 0 0 4 0 1 0 0 0 1 ...
## $ Tobacco : num [1:86] 0 0 0 0 0 0 0 0 0 0 ...
## $ Honey : num [1:86] 2 4 2 0 1 1 1 2 1 0 ...
## $ Spicy : num [1:86] 1 3 0 2 1 1 1 1 0 2 ...
## $ Winey : num [1:86] 2 2 0 0 1 1 0 2 0 0 ...
## $ Nutty : num [1:86] 2 2 2 1 2 0 2 2 2 2 ...
## $ Malty : num [1:86] 2 3 2 2 3 1 2 2 2 1 ...
## $ Fruity : num [1:86] 2 3 3 1 1 1 3 2 2 2 ...
## $ Floral : num [1:86] 2 2 2 0 1 2 3 1 2 1 ...
## $ Postcode : chr [1:86] "PH15 2EB" "AB38 9PJ" "AB5 5LI" "PA42 7EB" ...
## $ Latitude : num [1:86] 286580 326340 352960 141560 355350 ...
## $ Longitude : num [1:86] 749680 842570 839320 646220 829140 ...
## - attr(*, "spec")=
## .. cols(
## .. RowID = col_double(),
## .. Distillery = col_character(),
## .. Body = col_double(),
## .. Sweetness = col_double(),
## .. Smoky = col_double(),
## .. Medicinal = col_double(),
## .. Tobacco = col_double(),
## .. Honey = col_double(),
## .. Spicy = col_double(),
## .. Winey = col_double(),
## .. Nutty = col_double(),
## .. Malty = col_double(),
## .. Fruity = col_double(),
## .. Floral = col_double(),
## .. Postcode = col_character(),
## .. Latitude = col_double(),
## .. Longitude = col_double()
## .. )
names(data_set)
## [1] "RowID" "Distillery" "Body" "Sweetness" "Smoky"
## [6] "Medicinal" "Tobacco" "Honey" "Spicy" "Winey"
## [11] "Nutty" "Malty" "Fruity" "Floral" "Postcode"
## [16] "Latitude" "Longitude"
Data visualization
# Check whether the dataset contains any missing values (NAs)
library(naniar)
# vis_miss() plots the missingness of every column of data_set
vis_miss(data_set)

#Correlation of dataset
library(corrplot)
## corrplot 0.84 loaded
# Pairwise correlations between the 12 flavour scores (columns 3 to 14,
# Body through Floral)
correlations <- data_set %>%
  select(3:14) %>%
  cor()
correlations
## Body Sweetness Smoky Medicinal Tobacco
## Body 1.00000000 -0.136517815 0.52403158 0.35405026 0.168718106
## Sweetness -0.13651781 1.000000000 -0.40589707 -0.39201677 -0.147870502
## Smoky 0.52403158 -0.405897070 1.00000000 0.68607052 0.365500702
## Medicinal 0.35405026 -0.392016773 0.68607052 1.00000000 0.425105554
## Tobacco 0.16871811 -0.147870502 0.36550070 0.42510555 1.000000000
## Honey 0.08203083 0.132558139 -0.19531751 -0.39662863 -0.275490317
## Spicy 0.18849952 -0.054199926 0.23174451 0.04490323 0.054067800
## Winey 0.40857633 0.115727403 -0.02819042 -0.20265132 0.009096926
## Nutty 0.12632342 -0.032492924 -0.02313214 -0.11367135 -0.117717350
## Malty -0.11685910 -0.001515808 -0.19287531 -0.25895929 -0.059347395
## Fruity -0.01320457 0.019819512 -0.31296978 -0.33097549 -0.235145442
## Floral -0.46120320 0.144986800 -0.43166295 -0.51132279 -0.212375174
## Honey Spicy Winey Nutty Malty
## Body 0.08203083 0.18849952 0.408576331 0.12632342 -0.116859104
## Sweetness 0.13255814 -0.05419993 0.115727403 -0.03249292 -0.001515808
## Smoky -0.19531751 0.23174451 -0.028190424 -0.02313214 -0.192875307
## Medicinal -0.39662863 0.04490323 -0.202651322 -0.11367135 -0.258959287
## Tobacco -0.27549032 0.05406780 0.009096926 -0.11771735 -0.059347395
## Honey 1.00000000 0.13956277 0.362020513 0.18849200 0.310184243
## Spicy 0.13956277 1.00000000 0.092704030 -0.04285577 0.036303005
## Winey 0.36202051 0.09270403 1.000000000 0.19846716 0.112368417
## Nutty 0.18849200 -0.04285577 0.198467159 1.00000000 0.066157368
## Malty 0.31018424 0.03630300 0.112368417 0.06615737 1.000000000
## Fruity 0.10882249 0.14471361 0.090693903 0.07176477 0.207288023
## Floral 0.18302902 0.03466260 -0.126931653 0.01830236 0.106308708
## Fruity Floral
## Body -0.01320457 -0.46120320
## Sweetness 0.01981951 0.14498680
## Smoky -0.31296978 -0.43166295
## Medicinal -0.33097549 -0.51132279
## Tobacco -0.23514544 -0.21237517
## Honey 0.10882249 0.18302902
## Spicy 0.14471361 0.03466260
## Winey 0.09069390 -0.12693165
## Nutty 0.07176477 0.01830236
## Malty 0.20728802 0.10630871
## Fruity 1.00000000 0.26233561
## Floral 0.26233561 1.00000000
# Draw the correlation matrix as a coloured grid with the coefficients overlaid.
# `addcolorlabel` is not an argument of corrplot(); it was only forwarded to
# text()/title(), which produced "not a graphical parameter" warnings.
# `cl.pos = "n"` is the supported way to request no colour legend.
corrplot(correlations, method = "color", tl.col = "black", tl.cex = 0.5,
         addCoef.col = "black", cl.pos = "n", order = "AOE",
         number.cex = 0.5, is.corr = TRUE)
## Warning in text.default(pos.xlabel[, 1], pos.xlabel[, 2], newcolnames, srt =
## tl.srt, : "addcolorlabel" is not a graphical parameter
## Warning in text.default(pos.ylabel[, 1], pos.ylabel[, 2], newrownames, col =
## tl.col, : "addcolorlabel" is not a graphical parameter
## Warning in title(title, ...): "addcolorlabel" is not a graphical parameter

Data processing
# Keep the distillery name plus the 12 flavour scores (columns 2 to 14)
whisky <- as.data.frame(data_set[, 2:14])
head(whisky)
## Distillery Body Sweetness Smoky Medicinal Tobacco Honey Spicy Winey Nutty ## 1 Aberfeldy 2 2 2 0 0 2 1 2 2 ## 2 Aberlour 3 3 1 0 0 4 3 2 2 ## 3 AnCnoc 1 3 2 0 0 2 0 0 2 ## 4 Ardbeg 4 1 4 4 0 0 2 0 1 ## 5 Ardmore 2 2 2 0 0 1 1 1 2 ## 6 ArranIsleOf 2 3 1 1 0 1 1 1 0 ## Malty Fruity Floral ## 1 2 2 2 ## 2 3 3 2 ## 3 2 3 2 ## 4 2 1 0 ## 5 3 1 1 ## 6 1 1 2 |
# Move the distillery names into the row names so that only the numeric
# flavour columns remain in the data frame
rownames(whisky) <- whisky$Distillery
whisky$Distillery <- NULL
head(whisky)
## Body Sweetness Smoky Medicinal Tobacco Honey Spicy Winey Nutty ## Aberfeldy 2 2 2 0 0 2 1 2 2 ## Aberlour 3 3 1 0 0 4 3 2 2 ## AnCnoc 1 3 2 0 0 2 0 0 2 ## Ardbeg 4 1 4 4 0 0 2 0 1 ## Ardmore 2 2 2 0 0 1 1 1 2 ## ArranIsleOf 2 3 1 1 0 1 1 1 0 ## Malty Fruity Floral ## Aberfeldy 2 2 2 ## Aberlour 3 3 2 ## AnCnoc 2 3 2 ## Ardbeg 2 1 0 ## Ardmore 3 1 1 ## ArranIsleOf 1 1 2 |
Elbow method
# Elbow method: total within-cluster sum of squares (WCSS) for k = 1..10.
set.seed(23)
k_values <- 1:10
# One kmeans() fit per k, in increasing order and with the same arguments as
# before, so the random-number stream (and everything fitted afterwards) is
# unchanged. vapply() returns a pre-sized numeric vector instead of growing
# one inside a loop, and `tot.withinss` is kmeans()'s own sum of `withinss`.
wcss <- vapply(
  k_values,
  function(k) kmeans(whisky, k)$tot.withinss,
  numeric(1)
)
plot(k_values, wcss, type = "b", main = "Elbow method",
     xlab = "Number of clusters (k)", ylab = "WCSS(k)")

#I chose k = 4
Apply the k-means algorithm with optimal k
# Final fit with k = 4: up to 300 iterations per run, best of 10 random starts
k_means <- kmeans(
  x = whisky,
  centers = 4,
  iter.max = 300,
  nstart = 10
)
k_means
## K-means clustering with 4 clusters of sizes 37, 15, 6, 28
##
## Cluster means:
## Body Sweetness Smoky Medicinal Tobacco Honey Spicy
## 1 1.432432 2.486486 1.054054 0.24324324 0.05405405 0.9729730 1.108108
## 2 1.866667 1.933333 2.066667 1.06666667 0.20000000 1.1333333 1.466667
## 3 3.666667 1.500000 3.666667 3.33333333 0.66666667 0.1666667 1.666667
## 4 2.678571 2.392857 1.428571 0.07142857 0.03571429 1.8928571 1.642857
## Winey Nutty Malty Fruity Floral
## 1 0.4594595 1.162162 1.675676 1.972973 2.1081081
## 2 0.8666667 1.533333 1.800000 1.066667 1.1333333
## 3 0.5000000 1.166667 1.333333 1.166667 0.1666667
## 4 1.8214286 1.892857 2.071429 2.107143 1.7857143
##
## Clustering vector:
## Aberfeldy Aberlour AnCnoc Ardbeg
## 4 4 1 3
## Ardmore ArranIsleOf Auchentoshan Auchroisk
## 2 1 1 4
## Aultmore Balblair Balmenach Belvenie
## 1 2 4 4
## BenNevis Benriach Benrinnes Benromach
## 4 1 4 4
## Bladnoch BlairAthol Bowmore Bruichladdich
## 1 4 2 2
## Bunnahabhain Caol Ila Cardhu Clynelish
## 1 3 1 3
## Craigallechie Craigganmore Dailuaine Dalmore
## 4 1 4 4
## Dalwhinnie Deanston Dufftown Edradour
## 1 4 1 4
## GlenDeveronMacduff GlenElgin GlenGarioch GlenGrant
## 2 1 2 1
## GlenKeith GlenMoray GlenOrd GlenScotia
## 1 1 4 2
## GlenSpey Glenallachie Glendronach Glendullan
## 1 1 4 4
## Glenfarclas Glenfiddich Glengoyne Glenkinchie
## 4 1 1 1
## Glenlivet Glenlossie Glenmorangie Glenrothes
## 4 1 1 4
## Glenturret Highland Park Inchgower Isle of Jura
## 4 2 1 2
## Knochando Lagavulin Laphroig Linkwood
## 4 3 3 1
## Loch Lomond Longmorn Macallan Mannochmore
## 1 4 4 1
## Miltonduff Mortlach Oban OldFettercairn
## 1 4 2 2
## OldPulteney RoyalBrackla RoyalLochnagar Scapa
## 2 1 4 4
## Speyburn Speyside Springbank Strathisla
## 1 1 2 4
## Strathmill Talisker Tamdhu Tamnavulin
## 1 3 1 1
## Teaninich Tobermory Tomatin Tomintoul
## 1 1 2 1
## Tormore Tullibardine
## 2 1
##
## Within cluster sum of squares by cluster:
## [1] 162.32432 78.93333 24.33333 137.60714
## (between_SS / total_SS = 39.4 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
summary(k_means)
## Length Class Mode
## cluster 86 -none- numeric
## centers 48 -none- numeric
## totss 1 -none- numeric
## withinss 4 -none- numeric
## tot.withinss 1 -none- numeric
## betweenss 1 -none- numeric
## size 4 -none- numeric
## iter 1 -none- numeric
## ifault 1 -none- numeric
table(k_means$cluster)
##
## 1 2 3 4
## 37 15 6 28
data.frame(k_means$centers)
## Body Sweetness Smoky Medicinal Tobacco Honey Spicy
## 1 1.432432 2.486486 1.054054 0.24324324 0.05405405 0.9729730 1.108108
## 2 1.866667 1.933333 2.066667 1.06666667 0.20000000 1.1333333 1.466667
## 3 3.666667 1.500000 3.666667 3.33333333 0.66666667 0.1666667 1.666667
## 4 2.678571 2.392857 1.428571 0.07142857 0.03571429 1.8928571 1.642857
## Winey Nutty Malty Fruity Floral
## 1 0.4594595 1.162162 1.675676 1.972973 2.1081081
## 2 0.8666667 1.533333 1.800000 1.066667 1.1333333
## 3 0.5000000 1.166667 1.333333 1.166667 0.1666667
## 4 1.8214286 1.892857 2.071429 2.107143 1.7857143
aggregate(whisky, by = list(cluster = k_means$cluster), mean)
## cluster Body Sweetness Smoky Medicinal Tobacco Honey Spicy
## 1 1 1.432432 2.486486 1.054054 0.24324324 0.05405405 0.9729730 1.108108
## 2 2 1.866667 1.933333 2.066667 1.06666667 0.20000000 1.1333333 1.466667
## 3 3 3.666667 1.500000 3.666667 3.33333333 0.66666667 0.1666667 1.666667
## 4 4 2.678571 2.392857 1.428571 0.07142857 0.03571429 1.8928571 1.642857
## Winey Nutty Malty Fruity Floral
## 1 0.4594595 1.162162 1.675676 1.972973 2.1081081
## 2 0.8666667 1.533333 1.800000 1.066667 1.1333333
## 3 0.5000000 1.166667 1.333333 1.166667 0.1666667
## 4 1.8214286 1.892857 2.071429 2.107143 1.7857143
# Attach each distillery's cluster label to its flavour scores
y_kmeans <- k_means[["cluster"]]
whisky_cluster <- cbind(whisky, y_kmeans)
head(whisky_cluster, n = 10)
## Body Sweetness Smoky Medicinal Tobacco Honey Spicy Winey Nutty
## Aberfeldy 2 2 2 0 0 2 1 2 2
## Aberlour 3 3 1 0 0 4 3 2 2
## AnCnoc 1 3 2 0 0 2 0 0 2
## Ardbeg 4 1 4 4 0 0 2 0 1
## Ardmore 2 2 2 0 0 1 1 1 2
## ArranIsleOf 2 3 1 1 0 1 1 1 0
## Auchentoshan 0 2 0 0 0 1 1 0 2
## Auchroisk 2 3 1 0 0 2 1 2 2
## Aultmore 2 2 1 0 0 1 0 0 2
## Balblair 2 3 2 1 0 0 2 0 2
## Malty Fruity Floral y_kmeans
## Aberfeldy 2 2 2 4
## Aberlour 3 3 2 4
## AnCnoc 2 3 2 1
## Ardbeg 2 1 0 3
## Ardmore 3 1 1 2
## ArranIsleOf 1 1 2 1
## Auchentoshan 2 3 3 1
## Auchroisk 2 2 1 4
## Aultmore 2 2 2 1
## Balblair 1 2 1 2
Viewing the clusters
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Plot the k-means clusters, drawing each observation as a text label
fviz_cluster(
  object = k_means,
  data = whisky,
  geom = "text",
  labelsize = 7,
  main = "Clustering of Scotch Whisky"
)

Clustering model validation
library(fpc)
library(NbClust)
# Evaluate k = 2..12 with every NbClust index, using k-means on Euclidean
# distances, to cross-check the number of clusters
nb <- NbClust(whisky, distance = "euclidean", min.nc = 2, max.nc = 12,
              method = "kmeans", index = "all")
 |
## *** : The Hubert index is a graphical method of determining the number of clusters. ## In the plot of Hubert index, we seek a significant knee that corresponds to a ## significant increase of the value of the measure i.e the significant peak in Hubert ## index second differences plot. ## |
 |
## *** : The D index is a graphical method of determining the number of clusters. ## In the plot of D index, we seek a significant knee (the significant peak in Dindex ## second differences plot) that corresponds to a significant increase of the value of ## the measure. ## ## ******************************************************************* ## * Among all indices: ## * 11 proposed 2 as the best number of clusters ## * 7 proposed 3 as the best number of clusters ## * 2 proposed 5 as the best number of clusters ## * 2 proposed 7 as the best number of clusters ## * 1 proposed 9 as the best number of clusters ## * 1 proposed 12 as the best number of clusters ## ## ***** Conclusion ***** ## ## * According to the majority rule, the best number of clusters is 2 ## ## ## ******************************************************************* |
# Plot how many indices voted for each candidate number of clusters
fviz_nbclust(nb) + theme_minimal()
## Among all indices: ## =================== ## * 2 proposed 0 as the best number of clusters ## * 11 proposed 2 as the best number of clusters ## * 7 proposed 3 as the best number of clusters ## * 2 proposed 5 as the best number of clusters ## * 2 proposed 7 as the best number of clusters ## * 1 proposed 9 as the best number of clusters ## * 1 proposed 12 as the best number of clusters ## ## Conclusion ## ========================= ## * According to the majority rule, the best number of clusters is 2 . |
 |
Geospatial visualization
In this part I used the 'ggmap' library; the register_google() call, which registers the Google Maps API key, is deliberately not shown. I also looked up the latitude and longitude of every distillery in the dataset.
zone <- read.csv("Zone.txt")
head(zone)
## Lat Lon
## 1 56.62759 -3.847860
## 2 57.46793 -3.229942
## 3 57.65829 -3.082305
## 4 55.64025 -6.110231
## 5 57.35170 -2.747930
## 6 55.66252 -5.272552
zone$Postcode <- data_set$Postcode
head(zone)
## Lat Lon Postcode
## 1 56.62759 -3.847860 PH15 2EB
## 2 57.46793 -3.229942 AB38 9PJ
## 3 57.65829 -3.082305 AB5 5LI
## 4 55.64025 -6.110231 PA42 7EB
## 5 57.35170 -2.747930 AB54 4NH
## 6 55.66252 -5.272552 KA27 8HJ
# Reorder the columns so that Postcode comes first, followed by Lat and Lon
zone <- zone[, c("Postcode", "Lat", "Lon")]
head(zone)
## Postcode Lat Lon
## 1 PH15 2EB 56.62759 -3.847860
## 2 AB38 9PJ 57.46793 -3.229942
## 3 AB5 5LI 57.65829 -3.082305
## 4 PA42 7EB 55.64025 -6.110231
## 5 AB54 4NH 57.35170 -2.747930
## 6 KA27 8HJ 55.66252 -5.272552
# Append the looked-up coordinates (Lat, Lon) to the original data
data_set2 <- cbind(data_set, zone[, c("Lat", "Lon")])
head(data_set2)
## RowID Distillery Body Sweetness Smoky Medicinal Tobacco Honey Spicy Winey
## 1 1 Aberfeldy 2 2 2 0 0 2 1 2
## 2 2 Aberlour 3 3 1 0 0 4 3 2
## 3 3 AnCnoc 1 3 2 0 0 2 0 0
## 4 4 Ardbeg 4 1 4 4 0 0 2 0
## 5 5 Ardmore 2 2 2 0 0 1 1 1
## 6 6 ArranIsleOf 2 3 1 1 0 1 1 1
## Nutty Malty Fruity Floral Postcode Latitude Longitude Lat Lon
## 1 2 2 2 2 PH15 2EB 286580 749680 56.62759 -3.847860
## 2 2 3 3 2 AB38 9PJ 326340 842570 57.46793 -3.229942
## 3 2 2 3 2 AB5 5LI 352960 839320 57.65829 -3.082305
## 4 1 2 1 0 PA42 7EB 141560 646220 55.64025 -6.110231
## 5 2 3 1 1 AB54 4NH 355350 829140 57.35170 -2.747930
## 6 0 1 1 2 KA27 8HJ 194050 649950 55.66252 -5.272552
# Per-distillery coordinates plus the k-means cluster label, for mapping.
# Built directly from data_set2 (no right-assignment or intermediate copy).
scotland_zone <- data_set2 %>%
  select(Distillery, Lat, Lon)
scotland_zone$cluster <- y_kmeans
head(scotland_zone)
## Distillery Lat Lon cluster
## 1 Aberfeldy 56.62759 -3.847860 4
## 2 Aberlour 57.46793 -3.229942 4
## 3 AnCnoc 57.65829 -3.082305 1
## 4 Ardbeg 55.64025 -6.110231 3
## 5 Ardmore 57.35170 -2.747930 2
## 6 ArranIsleOf 55.66252 -5.272552 1
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
Geospatial visualization
scotland <- get_map(location = c(lon = -4.0000000, lat = 57.0000000), zoom = 6)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=57,-4&zoom=6&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx
# Overlay the distilleries on the base map, coloured by k-means cluster
ggmap(scotland) +
  geom_point(
    mapping = aes(x = Lon, y = Lat, color = as.character(cluster)),
    data = scotland_zone
  ) +
  scale_color_manual(values = c("red", "green", "blue", "purple")) +
  labs(
    title = "Scotch Whisky Cluster",
    x = "Longitude",
    y = "Latitude",
    color = "Cluster"
  )

Analysis
whisky_cluster %>% filter(y_kmeans == 4)
## Body Sweetness Smoky Medicinal Tobacco Honey Spicy Winey Nutty
## Aberfeldy 2 2 2 0 0 2 1 2 2
## Aberlour 3 3 1 0 0 4 3 2 2
## Auchroisk 2 3 1 0 0 2 1 2 2
## Balmenach 4 3 2 0 0 2 1 3 3
## Belvenie 3 2 1 0 0 3 2 1 0
## BenNevis 4 2 2 0 0 2 2 0 2
## Benrinnes 3 2 2 0 0 3 1 1 2
## Benromach 2 2 2 0 0 2 2 1 2
## BlairAthol 2 2 2 0 0 1 2 2 2
## Craigallechie 2 2 2 0 1 2 2 1 2
## Dailuaine 4 2 2 0 0 1 2 2 2
## Dalmore 3 2 2 1 0 1 2 2 1
## Deanston 2 2 1 0 0 2 1 1 1
## Edradour 2 3 1 0 0 2 1 1 4
## GlenOrd 3 2 1 0 0 1 2 1 1
## Glendronach 4 2 2 0 0 2 1 4 2
## Glendullan 3 2 1 0 0 2 1 2 1
## Glenfarclas 2 4 1 0 0 1 2 3 2
## Glenlivet 2 3 1 0 0 2 2 2 1
## Glenrothes 2 3 1 0 0 1 1 2 1
## Glenturret 2 3 1 0 0 2 2 2 2
## Knochando 2 3 1 0 0 2 2 1 2
## Longmorn 3 2 1 0 0 1 1 1 3
## Macallan 4 3 1 0 0 2 1 4 2
## Mortlach 3 2 2 0 0 2 3 3 2
## RoyalLochnagar 3 2 2 0 0 2 2 2 2
## Scapa 2 2 1 1 0 2 1 1 2
## Strathisla 2 2 1 0 0 2 2 2 3
## Malty Fruity Floral y_kmeans
## Aberfeldy 2 2 2 4
## Aberlour 3 3 2 4
## Auchroisk 2 2 1 4
## Balmenach 0 1 2 4
## Belvenie 2 2 2 4
## BenNevis 2 2 2 4
## Benrinnes 3 2 2 4
## Benromach 2 2 2 4
## BlairAthol 2 2 2 4
## Craigallechie 2 1 4 4
## Dailuaine 2 2 1 4
## Dalmore 2 3 1 4
## Deanston 3 2 1 4
## Edradour 2 2 2 4
## GlenOrd 2 2 2 4
## Glendronach 2 2 0 4
## Glendullan 2 3 2 4
## Glenfarclas 3 2 2 4
## Glenlivet 2 2 3 4
## Glenrothes 2 2 0 4
## Glenturret 2 1 2 4
## Knochando 1 2 2 4
## Longmorn 3 2 3 4
## Macallan 2 3 1 4
## Mortlach 1 2 2 4
## RoyalLochnagar 2 3 1 4
## Scapa 2 2 2 4
## Strathisla 3 3 2 4
c1 <- whisky_cluster %>% filter(y_kmeans == 1) %>% select(-y_kmeans)
head(c1)
## Body Sweetness Smoky Medicinal Tobacco Honey Spicy Winey Nutty
## AnCnoc 1 3 2 0 0 2 0 0 2
## ArranIsleOf 2 3 1 1 0 1 1 1 0
## Auchentoshan 0 2 0 0 0 1 1 0 2
## Aultmore 2 2 1 0 0 1 0 0 2
## Benriach 2 2 1 0 0 2 2 0 0
## Bladnoch 1 2 1 0 0 0 1 1 0
## Malty Fruity Floral
## AnCnoc 2 3 2
## ArranIsleOf 1 1 2
## Auchentoshan 2 3 3
## Aultmore 2 2 2
## Benriach 2 3 2
## Bladnoch 2 2 3
# Mean score per flavour for cluster 1; colMeans() is the vectorised
# equivalent of apply(x, 2, mean)
c1 <- colMeans(c1)
# Members of cluster 2, flavour columns only
c2 <- whisky_cluster %>%
  filter(y_kmeans == 2) %>%
  select(-y_kmeans)
head(c2)
## Body Sweetness Smoky Medicinal Tobacco Honey Spicy Winey
## Ardmore 2 2 2 0 0 1 1 1
## Balblair 2 3 2 1 0 0 2 0
## Bowmore 2 2 3 1 0 2 2 1
## Bruichladdich 1 1 2 2 0 2 2 1
## GlenDeveronMacduff 2 3 1 1 1 1 1 2
## GlenGarioch 2 1 3 0 0 0 3 1
## Nutty Malty Fruity Floral
## Ardmore 2 3 1 1
## Balblair 2 1 2 1
## Bowmore 1 1 1 2
## Bruichladdich 2 2 2 2
## GlenDeveronMacduff 0 2 0 1
## GlenGarioch 0 2 2 2
# Mean score per flavour for cluster 2; colMeans() is the vectorised
# equivalent of apply(x, 2, mean)
c2 <- colMeans(c2)
# Members of cluster 3, flavour columns only
c3 <- whisky_cluster %>%
  filter(y_kmeans == 3) %>%
  select(-y_kmeans)
head(c3)
## Body Sweetness Smoky Medicinal Tobacco Honey Spicy Winey Nutty Malty
## Ardbeg 4 1 4 4 0 0 2 0 1 2
## Caol Ila 3 1 4 2 1 0 2 0 2 1
## Clynelish 3 2 3 3 1 0 2 0 1 1
## Lagavulin 4 1 4 4 1 0 1 2 1 1
## Laphroig 4 2 4 4 1 0 0 1 1 1
## Talisker 4 2 3 3 0 1 3 0 1 2
## Fruity Floral
## Ardbeg 1 0
## Caol Ila 1 1
## Clynelish 2 0
## Lagavulin 1 0
## Laphroig 0 0
## Talisker 2 0
# Mean score per flavour for cluster 3; colMeans() is the vectorised
# equivalent of apply(x, 2, mean)
c3 <- colMeans(c3)
# Members of cluster 4, flavour columns only
c4 <- whisky_cluster %>%
  filter(y_kmeans == 4) %>%
  select(-y_kmeans)
head(c4)
## Body Sweetness Smoky Medicinal Tobacco Honey Spicy Winey Nutty Malty
## Aberfeldy 2 2 2 0 0 2 1 2 2 2
## Aberlour 3 3 1 0 0 4 3 2 2 3
## Auchroisk 2 3 1 0 0 2 1 2 2 2
## Balmenach 4 3 2 0 0 2 1 3 3 0
## Belvenie 3 2 1 0 0 3 2 1 0 2
## BenNevis 4 2 2 0 0 2 2 0 2 2
## Fruity Floral
## Aberfeldy 2 2
## Aberlour 3 2
## Auchroisk 2 1
## Balmenach 1 2
## Belvenie 2 2
## BenNevis 2 2
# Mean score per flavour for cluster 4; colMeans() is the vectorised
# equivalent of apply(x, 2, mean)
c4 <- colMeans(c4)
# Side-by-side cluster profiles: one column per cluster, one row per flavour
analysis <- data.frame(c1 = c1, c2 = c2, c3 = c3, c4 = c4)
head(analysis)
## c1 c2 c3 c4
## Body 1.43243243 1.866667 3.6666667 2.67857143
## Sweetness 2.48648649 1.933333 1.5000000 2.39285714
## Smoky 1.05405405 2.066667 3.6666667 1.42857143
## Medicinal 0.24324324 1.066667 3.3333333 0.07142857
## Tobacco 0.05405405 0.200000 0.6666667 0.03571429
## Honey 0.97297297 1.133333 0.1666667 1.89285714
analysis %>% arrange(desc(c1))
## c1 c2 c3 c4
## Sweetness 2.48648649 1.9333333 1.5000000 2.39285714
## Floral 2.10810811 1.1333333 0.1666667 1.78571429
## Fruity 1.97297297 1.0666667 1.1666667 2.10714286
## Malty 1.67567568 1.8000000 1.3333333 2.07142857
## Body 1.43243243 1.8666667 3.6666667 2.67857143
## Nutty 1.16216216 1.5333333 1.1666667 1.89285714
## Spicy 1.10810811 1.4666667 1.6666667 1.64285714
## Smoky 1.05405405 2.0666667 3.6666667 1.42857143
## Honey 0.97297297 1.1333333 0.1666667 1.89285714
## Winey 0.45945946 0.8666667 0.5000000 1.82142857
## Medicinal 0.24324324 1.0666667 3.3333333 0.07142857
## Tobacco 0.05405405 0.2000000 0.6666667 0.03571429
analysis %>% arrange(desc(c2))
## c1 c2 c3 c4
## Smoky 1.05405405 2.0666667 3.6666667 1.42857143
## Sweetness 2.48648649 1.9333333 1.5000000 2.39285714
## Body 1.43243243 1.8666667 3.6666667 2.67857143
## Malty 1.67567568 1.8000000 1.3333333 2.07142857
## Nutty 1.16216216 1.5333333 1.1666667 1.89285714
## Spicy 1.10810811 1.4666667 1.6666667 1.64285714
## Honey 0.97297297 1.1333333 0.1666667 1.89285714
## Floral 2.10810811 1.1333333 0.1666667 1.78571429
## Medicinal 0.24324324 1.0666667 3.3333333 0.07142857
## Fruity 1.97297297 1.0666667 1.1666667 2.10714286
## Winey 0.45945946 0.8666667 0.5000000 1.82142857
## Tobacco 0.05405405 0.2000000 0.6666667 0.03571429
analysis %>% arrange(desc(c3))
## c1 c2 c3 c4
## Body 1.43243243 1.8666667 3.6666667 2.67857143
## Smoky 1.05405405 2.0666667 3.6666667 1.42857143
## Medicinal 0.24324324 1.0666667 3.3333333 0.07142857
## Spicy 1.10810811 1.4666667 1.6666667 1.64285714
## Sweetness 2.48648649 1.9333333 1.5000000 2.39285714
## Malty 1.67567568 1.8000000 1.3333333 2.07142857
## Nutty 1.16216216 1.5333333 1.1666667 1.89285714
## Fruity 1.97297297 1.0666667 1.1666667 2.10714286
## Tobacco 0.05405405 0.2000000 0.6666667 0.03571429
## Winey 0.45945946 0.8666667 0.5000000 1.82142857
## Honey 0.97297297 1.1333333 0.1666667 1.89285714
## Floral 2.10810811 1.1333333 0.1666667 1.78571429
analysis %>% arrange(desc(c4))
## c1 c2 c3 c4
## Body 1.43243243 1.8666667 3.6666667 2.67857143
## Sweetness 2.48648649 1.9333333 1.5000000 2.39285714
## Fruity 1.97297297 1.0666667 1.1666667 2.10714286
## Malty 1.67567568 1.8000000 1.3333333 2.07142857
## Honey 0.97297297 1.1333333 0.1666667 1.89285714
## Nutty 1.16216216 1.5333333 1.1666667 1.89285714
## Winey 0.45945946 0.8666667 0.5000000 1.82142857
## Floral 2.10810811 1.1333333 0.1666667 1.78571429
## Spicy 1.10810811 1.4666667 1.6666667 1.64285714
## Smoky 1.05405405 2.0666667 3.6666667 1.42857143
## Medicinal 0.24324324 1.0666667 3.3333333 0.07142857
## Tobacco 0.05405405 0.2000000 0.6666667 0.03571429
Conclusion
- Cluster 1 distilleries produce light whisky led by sweet, floral and fruity notes, with a strong focus on product image.
- Cluster 2 distilleries produce better-quality whisky in a classic, moderately smoky style, without experimenting too much.
- Cluster 3 distilleries produce quality whiskies characterized by strong flavors: full-bodied, smoky and medicinal.
- Cluster 4 distilleries produce quality whisky characterized by sweet flavors such as honey, fruit and wine notes.