This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
Importing and cleaning the data to produce a data table in R
library(GGally)
## Warning: package 'GGally' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(inspectdf)
## Warning: package 'inspectdf' was built under R version 4.4.3
library(ggplot2)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.4.3
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
library(stats)
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(scales)
## Warning: package 'scales' was built under R version 4.4.3
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.4.3
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.4.3
# Load the red-wine table from the UCI repository. The file is
# semicolon-delimited and its header fields are wrapped in double quotes.
wine <- read.csv(
  file = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv",
  check.names = TRUE,       # syntactic column names, e.g. fixed.acidity
  stringsAsFactors = FALSE, # keep any text columns as character
  quote = "\"",             # the double quote is the quoting character
  sep = ";"                 # fields are separated by semicolons
)
Exploratory data analysis
# Keep only the physicochemical predictors. The response column `quality`
# (the 12th column of the UCI file) is dropped by name rather than by
# position, so the selection still works if the column order changes.
data <- wine[setdiff(names(wine), "quality")]

# Min / quartiles / mean / max for each remaining variable.
summary(data)
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 4.60 Min. :0.1200 Min. :0.000 Min. : 0.900
## 1st Qu.: 7.10 1st Qu.:0.3900 1st Qu.:0.090 1st Qu.: 1.900
## Median : 7.90 Median :0.5200 Median :0.260 Median : 2.200
## Mean : 8.32 Mean :0.5278 Mean :0.271 Mean : 2.539
## 3rd Qu.: 9.20 3rd Qu.:0.6400 3rd Qu.:0.420 3rd Qu.: 2.600
## Max. :15.90 Max. :1.5800 Max. :1.000 Max. :15.500
## chlorides free.sulfur.dioxide total.sulfur.dioxide density
## Min. :0.01200 Min. : 1.00 Min. : 6.00 Min. :0.9901
## 1st Qu.:0.07000 1st Qu.: 7.00 1st Qu.: 22.00 1st Qu.:0.9956
## Median :0.07900 Median :14.00 Median : 38.00 Median :0.9968
## Mean :0.08747 Mean :15.87 Mean : 46.47 Mean :0.9967
## 3rd Qu.:0.09000 3rd Qu.:21.00 3rd Qu.: 62.00 3rd Qu.:0.9978
## Max. :0.61100 Max. :72.00 Max. :289.00 Max. :1.0037
## pH sulphates alcohol
## Min. :2.740 Min. :0.3300 Min. : 8.40
## 1st Qu.:3.210 1st Qu.:0.5500 1st Qu.: 9.50
## Median :3.310 Median :0.6200 Median :10.20
## Mean :3.311 Mean :0.6581 Mean :10.42
## 3rd Qu.:3.400 3rd Qu.:0.7300 3rd Qu.:11.10
## Max. :4.010 Max. :2.0000 Max. :14.90
# Compact overview of the predictor table: dimensions, column types and the
# first few values of each column.
utils::str(data)
## 'data.frame': 1599 obs. of 11 variables:
## $ fixed.acidity : num 7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
## $ volatile.acidity : num 0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
## $ citric.acid : num 0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
## $ residual.sugar : num 1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
## $ chlorides : num 0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
## $ free.sulfur.dioxide : num 11 25 15 17 11 13 15 15 9 17 ...
## $ total.sulfur.dioxide: num 34 67 54 60 34 40 59 21 18 102 ...
## $ density : num 0.998 0.997 0.997 0.998 0.998 ...
## $ pH : num 3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
## $ sulphates : num 0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
## $ alcohol : num 9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
# One histogram per variable of the predictor table.
hist.data.frame(data)

# Standardise every variable: `preProcess()` learns the centring and scaling
# parameters from `data`, and `predict()` applies them.
preproc <- preProcess(data, method = c("center", "scale"))
data.norm <- predict(preproc, data)

# Correlation plot with the coefficients printed in the cells. `TRUE` is
# spelled out because `T` is an ordinary variable that can be reassigned.
GGally::ggcorr(
  data.norm,
  hjust = 1,
  layout.exp = 2,
  label = TRUE,
  label_size = 2.9
)
PCA
# Fraction of missing values per column. `is.na()` on a data frame yields a
# logical matrix, so its column means are the missing fractions directly.
# This replaces a `sapply()` call, whose return type depends on its input.
missingincols <- colMeans(is.na(data))

# Show the fractions as percentage labels.
percent(missingincols)
## fixed.acidity volatile.acidity citric.acid
## "0%" "0%" "0%"
## residual.sugar chlorides free.sulfur.dioxide
## "0%" "0%" "0%"
## total.sulfur.dioxide density pH
## "0%" "0%" "0%"
## sulphates alcohol
## "0%" "0%"
# Principal component analysis of the standardised data. Centring and scaling
# are switched off because `data.norm` has already been centred and scaled.
pca <- prcomp(x = data.norm, scale. = FALSE, center = FALSE)

# Loadings matrix: one row per input variable, one column per component.
pca[["rotation"]]
## PC1 PC2 PC3 PC4
## fixed.acidity 0.48931422 0.110502738 -0.12330157 0.229617370
## volatile.acidity -0.23858436 -0.274930480 -0.44996253 -0.078959783
## citric.acid 0.46363166 0.151791356 0.23824707 0.079418256
## residual.sugar 0.14610715 -0.272080238 0.10128338 0.372792562
## chlorides 0.21224658 -0.148051555 -0.09261383 -0.666194756
## free.sulfur.dioxide -0.03615752 -0.513566812 0.42879287 0.043537818
## total.sulfur.dioxide 0.02357485 -0.569486959 0.32241450 0.034577115
## density 0.39535301 -0.233575490 -0.33887135 0.174499758
## pH -0.43851962 -0.006710793 0.05769735 0.003787746
## sulphates 0.24292133 0.037553916 0.27978615 -0.550872362
## alcohol -0.11323206 0.386180959 0.47167322 0.122181088
## PC5 PC6 PC7 PC8
## fixed.acidity -0.08261366 -0.10147858 0.35022736 -0.17759545
## volatile.acidity 0.21873452 -0.41144893 0.53373510 -0.07877531
## citric.acid -0.05857268 -0.06959338 -0.10549701 -0.37751558
## residual.sugar 0.73214429 -0.04915555 -0.29066341 0.29984469
## chlorides 0.24650090 -0.30433857 -0.37041337 -0.35700936
## free.sulfur.dioxide -0.15915198 0.01400021 0.11659611 -0.20478050
## total.sulfur.dioxide -0.22246456 -0.13630755 0.09366237 0.01903597
## density 0.15707671 0.39115230 0.17048116 -0.23922267
## pH 0.26752977 0.52211645 0.02513762 -0.56139075
## sulphates 0.22596222 0.38126343 0.44746911 0.37460432
## alcohol 0.35068141 -0.36164504 0.32765090 -0.21762556
## PC9 PC10 PC11
## fixed.acidity -0.194020908 0.24952314 -0.639691452
## volatile.acidity 0.129110301 -0.36592473 -0.002388597
## citric.acid 0.381449669 -0.62167708 0.070910304
## residual.sugar -0.007522949 -0.09287208 -0.184029964
## chlorides -0.111338666 0.21767112 -0.053065322
## free.sulfur.dioxide -0.635405218 -0.24848326 0.051420865
## total.sulfur.dioxide 0.592115893 0.37075027 -0.068701598
## density -0.020718675 0.23999012 0.567331898
## pH 0.167745886 0.01096960 -0.340710903
## sulphates 0.058367062 -0.11232046 -0.069555381
## alcohol -0.037603106 0.30301450 0.314525906
# Eigen-decomposition of the covariance matrix of the standardised data.
data.eigen <- data.norm %>%
  cov() %>%
  eigen()

# Eigenvalues only.
data.eigen[["values"]]
## [1] 3.09913244 1.92590969 1.55054349 1.21323253 0.95929207 0.65960826
## [7] 0.58379122 0.42295670 0.34464212 0.18133317 0.05955831
# Scree plot on the eigenvalue scale, with each bar labelled.
fviz_eig(
  pca,
  choice = "eigenvalue",
  ncp = 25,
  addlabels = TRUE,
  barfill = "red",
  barcolor = "black",
  linecolor = "black"
)

# Standard deviation, proportion and cumulative proportion of variance for
# every component.
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.7604 1.3878 1.2452 1.1015 0.97943 0.81216 0.76406
## Proportion of Variance 0.2817 0.1751 0.1410 0.1103 0.08721 0.05996 0.05307
## Cumulative Proportion 0.2817 0.4568 0.5978 0.7081 0.79528 0.85525 0.90832
## PC8 PC9 PC10 PC11
## Standard deviation 0.65035 0.58706 0.42583 0.24405
## Proportion of Variance 0.03845 0.03133 0.01648 0.00541
## Cumulative Proportion 0.94677 0.97810 0.99459 1.00000
# Scree plot again, this time with fviz_eig()'s default `choice`.
fviz_eig(
  pca,
  ncp = 25,
  addlabels = TRUE,
  barfill = "red",
  barcolor = "black",
  linecolor = "black"
)

# Variable plot, coloured by each variable's contribution.
fviz_pca_var(pca, col.var = "contrib") +
  scale_color_gradient2(
    midpoint = 10,
    low = "red",
    mid = "blue",
    high = "green3"
  )

# PCA plot with labelled loading arrows overlaid.
autoplot(
  pca,
  loadings = TRUE,
  loadings.colour = "red",
  loadings.label = TRUE,
  loadings.label.size = 3
)

# Variable contributions to each of the first four components, shown as a
# 2 x 2 panel.
PC1 <- fviz_contrib(pca, axes = 1, choice = "var", color = "red", fill = "red")
PC2 <- fviz_contrib(pca, axes = 2, choice = "var", color = "red", fill = "red")
PC3 <- fviz_contrib(pca, axes = 3, choice = "var", color = "red", fill = "red")
PC4 <- fviz_contrib(pca, axes = 4, choice = "var", color = "red", fill = "red")
grid.arrange(PC1, PC2, PC3, PC4, nrow = 2, ncol = 2)
Variables contributing most to each component: PC1: fixed acidity, citric acid, pH and density; PC2: alcohol, free sulfur dioxide, total sulfur dioxide, volatile acidity and density; PC3: free sulfur dioxide and total sulfur dioxide; PC4: chlorides, sulphates and residual sugar.
K-means clustering:
# Fix the RNG so the random starts below are reproducible.
set.seed(123)

# Two-cluster k-means on the standardised data, using 25 random starts.
dataK2 <- kmeans(x = data.norm, centers = 2, nstart = 25)

# Print cluster sizes, centres, assignments and sum-of-squares diagnostics.
print(dataK2)
## K-means clustering with 2 clusters of sizes 590, 1009
##
## Cluster means:
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 0.9207271 -0.6361774 0.9851367 0.16097077 0.2641746
## 2 -0.5383835 0.3719967 -0.5760462 -0.09412562 -0.1544728
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates
## 1 -0.2356324 -0.2152177 0.4994664 -0.7077417 0.5314734
## 2 0.1377831 0.1258458 -0.2920566 0.4138430 -0.3107723
## alcohol
## 1 0.1730540
## 2 -0.1011911
##
## Clustering vector:
## [1] 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 1 1 2 1 1 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2
## [38] 1 2 1 1 2 1 1 2 2 2 1 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2
## [75] 1 1 1 2 2 2 2 1 2 1 2 2 1 2 1 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2
## [112] 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1
## [149] 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2
## [186] 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 1 2 2 2 1 1 2 2 1 1 2 1 2 2 2 2 2 2 2 2 2
## [223] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 1 2 2 2 2 2 1 2 1 2 2 2 1 2 1
## [260] 1 2 2 2 2 1 1 2 1 2 1 2 1 1 2 2 2 2 1 1 1 1 1 2 1 2 2 1 2 2 1 2 1 1 2 1 1
## [297] 1 2 2 2 2 1 2 2 1 1 2 1 1 2 1 2 2 2 2 2 2 2 1 2 1 2 2 1 1 1 1 1 1 1 1 1 2
## [334] 2 2 1 1 2 1 1 1 1 1 1 1 2 2 1 1 2 1 2 2 1 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1
## [371] 2 1 1 2 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 1 2 1 1 1 1 1 2 1 1 2 2 2 1 1 2 1 1
## [408] 1 1 1 2 2 2 1 2 1 1 2 1 2 1 2 2 1 2 2 2 2 1 1 1 2 1 1 1 1 2 1 1 2 1 1 1 1
## [445] 2 2 1 1 2 1 1 1 2 1 2 1 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1
## [482] 1 1 1 1 1 1 1 1 1 2 1 1 2 2 1 2 2 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [519] 1 2 1 2 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 2 1 1 1 2 1 1 2 1
## [556] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 1 1 2
## [593] 1 1 2 1 1 1 2 1 2 1 2 1 2 2 1 1 1 2 1 1 2 1 1 2 2 1 1 1 2 2 1 2 2 2 2 2 2
## [630] 2 2 1 2 2 2 2 2 2 2 1 1 2 1 2 1 2 2 2 2 2 1 2 1 1 1 1 1 1 2 2 2 2 2 1 1 2
## [667] 1 1 1 1 2 2 2 2 1 1 1 2 2 1 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 1 1 2 2
## [704] 1 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2
## [741] 2 2 2 1 1 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2
## [778] 2 1 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 1 1 1 2 2 2 1 1 2
## [815] 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 1 2 1 2 1 2 1 2 2 2 2 2 1
## [852] 1 1 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 2 2 2 1 2 2 2 2 1
## [889] 2 1 2 2 1 2 2 2 1 2 1 2 1 2 2 2 2 2 2 2 2 2 1 1 1 1 2 1 2 2 2 2 1 2 2 2 1
## [926] 2 1 2 1 1 2 2 2 2 2 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 2 1 2
## [963] 2 1 1 1 1 2 1 2 1 1 1 1 1 2 2 2 2 1 1 2 2 1 1 2 1 2 2 1 2 2 2 2 1 2 2 2 2
## [1000] 2 2 1 1 2 2 2 1 1 1 1 1 1 2 2 2 1 1 2 2 2 1 1 2 1 2 2 2 2 2 2 2 2 2 2 2 1
## [1037] 2 2 1 2 2 2 2 1 2 2 2 2 1 1 2 1 2 1 2 2 1 2 1 1 1 1 1 1 2 2 2 1 1 2 1 2 2
## [1074] 2 2 1 1 1 1 1 1 1 2 1 2 2 1 1 1 1 1 1 2 1 2 1 2 1 1 1 1 2 2 2 2 2 1 1 2 1
## [1111] 2 2 1 1 2 2 2 2 2 2 2 2 2 1 2 1 2 2 1 1 2 2 2 2 1 2 1 1 2 2 2 1 2 2 2 1 2
## [1148] 1 2 1 1 2 2 1 2 2 1 2 1 1 1 1 1 2 2 1 1 1 2 2 1 2 1 2 2 2 2 2 2 1 1 1 1 2
## [1185] 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 1 2 2 2 2 2 1 1 1 2 2 1 1 1
## [1222] 1 2 1 1 1 2 1 2 2 1 2 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [1259] 2 2 1 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 1 2 2 1 1 2 2 2 2 2
## [1296] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 2 1 1 2 2 2 2 2 2 2 2
## [1333] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 2 2 2 2 1 2
## [1370] 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1
## [1407] 1 2 1 2 2 2 1 1 1 2 1 2 2 2 2 2 2 2 1 1 1 2 2 1 2 2 2 2 1 1 1 2 2 2 2 2 2
## [1444] 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 1 2 2 1
## [1481] 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 1 1 2 2 2 2 2 2 2
## [1518] 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 1 2 2 2 2 2
## [1555] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2
## [1592] 2 2 2 2 2 2 2 2
##
## Within cluster sum of squares by cluster:
## [1] 6810.586 7510.572
## (between_SS / total_SS = 18.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Plot the two-cluster solution as points, with the cluster centres shown.
fviz_cluster(dataK2, data = data.norm, show.clust.cent = TRUE, geom = "point")
# Fit k-means for K = 3, 4 and 5 with the same settings as the K = 2 model.
# The calls stay in this order because each one draws from the RNG.
dataK3 <- kmeans(x = data.norm, centers = 3, nstart = 25)
dataK4 <- kmeans(x = data.norm, centers = 4, nstart = 25)
dataK5 <- kmeans(x = data.norm, centers = 5, nstart = 25)

# One point-only cluster plot per K, titled with its K.
p1 <- fviz_cluster(dataK2, data = data.norm, geom = "point") +
  ggtitle(" K = 2")
p2 <- fviz_cluster(dataK3, data = data.norm, geom = "point") +
  ggtitle(" K = 3")
p3 <- fviz_cluster(dataK4, data = data.norm, geom = "point") +
  ggtitle(" K = 4")
p4 <- fviz_cluster(dataK5, data = data.norm, geom = "point") +
  ggtitle(" K = 5")

# Arrange the four plots on two rows for side-by-side comparison.
grid.arrange(p1, p2, p3, p4, nrow = 2)
# Choose the number of clusters. `fviz_nbclust()` forwards extra arguments to
# the clustering function, so `nstart = 25` makes these internal k-means fits
# use the same multi-start setting as every other model in this analysis
# (the default is a single random start, which gives unstable curves).
# Each plot is seeded so it can be reproduced on its own.

# Elbow method: total within-cluster sum of squares against K.
set.seed(123)
fviz_nbclust(x = data.norm, FUNcluster = kmeans, method = "wss", nstart = 25)

# Average silhouette width against K.
set.seed(123)
fviz_nbclust(
  x = data.norm,
  FUNcluster = kmeans,
  method = "silhouette",
  nstart = 25
)
# Re-seed so the final fit does not depend on RNG draws made earlier.
set.seed(123)

# Final model: two clusters on the standardised data, 25 random starts.
final <- kmeans(x = data.norm, centers = 2, nstart = 25)

# Print cluster sizes, centres, assignments and sum-of-squares diagnostics.
print(final)
## K-means clustering with 2 clusters of sizes 590, 1009
##
## Cluster means:
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 0.9207271 -0.6361774 0.9851367 0.16097077 0.2641746
## 2 -0.5383835 0.3719967 -0.5760462 -0.09412562 -0.1544728
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates
## 1 -0.2356324 -0.2152177 0.4994664 -0.7077417 0.5314734
## 2 0.1377831 0.1258458 -0.2920566 0.4138430 -0.3107723
## alcohol
## 1 0.1730540
## 2 -0.1011911
##
## Clustering vector:
## [1] 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 1 1 2 1 1 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2
## [38] 1 2 1 1 2 1 1 2 2 2 1 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2
## [75] 1 1 1 2 2 2 2 1 2 1 2 2 1 2 1 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2
## [112] 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1
## [149] 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2
## [186] 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 1 2 2 2 1 1 2 2 1 1 2 1 2 2 2 2 2 2 2 2 2
## [223] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 1 2 2 2 2 2 1 2 1 2 2 2 1 2 1
## [260] 1 2 2 2 2 1 1 2 1 2 1 2 1 1 2 2 2 2 1 1 1 1 1 2 1 2 2 1 2 2 1 2 1 1 2 1 1
## [297] 1 2 2 2 2 1 2 2 1 1 2 1 1 2 1 2 2 2 2 2 2 2 1 2 1 2 2 1 1 1 1 1 1 1 1 1 2
## [334] 2 2 1 1 2 1 1 1 1 1 1 1 2 2 1 1 2 1 2 2 1 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1
## [371] 2 1 1 2 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 1 2 1 1 1 1 1 2 1 1 2 2 2 1 1 2 1 1
## [408] 1 1 1 2 2 2 1 2 1 1 2 1 2 1 2 2 1 2 2 2 2 1 1 1 2 1 1 1 1 2 1 1 2 1 1 1 1
## [445] 2 2 1 1 2 1 1 1 2 1 2 1 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1
## [482] 1 1 1 1 1 1 1 1 1 2 1 1 2 2 1 2 2 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [519] 1 2 1 2 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 2 1 1 1 2 1 1 2 1
## [556] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 1 1 2
## [593] 1 1 2 1 1 1 2 1 2 1 2 1 2 2 1 1 1 2 1 1 2 1 1 2 2 1 1 1 2 2 1 2 2 2 2 2 2
## [630] 2 2 1 2 2 2 2 2 2 2 1 1 2 1 2 1 2 2 2 2 2 1 2 1 1 1 1 1 1 2 2 2 2 2 1 1 2
## [667] 1 1 1 1 2 2 2 2 1 1 1 2 2 1 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 1 1 2 2
## [704] 1 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2
## [741] 2 2 2 1 1 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2
## [778] 2 1 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 1 1 1 2 2 2 1 1 2
## [815] 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 1 2 1 2 1 2 1 2 2 2 2 2 1
## [852] 1 1 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 2 2 2 1 2 2 2 2 1
## [889] 2 1 2 2 1 2 2 2 1 2 1 2 1 2 2 2 2 2 2 2 2 2 1 1 1 1 2 1 2 2 2 2 1 2 2 2 1
## [926] 2 1 2 1 1 2 2 2 2 2 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 2 1 2
## [963] 2 1 1 1 1 2 1 2 1 1 1 1 1 2 2 2 2 1 1 2 2 1 1 2 1 2 2 1 2 2 2 2 1 2 2 2 2
## [1000] 2 2 1 1 2 2 2 1 1 1 1 1 1 2 2 2 1 1 2 2 2 1 1 2 1 2 2 2 2 2 2 2 2 2 2 2 1
## [1037] 2 2 1 2 2 2 2 1 2 2 2 2 1 1 2 1 2 1 2 2 1 2 1 1 1 1 1 1 2 2 2 1 1 2 1 2 2
## [1074] 2 2 1 1 1 1 1 1 1 2 1 2 2 1 1 1 1 1 1 2 1 2 1 2 1 1 1 1 2 2 2 2 2 1 1 2 1
## [1111] 2 2 1 1 2 2 2 2 2 2 2 2 2 1 2 1 2 2 1 1 2 2 2 2 1 2 1 1 2 2 2 1 2 2 2 1 2
## [1148] 1 2 1 1 2 2 1 2 2 1 2 1 1 1 1 1 2 2 1 1 1 2 2 1 2 1 2 2 2 2 2 2 1 1 1 1 2
## [1185] 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 1 2 2 2 2 2 1 1 1 2 2 1 1 1
## [1222] 1 2 1 1 1 2 1 2 2 1 2 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [1259] 2 2 1 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 1 2 2 1 1 2 2 2 2 2
## [1296] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 2 1 1 2 2 2 2 2 2 2 2
## [1333] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 2 2 2 2 1 2
## [1370] 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1
## [1407] 1 2 1 2 2 2 1 1 1 2 1 2 2 2 2 2 2 2 1 1 1 2 2 1 2 2 2 2 1 1 1 2 2 2 2 2 2
## [1444] 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 1 2 2 1
## [1481] 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 1 1 2 2 2 2 2 2 2
## [1518] 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 1 2 2 2 2 2
## [1555] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2
## [1592] 2 2 2 2 2 2 2 2
##
## Within cluster sum of squares by cluster:
## [1] 6810.586 7510.572
## (between_SS / total_SS = 18.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Cluster plot of the final two-cluster solution.
fviz_cluster(final, data = data.norm)

# Per-cluster median of every standardised variable. `across()` replaces the
# superseded `summarize_all()`. The call is namespaced because Hmisc, attached
# after dplyr, masks `summarize`.
data.norm %>%
  mutate(Cluster = final$cluster) %>%
  group_by(Cluster) %>%
  dplyr::summarise(across(everything(), median))
## # A tibble: 2 × 12
## Cluster fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.908 -0.714 1.02 -0.169 -0.116
## 2 2 -0.586 0.347 -0.672 -0.311 -0.201
## # ℹ 6 more variables: free.sulfur.dioxide <dbl>, total.sulfur.dioxide <dbl>,
## # density <dbl>, pH <dbl>, sulphates <dbl>, alcohol <dbl>