RED WINE

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Importing Data & cleaning data to produce data table in R

library(GGally)

## Warning: package 'GGally' was built under R version 4.4.3

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.4.3

library(corrplot)

## Warning: package 'corrplot' was built under R version 4.4.3

## corrplot 0.95 loaded

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.4.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(inspectdf)

## Warning: package 'inspectdf' was built under R version 4.4.3

library(ggplot2) 
library(gridExtra)

## Warning: package 'gridExtra' was built under R version 4.4.3

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

library(caret)

## Warning: package 'caret' was built under R version 4.4.3

## Loading required package: lattice

library(readr)

## Warning: package 'readr' was built under R version 4.4.3

library(stats)
library(factoextra)

## Warning: package 'factoextra' was built under R version 4.4.3

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(scales)

## Warning: package 'scales' was built under R version 4.4.3

## 
## Attaching package: 'scales'

## The following object is masked from 'package:readr':
## 
##     col_factor

library(Hmisc)

## Warning: package 'Hmisc' was built under R version 4.4.3

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

library(ggfortify)

## Warning: package 'ggfortify' was built under R version 4.4.3

# Load the red-wine data set directly from the UCI repository URL.
# Fields are separated by semicolons and the header names are double-quoted;
# check.names = TRUE converts the headers into syntactic names such as
# fixed.acidity and volatile.acidity.
wine <- utils::read.csv(
  file = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv",
  check.names = TRUE,
  stringsAsFactors = FALSE,
  quote = "\"",
  sep = ";"
)

Exploratory data analysis

# Build the analysis table by removing the `quality` column.
# The column is dropped by name rather than by position (it was `wine[-12]`),
# so a change in column order cannot silently remove a different variable;
# the guard fails loudly if the column is missing.
stopifnot("quality" %in% names(wine))
data <- wine[names(wine) != "quality"]
summary(data)

##  fixed.acidity   volatile.acidity  citric.acid    residual.sugar  
##  Min.   : 4.60   Min.   :0.1200   Min.   :0.000   Min.   : 0.900  
##  1st Qu.: 7.10   1st Qu.:0.3900   1st Qu.:0.090   1st Qu.: 1.900  
##  Median : 7.90   Median :0.5200   Median :0.260   Median : 2.200  
##  Mean   : 8.32   Mean   :0.5278   Mean   :0.271   Mean   : 2.539  
##  3rd Qu.: 9.20   3rd Qu.:0.6400   3rd Qu.:0.420   3rd Qu.: 2.600  
##  Max.   :15.90   Max.   :1.5800   Max.   :1.000   Max.   :15.500  
##    chlorides       free.sulfur.dioxide total.sulfur.dioxide    density      
##  Min.   :0.01200   Min.   : 1.00       Min.   :  6.00       Min.   :0.9901  
##  1st Qu.:0.07000   1st Qu.: 7.00       1st Qu.: 22.00       1st Qu.:0.9956  
##  Median :0.07900   Median :14.00       Median : 38.00       Median :0.9968  
##  Mean   :0.08747   Mean   :15.87       Mean   : 46.47       Mean   :0.9967  
##  3rd Qu.:0.09000   3rd Qu.:21.00       3rd Qu.: 62.00       3rd Qu.:0.9978  
##  Max.   :0.61100   Max.   :72.00       Max.   :289.00       Max.   :1.0037  
##        pH          sulphates         alcohol     
##  Min.   :2.740   Min.   :0.3300   Min.   : 8.40  
##  1st Qu.:3.210   1st Qu.:0.5500   1st Qu.: 9.50  
##  Median :3.310   Median :0.6200   Median :10.20  
##  Mean   :3.311   Mean   :0.6581   Mean   :10.42  
##  3rd Qu.:3.400   3rd Qu.:0.7300   3rd Qu.:11.10  
##  Max.   :4.010   Max.   :2.0000   Max.   :14.90

# Compact overview of the table: dimensions, column types and first values.
utils::str(object = data)

## 'data.frame':    1599 obs. of  11 variables:
##  $ fixed.acidity       : num  7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
##  $ volatile.acidity    : num  0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
##  $ citric.acid         : num  0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
##  $ residual.sugar      : num  1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
##  $ chlorides           : num  0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
##  $ free.sulfur.dioxide : num  11 25 15 17 11 13 15 15 9 17 ...
##  $ total.sulfur.dioxide: num  34 67 54 60 34 40 59 21 18 102 ...
##  $ density             : num  0.998 0.997 0.997 0.998 0.998 ...
##  $ pH                  : num  3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
##  $ sulphates           : num  0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
##  $ alcohol             : num  9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...

# Histograms for the columns of `data`, drawn by Hmisc's data-frame method.
Hmisc::hist.data.frame(data)

# Standardise every column: fit the centring/scaling transform on `data`,
# then apply that same transform back to `data`.
preproc <- caret::preProcess(x = data, method = c("center", "scale"))
data.norm <- stats::predict(object = preproc, newdata = data)

# Correlation plot of the standardised variables with the coefficient printed
# in each cell. `TRUE` replaces `T`, which is an ordinary variable that can be
# reassigned and would then silently change this call.
GGally::ggcorr(
  data.norm,
  hjust = 1,
  layout.exp = 2,
  label = TRUE,
  label_size = 2.9
)

PCA

# Share of missing values in each column, displayed as a percentage.
# vapply() guarantees exactly one numeric per column, whereas sapply()'s
# return type depends on its input (e.g. it returns a list for empty input).
missingincols <- vapply(
  data,
  function(x) sum(is.na(x)) / nrow(data),
  numeric(1)
)
scales::percent(missingincols)

##        fixed.acidity     volatile.acidity          citric.acid 
##                 "0%"                 "0%"                 "0%" 
##       residual.sugar            chlorides  free.sulfur.dioxide 
##                 "0%"                 "0%"                 "0%" 
## total.sulfur.dioxide              density                   pH 
##                 "0%"                 "0%"                 "0%" 
##            sulphates              alcohol 
##                 "0%"                 "0%"

# Principal component analysis on the already standardised data, so prcomp()
# is told not to centre or scale again. Then show the rotation matrix
# (one column of variable loadings per component).
pca <- stats::prcomp(x = data.norm, scale. = FALSE, center = FALSE)
pca[["rotation"]]

##                              PC1          PC2         PC3          PC4
## fixed.acidity         0.48931422  0.110502738 -0.12330157  0.229617370
## volatile.acidity     -0.23858436 -0.274930480 -0.44996253 -0.078959783
## citric.acid           0.46363166  0.151791356  0.23824707  0.079418256
## residual.sugar        0.14610715 -0.272080238  0.10128338  0.372792562
## chlorides             0.21224658 -0.148051555 -0.09261383 -0.666194756
## free.sulfur.dioxide  -0.03615752 -0.513566812  0.42879287  0.043537818
## total.sulfur.dioxide  0.02357485 -0.569486959  0.32241450  0.034577115
## density               0.39535301 -0.233575490 -0.33887135  0.174499758
## pH                   -0.43851962 -0.006710793  0.05769735  0.003787746
## sulphates             0.24292133  0.037553916  0.27978615 -0.550872362
## alcohol              -0.11323206  0.386180959  0.47167322  0.122181088
##                              PC5         PC6         PC7         PC8
## fixed.acidity        -0.08261366 -0.10147858  0.35022736 -0.17759545
## volatile.acidity      0.21873452 -0.41144893  0.53373510 -0.07877531
## citric.acid          -0.05857268 -0.06959338 -0.10549701 -0.37751558
## residual.sugar        0.73214429 -0.04915555 -0.29066341  0.29984469
## chlorides             0.24650090 -0.30433857 -0.37041337 -0.35700936
## free.sulfur.dioxide  -0.15915198  0.01400021  0.11659611 -0.20478050
## total.sulfur.dioxide -0.22246456 -0.13630755  0.09366237  0.01903597
## density               0.15707671  0.39115230  0.17048116 -0.23922267
## pH                    0.26752977  0.52211645  0.02513762 -0.56139075
## sulphates             0.22596222  0.38126343  0.44746911  0.37460432
## alcohol               0.35068141 -0.36164504  0.32765090 -0.21762556
##                               PC9        PC10         PC11
## fixed.acidity        -0.194020908  0.24952314 -0.639691452
## volatile.acidity      0.129110301 -0.36592473 -0.002388597
## citric.acid           0.381449669 -0.62167708  0.070910304
## residual.sugar       -0.007522949 -0.09287208 -0.184029964
## chlorides            -0.111338666  0.21767112 -0.053065322
## free.sulfur.dioxide  -0.635405218 -0.24848326  0.051420865
## total.sulfur.dioxide  0.592115893  0.37075027 -0.068701598
## density              -0.020718675  0.23999012  0.567331898
## pH                    0.167745886  0.01096960 -0.340710903
## sulphates             0.058367062 -0.11232046 -0.069555381
## alcohol              -0.037603106  0.30301450  0.314525906

# Eigen-decomposition of the covariance matrix of the standardised data,
# followed by the eigenvalues.
data.eigen <- data.norm %>%
  cov() %>%
  eigen()
data.eigen[["values"]]

##  [1] 3.09913244 1.92590969 1.55054349 1.21323253 0.95929207 0.65960826
##  [7] 0.58379122 0.42295670 0.34464212 0.18133317 0.05955831

# Scree plot of the PCA on the eigenvalue scale, with value labels.
factoextra::fviz_eig(
  pca,
  choice = "eigenvalue",
  ncp = 25,
  addlabels = TRUE,
  barfill = "red",
  barcolor = "black",
  linecolor = "black"
)

# Importance of components: standard deviation, proportion of variance and
# cumulative proportion for each principal component.
summary(object = pca)

## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     1.7604 1.3878 1.2452 1.1015 0.97943 0.81216 0.76406
## Proportion of Variance 0.2817 0.1751 0.1410 0.1103 0.08721 0.05996 0.05307
## Cumulative Proportion  0.2817 0.4568 0.5978 0.7081 0.79528 0.85525 0.90832
##                            PC8     PC9    PC10    PC11
## Standard deviation     0.65035 0.58706 0.42583 0.24405
## Proportion of Variance 0.03845 0.03133 0.01648 0.00541
## Cumulative Proportion  0.94677 0.97810 0.99459 1.00000

# Scree plot again, this time with fviz_eig()'s default `choice`.
factoextra::fviz_eig(
  pca,
  ncp = 25,
  addlabels = TRUE,
  barfill = "red",
  barcolor = "black",
  linecolor = "black"
)

# PCA variable plot, colouring each variable by its contribution on a
# red / blue / green gradient whose midpoint is 10.
factoextra::fviz_pca_var(pca, col.var = "contrib") +
  ggplot2::scale_color_gradient2(
    low = "red",
    mid = "blue",
    high = "green3",
    midpoint = 10
  )

# Plot of the PCA result with the variable loadings overlaid in red and
# labelled.
ggplot2::autoplot(
  pca,
  loadings = TRUE,
  loadings.colour = "red",
  loadings.label = TRUE,
  loadings.label.size = 3
)

# Bar chart of the variables' contributions to one principal component.
plot_pc_contrib <- function(axis) {
  fviz_contrib(pca, choice = "var", axes = axis, fill = "red", color = "red")
}

# Contribution charts for the first four components, shown in a 2 x 2 grid.
PC1 <- plot_pc_contrib(1)
PC2 <- plot_pc_contrib(2)
PC3 <- plot_pc_contrib(3)
PC4 <- plot_pc_contrib(4)

grid.arrange(PC1, PC2, PC3, PC4, ncol = 2, nrow = 2)

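Variables contributing most to each of the first four principal components. PC1: fixed acidity, citric acid, pH and density. PC2: alcohol, total sulfur dioxide, volatile acidity and density (quality is not among the contributors because it was removed from the data before the PCA). PC3: free sulfur dioxide and total sulfur dioxide. PC4: chlorides, sulphates and residual sugar.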

K-means clustering:

# Fix the random seed so the random starts are reproducible, then fit k-means
# with two clusters using 25 random starts and print the fit.
set.seed(123)

dataK2 <- stats::kmeans(x = data.norm, nstart = 25, centers = 2)
print(dataK2)

## K-means clustering with 2 clusters of sizes 590, 1009
## 
## Cluster means:
##   fixed.acidity volatile.acidity citric.acid residual.sugar  chlorides
## 1     0.9207271       -0.6361774   0.9851367     0.16097077  0.2641746
## 2    -0.5383835        0.3719967  -0.5760462    -0.09412562 -0.1544728
##   free.sulfur.dioxide total.sulfur.dioxide    density         pH  sulphates
## 1          -0.2356324           -0.2152177  0.4994664 -0.7077417  0.5314734
## 2           0.1377831            0.1258458 -0.2920566  0.4138430 -0.3107723
##      alcohol
## 1  0.1730540
## 2 -0.1011911
## 
## Clustering vector:
##    [1] 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 1 1 2 1 1 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2
##   [38] 1 2 1 1 2 1 1 2 2 2 1 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2
##   [75] 1 1 1 2 2 2 2 1 2 1 2 2 1 2 1 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2
##  [112] 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1
##  [149] 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2
##  [186] 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 1 2 2 2 1 1 2 2 1 1 2 1 2 2 2 2 2 2 2 2 2
##  [223] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 1 2 2 2 2 2 1 2 1 2 2 2 1 2 1
##  [260] 1 2 2 2 2 1 1 2 1 2 1 2 1 1 2 2 2 2 1 1 1 1 1 2 1 2 2 1 2 2 1 2 1 1 2 1 1
##  [297] 1 2 2 2 2 1 2 2 1 1 2 1 1 2 1 2 2 2 2 2 2 2 1 2 1 2 2 1 1 1 1 1 1 1 1 1 2
##  [334] 2 2 1 1 2 1 1 1 1 1 1 1 2 2 1 1 2 1 2 2 1 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1
##  [371] 2 1 1 2 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 1 2 1 1 1 1 1 2 1 1 2 2 2 1 1 2 1 1
##  [408] 1 1 1 2 2 2 1 2 1 1 2 1 2 1 2 2 1 2 2 2 2 1 1 1 2 1 1 1 1 2 1 1 2 1 1 1 1
##  [445] 2 2 1 1 2 1 1 1 2 1 2 1 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1
##  [482] 1 1 1 1 1 1 1 1 1 2 1 1 2 2 1 2 2 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [519] 1 2 1 2 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 2 1 1 1 2 1 1 2 1
##  [556] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 1 1 2
##  [593] 1 1 2 1 1 1 2 1 2 1 2 1 2 2 1 1 1 2 1 1 2 1 1 2 2 1 1 1 2 2 1 2 2 2 2 2 2
##  [630] 2 2 1 2 2 2 2 2 2 2 1 1 2 1 2 1 2 2 2 2 2 1 2 1 1 1 1 1 1 2 2 2 2 2 1 1 2
##  [667] 1 1 1 1 2 2 2 2 1 1 1 2 2 1 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 1 1 2 2
##  [704] 1 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2
##  [741] 2 2 2 1 1 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2
##  [778] 2 1 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 1 1 1 2 2 2 1 1 2
##  [815] 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 1 2 1 2 1 2 1 2 2 2 2 2 1
##  [852] 1 1 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 2 2 2 1 2 2 2 2 1
##  [889] 2 1 2 2 1 2 2 2 1 2 1 2 1 2 2 2 2 2 2 2 2 2 1 1 1 1 2 1 2 2 2 2 1 2 2 2 1
##  [926] 2 1 2 1 1 2 2 2 2 2 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 2 1 2
##  [963] 2 1 1 1 1 2 1 2 1 1 1 1 1 2 2 2 2 1 1 2 2 1 1 2 1 2 2 1 2 2 2 2 1 2 2 2 2
## [1000] 2 2 1 1 2 2 2 1 1 1 1 1 1 2 2 2 1 1 2 2 2 1 1 2 1 2 2 2 2 2 2 2 2 2 2 2 1
## [1037] 2 2 1 2 2 2 2 1 2 2 2 2 1 1 2 1 2 1 2 2 1 2 1 1 1 1 1 1 2 2 2 1 1 2 1 2 2
## [1074] 2 2 1 1 1 1 1 1 1 2 1 2 2 1 1 1 1 1 1 2 1 2 1 2 1 1 1 1 2 2 2 2 2 1 1 2 1
## [1111] 2 2 1 1 2 2 2 2 2 2 2 2 2 1 2 1 2 2 1 1 2 2 2 2 1 2 1 1 2 2 2 1 2 2 2 1 2
## [1148] 1 2 1 1 2 2 1 2 2 1 2 1 1 1 1 1 2 2 1 1 1 2 2 1 2 1 2 2 2 2 2 2 1 1 1 1 2
## [1185] 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 1 2 2 2 2 2 1 1 1 2 2 1 1 1
## [1222] 1 2 1 1 1 2 1 2 2 1 2 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [1259] 2 2 1 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 1 2 2 1 1 2 2 2 2 2
## [1296] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 2 1 1 2 2 2 2 2 2 2 2
## [1333] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 2 2 2 2 1 2
## [1370] 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1
## [1407] 1 2 1 2 2 2 1 1 1 2 1 2 2 2 2 2 2 2 1 1 1 2 2 1 2 2 2 2 1 1 1 2 2 2 2 2 2
## [1444] 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 1 2 2 1
## [1481] 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 1 1 2 2 2 2 2 2 2
## [1518] 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 1 2 2 2 2 2
## [1555] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2
## [1592] 2 2 2 2 2 2 2 2
## 
## Within cluster sum of squares by cluster:
## [1] 6810.586 7510.572
##  (between_SS / total_SS =  18.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

# Cluster plot of the two-cluster solution: observations as points, with the
# cluster centres shown.
factoextra::fviz_cluster(
  object = dataK2,
  data = data.norm,
  show.clust.cent = TRUE,
  geom = "point"
)

# k-means fits for three, four and five clusters, each with 25 random starts.
# The fits are kept in this order so the random number stream is consumed
# exactly as before.
dataK3 <- stats::kmeans(x = data.norm, nstart = 25, centers = 3)
dataK4 <- stats::kmeans(x = data.norm, nstart = 25, centers = 4)
dataK5 <- stats::kmeans(x = data.norm, nstart = 25, centers = 5)

# Point-only cluster plot of one k-means fit on the standardised data,
# carrying the given title.
cluster_panel <- function(fit, title) {
  fviz_cluster(fit, geom = "point", data = data.norm) + ggtitle(title)
}

# One panel per candidate number of clusters, arranged in two rows.
p1 <- cluster_panel(dataK2, " K = 2")
p2 <- cluster_panel(dataK3, " K = 3")
p3 <- cluster_panel(dataK4, " K = 4")
p4 <- cluster_panel(dataK5, " K = 5")

grid.arrange(p1, p2, p3, p4, nrow = 2)

# Compare candidate numbers of k-means clusters, first with the "wss" method
# and then with the "silhouette" method.
factoextra::fviz_nbclust(
  x = data.norm,
  FUNcluster = stats::kmeans,
  method = "wss"
)

factoextra::fviz_nbclust(
  x = data.norm,
  FUNcluster = stats::kmeans,
  method = "silhouette"
)

# Final model: reset the seed and refit k-means with two clusters and
# 25 random starts, then print the fit.
set.seed(123)
final <- stats::kmeans(x = data.norm, nstart = 25, centers = 2)
print(final)

## K-means clustering with 2 clusters of sizes 590, 1009
## 
## Cluster means:
##   fixed.acidity volatile.acidity citric.acid residual.sugar  chlorides
## 1     0.9207271       -0.6361774   0.9851367     0.16097077  0.2641746
## 2    -0.5383835        0.3719967  -0.5760462    -0.09412562 -0.1544728
##   free.sulfur.dioxide total.sulfur.dioxide    density         pH  sulphates
## 1          -0.2356324           -0.2152177  0.4994664 -0.7077417  0.5314734
## 2           0.1377831            0.1258458 -0.2920566  0.4138430 -0.3107723
##      alcohol
## 1  0.1730540
## 2 -0.1011911
## 
## Clustering vector:
##    [1] 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 1 1 2 1 1 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2
##   [38] 1 2 1 1 2 1 1 2 2 2 1 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2
##   [75] 1 1 1 2 2 2 2 1 2 1 2 2 1 2 1 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2
##  [112] 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1
##  [149] 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2
##  [186] 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 1 2 2 2 1 1 2 2 1 1 2 1 2 2 2 2 2 2 2 2 2
##  [223] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 1 2 2 2 2 2 1 2 1 2 2 2 1 2 1
##  [260] 1 2 2 2 2 1 1 2 1 2 1 2 1 1 2 2 2 2 1 1 1 1 1 2 1 2 2 1 2 2 1 2 1 1 2 1 1
##  [297] 1 2 2 2 2 1 2 2 1 1 2 1 1 2 1 2 2 2 2 2 2 2 1 2 1 2 2 1 1 1 1 1 1 1 1 1 2
##  [334] 2 2 1 1 2 1 1 1 1 1 1 1 2 2 1 1 2 1 2 2 1 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1
##  [371] 2 1 1 2 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 1 2 1 1 1 1 1 2 1 1 2 2 2 1 1 2 1 1
##  [408] 1 1 1 2 2 2 1 2 1 1 2 1 2 1 2 2 1 2 2 2 2 1 1 1 2 1 1 1 1 2 1 1 2 1 1 1 1
##  [445] 2 2 1 1 2 1 1 1 2 1 2 1 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1
##  [482] 1 1 1 1 1 1 1 1 1 2 1 1 2 2 1 2 2 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [519] 1 2 1 2 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 2 1 1 1 2 1 1 2 1
##  [556] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 1 1 2
##  [593] 1 1 2 1 1 1 2 1 2 1 2 1 2 2 1 1 1 2 1 1 2 1 1 2 2 1 1 1 2 2 1 2 2 2 2 2 2
##  [630] 2 2 1 2 2 2 2 2 2 2 1 1 2 1 2 1 2 2 2 2 2 1 2 1 1 1 1 1 1 2 2 2 2 2 1 1 2
##  [667] 1 1 1 1 2 2 2 2 1 1 1 2 2 1 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 1 1 2 2
##  [704] 1 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2
##  [741] 2 2 2 1 1 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2
##  [778] 2 1 2 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 1 1 1 2 2 2 1 1 2
##  [815] 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 1 2 1 2 1 2 1 2 2 2 2 2 1
##  [852] 1 1 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 2 2 2 1 2 2 2 2 1
##  [889] 2 1 2 2 1 2 2 2 1 2 1 2 1 2 2 2 2 2 2 2 2 2 1 1 1 1 2 1 2 2 2 2 1 2 2 2 1
##  [926] 2 1 2 1 1 2 2 2 2 2 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 2 1 2
##  [963] 2 1 1 1 1 2 1 2 1 1 1 1 1 2 2 2 2 1 1 2 2 1 1 2 1 2 2 1 2 2 2 2 1 2 2 2 2
## [1000] 2 2 1 1 2 2 2 1 1 1 1 1 1 2 2 2 1 1 2 2 2 1 1 2 1 2 2 2 2 2 2 2 2 2 2 2 1
## [1037] 2 2 1 2 2 2 2 1 2 2 2 2 1 1 2 1 2 1 2 2 1 2 1 1 1 1 1 1 2 2 2 1 1 2 1 2 2
## [1074] 2 2 1 1 1 1 1 1 1 2 1 2 2 1 1 1 1 1 1 2 1 2 1 2 1 1 1 1 2 2 2 2 2 1 1 2 1
## [1111] 2 2 1 1 2 2 2 2 2 2 2 2 2 1 2 1 2 2 1 1 2 2 2 2 1 2 1 1 2 2 2 1 2 2 2 1 2
## [1148] 1 2 1 1 2 2 1 2 2 1 2 1 1 1 1 1 2 2 1 1 1 2 2 1 2 1 2 2 2 2 2 2 1 1 1 1 2
## [1185] 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 1 2 2 2 2 2 1 1 1 2 2 1 1 1
## [1222] 1 2 1 1 1 2 1 2 2 1 2 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [1259] 2 2 1 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 1 2 2 1 1 2 2 2 2 2
## [1296] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 2 1 1 2 2 2 2 2 2 2 2
## [1333] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 2 2 2 2 1 2
## [1370] 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1
## [1407] 1 2 1 2 2 2 1 1 1 2 1 2 2 2 2 2 2 2 1 1 1 2 2 1 2 2 2 2 1 1 1 2 2 2 2 2 2
## [1444] 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 1 2 2 1
## [1481] 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 1 1 2 2 2 2 2 2 2
## [1518] 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 1 2 2 2 2 2
## [1555] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2
## [1592] 2 2 2 2 2 2 2 2
## 
## Within cluster sum of squares by cluster:
## [1] 6810.586 7510.572
##  (between_SS / total_SS =  18.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

# Cluster plot of the final two-cluster solution on the standardised data.
factoextra::fviz_cluster(object = final, data = data.norm)

# Median of every standardised variable within each final cluster.
# across() replaces the superseded summarize_all(). The dplyr:: prefixes keep
# the call on dplyr's verbs even though Hmisc, attached later, masks
# dplyr's `summarize`.
data.norm %>%
  dplyr::mutate(Cluster = final$cluster) %>%
  dplyr::group_by(Cluster) %>%
  dplyr::summarise(dplyr::across(dplyr::everything(), median))

## # A tibble: 2 × 12
##   Cluster fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
##     <int>         <dbl>            <dbl>       <dbl>          <dbl>     <dbl>
## 1       1         0.908           -0.714       1.02          -0.169    -0.116
## 2       2        -0.586            0.347      -0.672         -0.311    -0.201
## # ℹ 6 more variables: free.sulfur.dioxide <dbl>, total.sulfur.dioxide <dbl>,
## #   density <dbl>, pH <dbl>, sulphates <dbl>, alcohol <dbl>

RED WINE

j.hawes

2025-10-13

R Markdown