K Means Clustering Project

Usually, when dealing with an unsupervised learning problem, it’s difficult to get a good measure of how well the model performed. For this project, we will use data from the UCI archive on red and white wines (a very commonly used data set in ML).

We will then add a label to the combined data set; we’ll bring this label back later to see how well we can cluster the wine into groups.

Get the Data

library(readr)
# Load the red and white wine tables. The files are semicolon-delimited, so
# the separator is overridden; base read.csv() is used (readr is not needed
# for these two calls).
df1 <- read.csv(file = "winequality-red.csv", sep = ";")
df2 <- read.csv(file = "winequality-white.csv", sep = ";")

Checking the head of both data sets

# Tag every row with its wine type so the true class can be compared with the
# clusters later. rep() builds the constant column directly (no per-row
# sapply closure) and always yields a character vector, even for zero rows.
df1$label <- rep("red", nrow(df1))
df2$label <- rep("white", nrow(df2))
head(df1)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.4             0.70        0.00            1.9     0.076
## 2           7.8             0.88        0.00            2.6     0.098
## 3           7.8             0.76        0.04            2.3     0.092
## 4          11.2             0.28        0.56            1.9     0.075
## 5           7.4             0.70        0.00            1.9     0.076
## 6           7.4             0.66        0.00            1.8     0.075
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  11                   34  0.9978 3.51      0.56     9.4
## 2                  25                   67  0.9968 3.20      0.68     9.8
## 3                  15                   54  0.9970 3.26      0.65     9.8
## 4                  17                   60  0.9980 3.16      0.58     9.8
## 5                  11                   34  0.9978 3.51      0.56     9.4
## 6                  13                   40  0.9978 3.51      0.56     9.4
##   quality label
## 1       5   red
## 2       5   red
## 3       5   red
## 4       6   red
## 5       5   red
## 6       5   red
# Preview the first six rows of the labelled white-wine table.
head(df2, n = 6L)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.0             0.27        0.36           20.7     0.045
## 2           6.3             0.30        0.34            1.6     0.049
## 3           8.1             0.28        0.40            6.9     0.050
## 4           7.2             0.23        0.32            8.5     0.058
## 5           7.2             0.23        0.32            8.5     0.058
## 6           8.1             0.28        0.40            6.9     0.050
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  45                  170  1.0010 3.00      0.45     8.8
## 2                  14                  132  0.9940 3.30      0.49     9.5
## 3                  30                   97  0.9951 3.26      0.44    10.1
## 4                  47                  186  0.9956 3.19      0.40     9.9
## 5                  47                  186  0.9956 3.19      0.40     9.9
## 6                  30                   97  0.9951 3.26      0.44    10.1
##   quality label
## 1       6 white
## 2       6 white
## 3       6 white
## 4       6 white
## 5       6 white
## 6       6 white
# Stack the red rows on top of the white rows into a single data frame, then
# inspect the column types of the combined table.
wine <- rbind(df1, df2)
str(object = wine)
## 'data.frame':    6497 obs. of  13 variables:
##  $ fixed.acidity       : num  7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
##  $ volatile.acidity    : num  0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
##  $ citric.acid         : num  0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
##  $ residual.sugar      : num  1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
##  $ chlorides           : num  0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
##  $ free.sulfur.dioxide : num  11 25 15 17 11 13 15 15 9 17 ...
##  $ total.sulfur.dioxide: num  34 67 54 60 34 40 59 21 18 102 ...
##  $ density             : num  0.998 0.997 0.997 0.998 0.998 ...
##  $ pH                  : num  3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
##  $ sulphates           : num  0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
##  $ alcohol             : num  9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
##  $ quality             : int  5 5 5 6 5 5 5 7 7 5 ...
##  $ label               : chr  "red" "red" "red" "red" ...

EDA

library(ggplot2)
# Colour palettes keyed by wine label. Naming the entries ties each colour to
# its label explicitly instead of relying on the alphabetical order of the
# label values ("red" before "white"), and keeps the hex codes in one place.
wine_fill <- c(red = "#ae4554", white = "#F2E4B7")
wine_point <- c(red = "#ae4554", white = "#faf7ea")

# Histogram of residual sugar, filled by wine label.
pl <- ggplot(wine, aes(x = residual.sugar)) +
  geom_histogram(aes(fill = label), color = "black", bins = 50)
pl + scale_fill_manual(values = wine_fill) + theme_bw()

# Histogram of citric acid, filled by wine label.
ggplot(wine, aes(citric.acid)) +
  geom_histogram(bins = 50, aes(fill = label), color = "black") +
  scale_fill_manual(values = wine_fill)

# Histogram of alcohol content, filled by wine label.
ggplot(wine, aes(alcohol)) +
  geom_histogram(bins = 50, aes(fill = label), color = "black") +
  scale_fill_manual(values = wine_fill)

# Citric acid vs. residual sugar; a dark theme and low alpha keep the
# near-white points for white wine visible where they overlap.
ggplot(wine, aes(citric.acid, residual.sugar)) +
  geom_point(alpha = 0.2, aes(color = label)) +
  theme_dark() +
  scale_color_manual(values = wine_point)

# Volatile acidity vs. residual sugar, same styling as the previous scatter.
ggplot(wine, aes(volatile.acidity, residual.sugar)) +
  geom_point(alpha = 0.2, aes(color = label)) +
  theme_dark() +
  scale_color_manual(values = wine_point)

Creating clus.data without the label column

# Build the clustering input: every column except the known wine label, so
# that k-means never sees the answer. Selecting by name rather than by
# position (1:12) keeps this correct if columns are added or reordered, and
# drop = FALSE guarantees the result stays a data frame.
clus.data <- wine[, setdiff(names(wine), "label"), drop = FALSE]
head(clus.data)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.4             0.70        0.00            1.9     0.076
## 2           7.8             0.88        0.00            2.6     0.098
## 3           7.8             0.76        0.04            2.3     0.092
## 4          11.2             0.28        0.56            1.9     0.075
## 5           7.4             0.70        0.00            1.9     0.076
## 6           7.4             0.66        0.00            1.8     0.075
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  11                   34  0.9978 3.51      0.56     9.4
## 2                  25                   67  0.9968 3.20      0.68     9.8
## 3                  15                   54  0.9970 3.26      0.65     9.8
## 4                  17                   60  0.9980 3.16      0.58     9.8
## 5                  11                   34  0.9978 3.51      0.56     9.4
## 6                  13                   40  0.9978 3.51      0.56     9.4
##   quality
## 1       5
## 2       5
## 3       5
## 4       6
## 5       5
## 6       5

Building the clusters

# k-means picks its starting centres at random, so fix the seed to make the
# cluster assignment (and the cluster numbering) reproducible between runs.
set.seed(101)
# Fit k = 2 clusters; nstart repeats the fit from several random starts and
# keeps the solution with the lowest total within-cluster sum of squares.
# NOTE(review): the features are used on their raw scales, so wide-ranging
# columns (e.g. total.sulfur.dioxide) dominate the Euclidean distance;
# consider scale(clus.data) if that is not intended.
wine.cluster <- kmeans(clus.data, centers = 2, nstart = 20)
print(wine.cluster)
## K-means clustering with 2 clusters of sizes 2808, 3689
## 
## Cluster means:
##   fixed.acidity volatile.acidity citric.acid residual.sugar  chlorides
## 1      7.623219        0.4086378   0.2908725       3.076425 0.06580983
## 2      6.904812        0.2871659   0.3397642       7.244809 0.04859257
##   free.sulfur.dioxide total.sulfur.dioxide   density       pH sulphates
## 1            18.39868             63.26318 0.9945736 3.254882 0.5724145
## 2            39.75590            155.69246 0.9947903 3.190808 0.4999485
##    alcohol  quality
## 1 10.79722 5.810541
## 2 10.25932 5.824343
## 
## Clustering vector:
##    [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1
##   [38] 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1
##   [75] 1 1 1 1 1 2 1 1 1 1 1 1 2 1 2 1 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1
##  [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1
##  [149] 1 1 1 1 1 1 2 2 2 2 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [186] 1 1 1 2 2 2 1 2 1 1 2 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 2 1 1
##  [223] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1
##  [260] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [297] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2
##  [334] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [371] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1
##  [408] 1 1 1 1 1 1 1 2 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [445] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [482] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1
##  [519] 1 1 1 1 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [556] 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2
##  [593] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [630] 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [667] 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1
##  [704] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [741] 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1
##  [778] 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [815] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [852] 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [889] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [926] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [963] 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1000] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1037] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1074] 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1111] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1148] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1185] 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1222] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1259] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1296] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1333] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1370] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1
## [1407] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1444] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1481] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1518] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1555] 1 1 1 1 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1592] 1 1 1 1 1 1 1 1 2 2 1 2 2 1 2 2 2 2 1 1 1 2 2 2 1 1 2 2 1 1 2 2 2 2 2 2 2
## [1629] 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2 2 1 2 2
## [1666] 2 2 1 1 2 2 2 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2
## [1703] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 1
## [1740] 1 2 1 1 1 2 2 1 1 2 2 2 2 1 1 2 2 2 2 2 2 2 1 2 2 2 2 1 2 1 2 1 1 2 2 2 1
## [1777] 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2
## [1814] 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 1 1 2 2
## [1851] 2 2 1 2 2 1 1 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 2 2 2 2 2
## [1888] 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 1 2 2 2 2
## [1925] 2 2 2 2 1 1 2 1 2 1 1 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1
## [1962] 2 2 1 2 2 2 2 1 2 2 2 2 1 1 2 1 2 2 1 2 2 2 2 1 2 2 2 2 2 1 2 1 2 2 1 1 2
## [1999] 1 1 2 1 2 2 2 1 2 2 1 2 2 1 1 2 2 1 2 1 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 1 1
## [2036] 2 2 2 1 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 1
## [2073] 2 1 2 1 2 2 2 2 1 2 2 2 2 2 1 2 2 1 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [2110] 2 2 2 1 2 2 2 2 1 1 2 2 1 1 1 2 1 2 1 2 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2
## [2147] 1 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 1
## [2184] 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 1 2 1 2 2 2 2 1 2 2 2 2
## [2221] 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2
## [2258] 2 2 1 2 2 1 2 2 2 2 2 2 2 1 2 2 2 2 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [2295] 2 1 2 2 2 2 2 1 2 2 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2
## [2332] 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2
## [2369] 2 2 2 2 2 2 1 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 1 2 2 2 2 2 2
## [2406] 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 1 2 1 2 2 2 1 1 2 2 1 1 2 1 2 2 2 2 2 2 2
## [2443] 1 2 2 2 1 2 1 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 1 1 2 2 1 2 1
## [2480] 2 2 2 2 2 1 2 1 2 1 2 2 2 2 2 1 2 1 1 2 2 2 2 2 2 1 1 2 2 2 2 2 2 1 1 1 2
## [2517] 2 1 1 2 2 2 2 2 1 1 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 1 2 2 1 2 2 1 1 2 2 1 2
## [2554] 2 2 2 1 1 2 2 1 2 2 2 1 2 2 1 1 1 1 2 1 2 1 2 2 2 1 1 2 1 1 2 2 2 2 2 2 2
## [2591] 1 2 1 2 2 1 2 2 2 1 2 2 2 2 1 2 1 2 2 1 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1
## [2628] 2 2 2 2 2 2 2 2 1 1 1 1 2 1 1 2 1 1 1 1 1 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2
## [2665] 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2
## [2702] 2 2 2 2 2 1 1 2 1 1 2 1 1 1 1 2 1 2 1 2 1 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2
## [2739] 1 1 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 1 2 1 2 1 2 2 2 2 1 2 2
## [2776] 2 2 2 1 2 2 1 2 2 2 2 1 2 1 2 2 1 2 2 2 2 2 1 1 1 1 2 1 1 2 2 2 2 2 1 2 2
## [2813] 1 1 2 1 2 2 1 2 2 2 2 1 1 1 1 1 1 2 2 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 1 2
## [2850] 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [2887] 2 1 1 1 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 1 2 1 2 2 2
## [2924] 2 2 2 2 1 1 1 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2
## [2961] 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 1 2 2 2 1 1 1 2 1 2 2
## [2998] 2 1 2 2 2 2 1 2 1 1 2 2 1 1 2 2 2 1 2 2 2 1 2 1 1 2 2 2 2 2 1 1 2 1 1 1 2
## [3035] 1 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2
## [3072] 1 2 1 2 2 1 2 2 2 2 2 1 2 2 1 2 2 2 2 1 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2
## [3109] 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 1 1 2 1 1 1 1 2
## [3146] 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 1 2 2 1 2 2 2 2 2 2 1 2 1 2 2 2 2 1 2
## [3183] 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 1 2 2 1 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2
## [3220] 1 2 2 2 2 2 2 2 1 1 2 1 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 1
## [3257] 2 2 2 2 2 2 2 2 2 1 2 1 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [3294] 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 1 2 2 2 1
## [3331] 2 2 2 2 2 1 2 1 1 2 2 2 2 2 2 1 2 1 2 2 2 2 2 1 2 2 2 1 2 2 2 1 2 2 2 2 2
## [3368] 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2
## [3405] 2 2 2 2 2 2 2 1 1 1 2 2 2 1 2 2 1 1 2 2 1 2 2 2 2 2 2 2 2 2 2 1 1 2 1 2 1
## [3442] 2 2 1 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2
## [3479] 1 2 1 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 1 2 2 2 2 1 2 2 1 2 2 2 1
## [3516] 2 1 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 2 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [3553] 2 2 2 1 1 2 1 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1
## [3590] 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 2 2 2 1 1 2 1 2 2 2 2 2 2 2 2 1
## [3627] 2 2 2 2 2 1 2 2 1 2 2 1 1 2 1 1 1 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 1 2 2
## [3664] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2
## [3701] 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [3738] 1 1 2 2 2 1 2 2 1 2 1 1 1 2 1 1 2 2 1 1 1 1 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2
## [3775] 2 2 2 2 1 2 2 2 2 1 1 1 2 1 2 2 2 2 2 1 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [3812] 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1
## [3849] 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 1 2 2 2 1 2 2 1 1 2 2 2 2
## [3886] 2 2 2 1 1 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2
## [3923] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 1 2 2 1 2 2 2 2 2 1 1 2 2 1 2 2
## [3960] 2 1 2 2 2 2 2 2 2 2 1 2 1 1 2 2 2 1 2 2 2 2 1 1 1 2 2 2 1 1 2 2 2 2 2 2 2
## [3997] 1 1 1 1 1 2 2 2 2 1 1 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [4034] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 1 2 2
## [4071] 2 1 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2
## [4108] 2 2 2 2 1 2 2 1 2 2 1 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 1 2 1 2 2 1 2 2 2 1 2
## [4145] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 1 2 2 2 1 2 2 2 2 1 1 2 2 2 2 2 1 2 2 2
## [4182] 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 1 1 2 2 2 1 2 2 1 2 1 2 2 2 2
## [4219] 2 2 2 2 2 1 2 2 2 2 2 1 1 2 2 2 1 2 2 1 1 1 2 1 2 2 2 1 2 2 2 2 2 1 2 2 2
## [4256] 2 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2
## [4293] 2 1 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 1 2 2 2 1 2 2 1
## [4330] 2 2 2 1 2 2 2 1 2 1 2 2 2 1 1 1 2 2 2 2 2 2 2 1 1 2 2 1 1 2 2 2 2 2 2 2 1
## [4367] 2 2 2 2 2 2 1 2 2 2 2 1 1 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 1 1 2 2 2 2 2 1 1
## [4404] 1 2 2 2 2 2 1 2 1 2 1 1 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 1 1
## [4441] 1 1 1 1 1 1 1 2 2 2 1 2 1 1 2 2 1 2 2 2 1 1 1 1 2 2 2 2 1 2 1 2 1 2 1 2 2
## [4478] 2 1 1 1 2 1 2 1 1 1 1 2 2 2 2 2 1 2 2 2 1 2 1 1 2 1 2 2 2 1 1 1 2 2 1 2 1
## [4515] 1 2 2 1 2 1 2 2 2 2 2 1 2 2 2 2 1 2 2 1 1 1 2 1 1 2 1 2 1 2 2 1 1 2 2 1 1
## [4552] 1 1 1 2 1 1 1 1 2 2 1 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 1
## [4589] 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 1 1 1 1 1 1 2 2 1 1 1 2 1 1 2
## [4626] 2 2 2 2 2 2 1 2 2 1 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 1 2 1 2 2 1 2 2 2
## [4663] 2 2 2 2 2 2 1 2 1 2 2 2 1 2 2 1 2 1 2 1 1 1 1 1 2 1 1 1 2 2 2 1 1 1 2 2 2
## [4700] 2 1 2 2 2 2 2 2 2 2 1 1 2 2 2 2 1 1 2 1 2 2 1 1 2 2 2 1 1 2 2 2 2 2 2 2 1
## [4737] 2 2 2 2 1 2 1 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 2 1 2 2
## [4774] 2 2 1 2 1 1 1 1 2 1 1 2 1 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1
## [4811] 2 2 2 1 1 2 1 1 1 1 1 2 2 2 1 2 2 2 2 2 1 1 1 2 2 2 2 2 1 2 2 2 1 1 2 2 2
## [4848] 2 2 2 2 1 1 2 2 2 2 2 2 2 1 2 1 2 2 1 2 2 2 2 1 1 2 2 1 2 2 2 2 2 2 2 2 1
## [4885] 2 2 2 2 2 1 1 2 1 2 2 2 2 2 1 1 1 1 1 2 1 2 2 2 1 2 2 1 1 2 1 1 1 1 2 2 1
## [4922] 1 1 2 2 2 1 2 2 2 2 2 2 1 2 2 2 1 1 2 1 1 2 2 2 2 2 1 1 2 1 1 1 2 2 2 1 1
## [4959] 1 1 1 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 1 2 2 2
## [4996] 2 2 2 1 2 2 2 1 1 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2
## [5033] 1 1 1 1 2 2 2 1 2 1 1 2 2 2 1 2 2 1 1 2 1 1 1 2 2 2 2 1 2 2 1 2 2 2 2 1 2
## [5070] 2 1 2 1 2 2 1 2 2 1 1 2 1 1 1 2 1 2 1 1 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2
## [5107] 2 2 2 2 1 2 1 1 1 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 1 2 1 1 2 2 2
## [5144] 2 2 2 2 2 2 2 2 2 2 1 2 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 1 2
## [5181] 2 1 1 1 1 2 2 2 2 1 2 2 2 1 2 2 2 2 2 1 2 2 2 1 2 2 1 2 2 2 1 2 2 2 1 2 2
## [5218] 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 1 1 2 1 1 2 2 2 2 2 2 1 1 2 2 2 2 1 2 1
## [5255] 2 2 2 2 2 2 2 1 2 2 2 2 2 1 1 2 1 1 1 2 2 1 2 1 1 2 2 2 2 2 2 2 1 2 2 2 2
## [5292] 1 2 1 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 1
## [5329] 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 1 1 1 1 2 2
## [5366] 2 2 2 2 2 2 2 1 2 1 2 1 1 1 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 1 1 2 2 1 1 2
## [5403] 1 1 1 1 1 1 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 1 2 1 2 1 2 2 2 2 2 2 1 1 2 2
## [5440] 1 2 2 1 1 2 1 2 1 1 1 2 1 2 2 2 1 1 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 1 2
## [5477] 1 2 1 2 2 2 2 2 1 2 1 2 2 1 2 2 1 2 1 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2 1 1
## [5514] 2 1 2 2 2 1 2 2 1 1 1 1 1 1 2 2 1 1 2 1 1 2 1 2 1 2 2 2 1 2 2 2 2 2 1 2 2
## [5551] 2 1 2 2 1 2 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 1 1 2
## [5588] 2 1 2 1 1 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 1 2 2 2 2 2 2 1 2 2 2 1 1 1 2 2 2
## [5625] 1 2 2 1 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 1 2 2 2 1 1 1
## [5662] 2 2 1 2 2 2 2 2 2 1 2 1 1 2 2 1 1 1 2 2 2 2 1 1 1 1 1 2 2 1 2 1 1 1 2 1 2
## [5699] 2 1 2 2 1 1 2 2 2 1 2 2 1 1 1 1 1 2 2 2 2 2 2 2 1 1 2 2 2 2 1 2 2 2 2 1 2
## [5736] 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 1 2
## [5773] 1 2 2 2 2 2 2 2 1 2 1 1 2 1 2 2 1 1 2 1 1 1 1 1 2 2 1 1 1 2 2 2 1 2 2 1 1
## [5810] 2 2 2 1 2 2 2 2 2 2 1 2 2 1 1 2 2 1 2 2 2 1 1 2 2 2 2 1 2 1 2 2 2 1 2 2 1
## [5847] 2 2 2 1 1 1 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1
## [5884] 1 1 1 2 1 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 1 2 2 1 2 1 2 2 1 2 2 2 2
## [5921] 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 1 1 1 2 2 2 1 2 2 2 2 2 2 2
## [5958] 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 1 2 2 2 1 2 2 2 2 2 1 2 2 2 1 1 2 2 2 2 2 2
## [5995] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1
## [6032] 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1
## [6069] 1 1 1 1 2 2 2 1 2 2 2 2 2 2 1 2 1 1 1 2 2 2 1 1 2 1 2 1 2 1 1 2 2 2 1 2 2
## [6106] 1 1 2 1 2 1 1 1 2 2 1 1 1 2 2 2 1 2 2 2 2 1 1 2 2 2 2 2 1 2 2 1 2 1 1 1 1
## [6143] 1 1 1 1 1 2 1 1 2 1 1 1 1 2 2 2 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 1 2 1 1 1 2
## [6180] 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 1 1 2 1 1 1 1 2 2 2 2 1
## [6217] 2 2 2 2 1 2 2 1 2 2 1 1 1 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 1 2 1 1 2 2 1
## [6254] 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 2
## [6291] 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 1 2 1 2 2 1 2 1 1 2 2 1 1 1 2 2 1 2 1 2
## [6328] 2 1 1 2 2 2 1 1 1 2 2 1 2 2 2 2 1 2 2 1 2 2 2 2 1 1 1 1 2 2 2 2 2 2 1 2 2
## [6365] 1 2 2 2 2 2 2 2 1 2 2 2 1 1 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2
## [6402] 2 2 1 1 2 1 1 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 1 2 2 2 1 1 1 1 2 2
## [6439] 1 2 2 1 2 2 1 2 1 2 2 2 2 1 1 1 2 2 2 2 2 2 1 1 1 2 1 2 1 2 1 2 1 2 2 2 1
## [6476] 2 1 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 1 2 2 1 1
## 
## Within cluster sum of squares by cluster:
## [1] 3256954 5337874
##  (between_SS / total_SS =  62.6 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Show only the per-cluster feature means (one row per cluster).
print(wine.cluster[["centers"]])
##   fixed.acidity volatile.acidity citric.acid residual.sugar  chlorides
## 1      7.623219        0.4086378   0.2908725       3.076425 0.06580983
## 2      6.904812        0.2871659   0.3397642       7.244809 0.04859257
##   free.sulfur.dioxide total.sulfur.dioxide   density       pH sulphates
## 1            18.39868             63.26318 0.9945736 3.254882 0.5724145
## 2            39.75590            155.69246 0.9947903 3.190808 0.4999485
##    alcohol  quality
## 1 10.79722 5.810541
## 2 10.25932 5.824343

Evaluating the Clusters

# Cross-tabulate the true wine label (rows) against the assigned cluster
# (columns) to see how well the clusters line up with red vs. white.
table(wine[["label"]], wine.cluster[["cluster"]])
##        
##            1    2
##   red   1514   85
##   white 1294 3604

We can see that red is easier to cluster together, which makes sense given our previous visualizations. There seems to be a lot of noise with white wines; this could also be due to “Rosé” wines being categorized as white wine while still retaining the qualities of a red wine. Overall this makes sense, since wine is essentially just fermented grape juice and the chemical measurements we were provided may not correlate well with whether or not the wine is red or white!

It’s important to note here that K-Means can only give you the clusters; it can’t directly tell you what the labels should be, or even how many clusters you should have. We are just lucky to know we expected two types of wine. This is where domain knowledge really comes into play.