library(ggplot2)
library(cluster)


Load data

# Base location of the UCI wine-quality files. Both CSVs are read with a
# header row and ';' as the field separator.
fileURL <- "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality"
wine.urls <- paste0(fileURL, c("/winequality-red.csv", "/winequality-white.csv"))
df1 <- read.csv(wine.urls[1], sep = ";", header = TRUE)  # red wines
df2 <- read.csv(wine.urls[2], sep = ";", header = TRUE)  # white wines


Add wine color indicator

# Tag every row with the color of the data set it came from, so the two
# sources can still be told apart once they are combined.
df1[["wine.color"]] <- "red"
df2[["wine.color"]] <- "white"

# Preview the first six rows of the red-wine data.
head(df1, n = 6L)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.4             0.70        0.00            1.9     0.076
## 2           7.8             0.88        0.00            2.6     0.098
## 3           7.8             0.76        0.04            2.3     0.092
## 4          11.2             0.28        0.56            1.9     0.075
## 5           7.4             0.70        0.00            1.9     0.076
## 6           7.4             0.66        0.00            1.8     0.075
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  11                   34  0.9978 3.51      0.56     9.4
## 2                  25                   67  0.9968 3.20      0.68     9.8
## 3                  15                   54  0.9970 3.26      0.65     9.8
## 4                  17                   60  0.9980 3.16      0.58     9.8
## 5                  11                   34  0.9978 3.51      0.56     9.4
## 6                  13                   40  0.9978 3.51      0.56     9.4
##   quality wine.color
## 1       5        red
## 2       5        red
## 3       5        red
## 4       6        red
## 5       5        red
## 6       5        red
# Preview the first six rows of the white-wine data.
head(df2, n = 6L)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.0             0.27        0.36           20.7     0.045
## 2           6.3             0.30        0.34            1.6     0.049
## 3           8.1             0.28        0.40            6.9     0.050
## 4           7.2             0.23        0.32            8.5     0.058
## 5           7.2             0.23        0.32            8.5     0.058
## 6           8.1             0.28        0.40            6.9     0.050
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  45                  170  1.0010 3.00      0.45     8.8
## 2                  14                  132  0.9940 3.30      0.49     9.5
## 3                  30                   97  0.9951 3.26      0.44    10.1
## 4                  47                  186  0.9956 3.19      0.40     9.9
## 5                  47                  186  0.9956 3.19      0.40     9.9
## 6                  30                   97  0.9951 3.26      0.44    10.1
##   quality wine.color
## 1       6      white
## 2       6      white
## 3       6      white
## 4       6      white
## 5       6      white
## 6       6      white


Combine the red and white datasets. The color labels will be removed later, before clustering.

# Stack the red-wine rows on top of the white-wine rows, then inspect the
# column types of the combined data frame.
wine <- do.call(rbind, list(df1, df2))
str(object = wine)
## 'data.frame':    6497 obs. of  13 variables:
##  $ fixed.acidity       : num  7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
##  $ volatile.acidity    : num  0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
##  $ citric.acid         : num  0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
##  $ residual.sugar      : num  1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
##  $ chlorides           : num  0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
##  $ free.sulfur.dioxide : num  11 25 15 17 11 13 15 15 9 17 ...
##  $ total.sulfur.dioxide: num  34 67 54 60 34 40 59 21 18 102 ...
##  $ density             : num  0.998 0.997 0.997 0.998 0.998 ...
##  $ pH                  : num  3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
##  $ sulphates           : num  0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
##  $ alcohol             : num  9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
##  $ quality             : int  5 5 5 6 5 5 5 7 7 5 ...
##  $ wine.color          : chr  "red" "red" "red" "red" ...
# Count how many rows each wine color contributes to the combined data.
table(wine[["wine.color"]])
## 
##   red white 
##  1599  4898


Lots of histograms

# Layers shared by every histogram below: 50 black-outlined bins, a manual
# fill scale and a light theme.
#
# `values` is a *named* vector so each wine color is tied to its hex code by
# name. With an unnamed vector, ggplot2 >= 3.3.0 pairs the values
# positionally with `breaks` (red, white), which swapped the colors: red
# wine was filled cream and white wine burgundy. Naming the values gives the
# intended mapping on every ggplot2 version.
wine.hist.layers <- list(
  geom_histogram(bins = 50, color = "black"),
  scale_fill_manual(
    name = "label",
    values = c(red = "#800020", white = "#FEF9E7"),
    # breaks fix the order of the legend entries
    breaks = c("red", "white"),
    labels = c("red", "white")
  ),
  theme_bw()  # light background
)

# In each plot the fill factor uses levels = c("white", "red") so that white
# is stacked on top of red within every bar.

# Distribution of residual sugar by wine color.
ggplot(
  data = wine,
  aes(x = residual.sugar,
      fill = factor(wine.color, levels = c("white", "red")))
) +
  wine.hist.layers

# Distribution of citric acid by wine color.
ggplot(
  data = wine,
  aes(x = citric.acid,
      fill = factor(wine.color, levels = c("white", "red")))
) +
  wine.hist.layers

# Distribution of alcohol by wine color.
ggplot(
  data = wine,
  aes(x = alcohol,
      fill = factor(wine.color, levels = c("white", "red")))
) +
  wine.hist.layers


More scatter plots

# Citric acid against residual sugar, one open circle (shape 1) per wine,
# colored by wine color. The unnamed `values` are assigned in the scale's
# default order, which for this character column is red, then white.
ggplot(wine, aes(citric.acid, residual.sugar, color = wine.color)) +
  geom_point(shape = 1) +
  scale_color_manual(values = c("#800020", "#FEF9E7"), name = "label") +
  theme_dark()

# Volatile acidity against residual sugar, one open circle (shape 1) per
# wine, colored by wine color. The unnamed `values` are assigned in the
# scale's default order, which for this character column is red, then white.
ggplot(wine, aes(volatile.acidity, residual.sugar, color = wine.color)) +
  geom_point(shape = 1) +
  scale_color_manual(values = c("#800020", "#FEF9E7"), name = "label") +
  theme_dark()


Kmeans clustering

# Build the clustering input: every column except the color label
# (`wine.color`, the 13th and last column of `wine`), so that k-means never
# sees the label it is later evaluated against.
clus.data <- wine[setdiff(names(wine), "wine.color")]
head(clus.data, n = 6L)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.4             0.70        0.00            1.9     0.076
## 2           7.8             0.88        0.00            2.6     0.098
## 3           7.8             0.76        0.04            2.3     0.092
## 4          11.2             0.28        0.56            1.9     0.075
## 5           7.4             0.70        0.00            1.9     0.076
## 6           7.4             0.66        0.00            1.8     0.075
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  11                   34  0.9978 3.51      0.56     9.4
## 2                  25                   67  0.9968 3.20      0.68     9.8
## 3                  15                   54  0.9970 3.26      0.65     9.8
## 4                  17                   60  0.9980 3.16      0.58     9.8
## 5                  11                   34  0.9978 3.51      0.56     9.4
## 6                  13                   40  0.9978 3.51      0.56     9.4
##   quality
## 1       5
## 2       5
## 3       5
## 4       6
## 5       5
## 6       5
# Fix the RNG seed before clustering: kmeans() chooses its starting centers
# at random, so the seed must be set ahead of the call for the result to be
# reproducible.
set.seed(seed = 123)

# Partition the wines into two clusters and show the cluster centers.
# NOTE(review): the features are clustered on their raw scales, so the
# columns with the largest ranges (the sulfur dioxide measures) likely
# dominate the distance; consider scale() on the inputs and confirm whether
# `quality` should be part of the feature set.
wine.cluster <- kmeans(x = clus.data, centers = 2)
print(wine.cluster[["centers"]])
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1      7.619044        0.4079451   0.2911080       3.082690 0.0656846
## 2      6.904698        0.2871364   0.3398094       7.259286 0.0486092
##   free.sulfur.dioxide total.sulfur.dioxide   density       pH sulphates
## 1            18.43735             63.54832 0.9945680 3.255147 0.5718655
## 2            39.82503            155.90101 0.9947956 3.190308 0.5000354
##    alcohol  quality
## 1 10.79529 5.809204
## 2 10.25832 5.825436
# Print the full fit: cluster sizes, centers, the per-row cluster
# assignments and the within-cluster sums of squares.
print(x = wine.cluster)
## K-means clustering with 2 clusters of sizes 2825, 3672
## 
## Cluster means:
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1      7.619044        0.4079451   0.2911080       3.082690 0.0656846
## 2      6.904698        0.2871364   0.3398094       7.259286 0.0486092
##   free.sulfur.dioxide total.sulfur.dioxide   density       pH sulphates
## 1            18.43735             63.54832 0.9945680 3.255147 0.5718655
## 2            39.82503            155.90101 0.9947956 3.190308 0.5000354
##    alcohol  quality
## 1 10.79529 5.809204
## 2 10.25832 5.825436
## 
## Clustering vector:
##    [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##   [35] 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 1 2 1 1 1 2 1 1 1 1 1 1
##   [69] 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 2 1 2 2 2 1 1 1 1 1 1 1 1 1
##  [103] 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1
##  [137] 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 2 2 2 1 1 1 1 1 2 2 1 1 1 1 1
##  [171] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1 2 1 1 2 1 1 1 1 1 2 1 1
##  [205] 1 1 1 2 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [239] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [273] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [307] 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1
##  [341] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [375] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1
##  [409] 1 1 1 1 1 1 2 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [443] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1
##  [477] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1
##  [511] 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [545] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [579] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [613] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1
##  [647] 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1
##  [681] 1 1 1 1 2 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [715] 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1
##  [749] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1
##  [783] 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [817] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [851] 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [885] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1
##  [919] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [953] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1
##  [987] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1
## [1021] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1055] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1
## [1089] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1123] 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1157] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1
## [1191] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1225] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1259] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1293] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1327] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1361] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1
## [1395] 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1
## [1429] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1463] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1
## [1497] 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1531] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 1 1
## [1565] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [1599] 1 2 2 1 2 2 1 2 2 2 2 1 1 1 2 2 2 1 1 2 2 1 1 2 2 2 2 2 2 2 2 2 1 1
## [1633] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2 2 1 2 2 2
## [1667] 2 1 1 2 2 2 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2
## [1701] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2
## [1735] 2 2 2 1 1 1 2 1 1 1 2 2 1 1 2 2 2 2 1 1 2 2 2 2 2 2 2 1 2 2 2 2 1 2
## [1769] 1 2 1 1 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [1803] 2 2 2 2 2 1 2 1 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [1837] 2 2 2 2 1 1 2 2 2 2 1 1 2 2 2 2 1 2 2 1 1 2 1 1 2 1 2 2 2 2 2 2 2 2
## [1871] 2 2 2 2 2 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2
## [1905] 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 1 1 2 1 2 1 1 1 2 2
## [1939] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 1 2 2 2
## [1973] 2 1 1 2 1 2 2 1 2 2 2 2 1 2 2 2 2 2 1 2 1 2 2 1 1 2 1 1 2 1 2 2 2 1
## [2007] 2 2 1 2 2 1 1 2 2 1 2 1 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 1 1 2 2 2 1 1
## [2041] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 1 2 1
## [2075] 2 1 2 2 2 2 1 2 2 2 2 2 1 2 2 1 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [2109] 2 2 2 2 1 2 2 2 2 1 1 2 2 1 1 1 2 1 2 1 1 1 2 2 2 2 2 2 2 1 2 2 2 2
## [2143] 2 2 2 2 1 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 1 2 2 2 2 2
## [2177] 2 1 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 1 2
## [2211] 1 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [2245] 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 1 2 2 2 2 2 2 2 1 2 2 2 2 2 1 1
## [2279] 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 1 2 2 2 1 2 2 2
## [2313] 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2
## [2347] 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 1 2 2 2 1
## [2381] 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [2415] 2 1 2 2 2 1 2 1 2 1 2 2 2 1 1 2 2 1 1 2 1 2 2 2 2 2 2 2 1 2 2 2 1 2
## [2449] 1 1 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 1 2 2 2 1 1 2 2 1 2 1 2 2 2
## [2483] 2 2 1 2 1 2 1 2 2 2 2 2 1 2 1 1 2 2 2 2 2 2 1 1 2 2 2 2 2 2 1 1 1 2
## [2517] 2 1 1 2 2 2 2 2 1 1 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 1 2 2 1 2 2 1 1 2
## [2551] 2 1 2 2 2 2 1 1 2 2 1 2 2 2 1 2 2 1 1 1 1 2 1 2 1 2 2 2 1 1 2 1 1 2
## [2585] 2 2 2 2 2 2 1 2 1 2 2 1 2 2 2 1 2 2 2 2 1 2 1 2 2 1 2 1 2 2 2 2 1 2
## [2619] 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 1 1 1 2 1 1 2 1 1 1 1 1 2 2 2 1
## [2653] 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [2687] 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 1 2 1 1 1 1 2 1 2 1
## [2721] 2 1 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 1 1 2
## [2755] 2 2 2 2 2 2 2 2 2 1 2 1 2 1 2 2 2 2 1 2 2 2 2 2 1 2 2 1 2 2 2 2 1 2
## [2789] 1 2 2 1 2 2 2 2 2 1 1 1 1 2 1 1 2 2 2 2 2 1 2 2 1 1 2 1 2 2 1 2 2 2
## [2823] 2 1 1 1 1 1 1 2 2 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 1 2
## [2857] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1
## [2891] 2 1 1 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2
## [2925] 2 2 2 1 1 1 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1
## [2959] 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 1 2 2 2 1 1
## [2993] 1 2 1 2 2 2 1 2 2 2 2 1 2 1 1 2 2 1 1 2 2 2 1 2 2 2 1 2 1 1 2 2 2 2
## [3027] 2 1 1 2 1 1 1 2 1 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2
## [3061] 2 2 2 2 2 2 1 1 2 2 2 1 2 1 2 2 1 2 2 2 2 2 1 2 2 1 2 2 2 2 1 2 2 2
## [3095] 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2
## [3129] 2 2 2 2 1 2 2 1 2 1 1 2 1 1 1 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 1 2
## [3163] 2 1 2 2 1 2 2 2 2 2 2 1 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2
## [3197] 2 1 1 2 2 1 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 1 2
## [3231] 1 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2
## [3265] 2 1 2 1 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [3299] 2 2 2 1 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 1 2 2 2 1 2 2
## [3333] 2 2 2 1 2 1 1 2 2 2 2 2 2 1 2 1 2 2 2 2 2 1 2 2 2 1 2 2 2 1 1 2 2 2
## [3367] 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [3401] 1 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 1 2 2 1 1 2 2 1 2 2 2 2 2 2 2 2 2
## [3435] 2 1 1 2 1 2 1 2 2 1 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2
## [3469] 2 2 2 2 2 2 2 2 1 2 1 2 1 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2
## [3503] 1 2 2 2 2 1 2 2 1 2 2 2 1 2 1 2 2 2 2 2 1 1 1 1 2 2 2 2 2 1 2 1 2 2
## [3537] 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 1 2 1 2 2 2 2 2 2 2 2
## [3571] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 1 2 1 2 2 2 2 2 2 2 2 2 2 2 2
## [3605] 2 2 1 2 1 1 2 2 2 1 1 1 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 1 2 2 1
## [3639] 1 2 1 1 1 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2
## [3673] 2 2 2 2 2 2 2 2 1 2 1 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2
## [3707] 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2
## [3741] 2 2 1 2 2 1 2 1 1 1 2 1 1 2 2 1 1 1 1 2 1 2 1 2 2 2 2 2 2 2 2 2 2 2
## [3775] 2 2 2 2 1 2 2 2 2 1 1 1 2 1 2 2 2 2 2 1 2 1 1 2 2 2 2 2 2 2 2 2 2 2
## [3809] 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2
## [3843] 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 2 1 1 1 2 2
## [3877] 1 2 2 1 1 2 2 2 2 2 2 2 1 1 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2
## [3911] 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 1
## [3945] 2 2 1 2 2 2 2 2 1 1 2 2 1 2 2 2 1 2 2 2 2 2 2 2 2 1 2 1 1 2 2 2 1 2
## [3979] 2 2 2 1 1 1 2 2 2 1 1 2 2 2 2 2 2 2 1 1 1 1 1 2 2 2 2 1 1 2 1 2 2 2
## [4013] 1 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [4047] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 1 2 2 2 1 2 1 2 2 1 2 2 2
## [4081] 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2
## [4115] 1 2 2 1 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 1 2 1 2 2 1 2 2 2 1 2 2 2 2 2
## [4149] 2 2 2 1 2 2 2 2 2 2 2 1 2 1 2 2 2 1 2 2 2 2 1 1 2 2 2 2 2 1 2 2 2 2
## [4183] 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 1 1 2 2 2 1 2 2 1 2 1 2 2
## [4217] 2 2 2 2 2 2 2 1 2 2 2 2 2 1 1 2 2 2 1 2 2 1 1 1 2 1 2 2 2 1 2 2 2 2
## [4251] 2 1 2 2 2 2 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 2 2
## [4285] 1 2 2 2 2 2 1 2 2 1 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1
## [4319] 2 1 2 1 2 2 2 1 2 2 1 2 2 2 1 2 2 2 1 2 1 2 2 2 1 1 1 2 2 2 2 2 2 2
## [4353] 1 1 2 2 1 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 1 1 1 2 2 2 2 2 2 1
## [4387] 2 2 2 2 2 2 1 2 1 1 2 2 2 2 2 1 1 1 2 2 2 2 2 1 2 1 2 1 1 2 1 2 2 2
## [4421] 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 2 2 2 1 2 1 1
## [4455] 2 2 1 2 2 2 1 1 1 1 2 2 2 2 1 2 1 2 1 2 1 2 2 2 1 1 1 2 1 2 1 1 1 1
## [4489] 2 2 2 2 2 1 2 2 2 1 2 1 1 2 1 2 2 2 1 1 1 2 2 1 2 1 1 2 2 1 2 1 2 2
## [4523] 2 2 2 1 2 2 2 2 1 2 2 1 1 1 2 1 1 2 1 2 1 2 2 1 1 2 2 1 1 1 1 1 2 1
## [4557] 1 1 1 2 2 1 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 2 1 2 2
## [4591] 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 2 2 2 2 1 1 1 1 1 1 2 2 1 1 1 2 1 1
## [4625] 2 2 2 2 2 2 2 1 2 2 1 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 1 2 1 2 2
## [4659] 1 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 1 2 2 1 2 1 2 1 1 1 1 1 2 1 1 1 2 2
## [4693] 2 1 1 1 2 2 2 2 1 2 2 2 2 2 2 2 2 1 1 2 2 2 2 1 1 2 1 2 2 1 1 2 2 2
## [4727] 1 1 2 2 2 2 2 2 2 1 2 2 2 2 1 2 1 2 2 2 2 2 2 1 2 2 1 2 2 2 2 2 2 1
## [4761] 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 1 2 1 1 1 1 2 1 1 2 1 2 2 2 2 2 1 2 1
## [4795] 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 2 2 2 1 1 2 1 1 1 1 1 2 2 2 1 2 2 2
## [4829] 2 2 1 1 1 2 2 2 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 1 2
## [4863] 1 2 2 1 2 2 2 2 1 1 2 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 1 2 1 2 2 2
## [4897] 2 2 1 1 1 1 1 2 1 2 2 2 1 2 2 1 1 2 1 1 1 1 2 2 1 1 1 2 2 2 1 2 2 2
## [4931] 2 2 2 1 2 2 2 1 1 2 1 1 2 2 2 2 2 1 1 2 1 1 1 2 2 2 1 1 1 1 1 2 1 1
## [4965] 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 1 2 2 2 2 2 2
## [4999] 1 2 2 2 1 1 2 1 2 2 1 2 2 2 2 2 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2
## [5033] 1 1 1 1 2 2 2 1 2 1 1 2 2 2 1 2 2 1 1 2 1 1 1 2 2 2 2 1 2 2 1 2 2 2
## [5067] 2 1 2 2 1 2 1 2 2 1 2 2 1 1 2 1 1 1 2 1 2 1 1 2 1 2 2 2 1 2 2 2 2 2
## [5101] 2 2 1 2 2 2 2 2 2 2 1 2 1 1 1 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2
## [5135] 2 1 1 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 2 1 1 1 2 1 1 1 1 1 1
## [5169] 1 1 1 1 1 2 2 2 2 2 1 2 2 1 1 1 1 2 2 2 2 1 2 2 2 1 2 2 2 2 2 1 2 2
## [5203] 2 1 2 2 1 2 2 2 1 2 2 2 1 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 2 2 2 2 1 1
## [5237] 2 1 1 2 2 2 2 2 2 1 1 2 2 2 2 1 2 1 2 2 2 2 2 2 2 1 2 2 2 2 2 1 1 2
## [5271] 1 1 1 2 2 1 2 1 1 2 2 2 2 2 2 2 1 2 2 2 2 1 2 1 2 2 2 1 2 2 2 2 2 2
## [5305] 2 1 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 2 1 1 2 2
## [5339] 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 2 2 1 1 1 1 2 2 2 2 2 2 2 2 2
## [5373] 1 2 1 2 1 1 1 2 2 1 2 2 1 2 2 2 2 2 2 2 2 2 2 1 1 2 2 1 1 2 1 1 1 1
## [5407] 1 1 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 1 1 2 1 2 1 2 2 2 2 2 2 1 1 2 2 1
## [5441] 2 2 1 1 2 1 2 1 1 1 2 1 2 2 2 1 1 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2
## [5475] 1 2 1 2 1 2 2 2 2 2 1 2 1 2 2 1 2 2 1 2 1 2 2 2 2 1 1 1 1 1 1 1 1 2
## [5509] 2 2 2 1 1 2 1 2 2 2 1 2 2 1 1 1 1 1 1 2 2 1 1 2 1 1 2 1 2 1 2 2 2 1
## [5543] 2 2 2 2 2 1 2 2 2 1 2 2 1 2 1 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2
## [5577] 2 2 2 2 2 2 1 2 1 1 2 2 1 2 1 1 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 1 2 2
## [5611] 2 2 2 2 1 2 2 2 1 1 1 2 2 2 1 2 2 1 2 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2
## [5645] 2 2 2 2 1 2 1 2 2 2 1 2 2 2 1 1 1 2 2 1 2 2 2 2 2 2 1 2 1 1 2 2 1 1
## [5679] 1 2 2 2 2 1 1 1 1 1 2 2 1 1 1 1 1 2 1 2 1 1 2 2 1 1 2 2 2 1 2 2 1 1
## [5713] 1 1 1 2 2 2 2 2 2 2 1 1 2 2 2 2 1 2 2 2 2 1 2 2 2 1 2 2 2 2 2 2 2 2
## [5747] 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 2 1 2 1 2 2 2 2 2 2 2
## [5781] 1 2 1 1 2 1 2 2 1 1 2 1 1 1 1 1 2 2 1 1 1 2 2 2 1 2 2 1 1 2 2 2 1 2
## [5815] 2 2 2 2 2 1 2 2 1 1 2 2 1 2 2 2 1 1 2 2 2 2 1 2 1 2 2 2 1 2 2 1 2 2
## [5849] 2 1 1 1 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2
## [5883] 1 1 1 1 2 1 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 1 2 2 1 2 1 2 2 1
## [5917] 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 1 1 1 2 2 2 1
## [5951] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 1 2 1 2 2 2 1 2 2 2 2 2 1 2
## [5985] 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 1 2 2 2 2 2
## [6019] 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2
## [6053] 1 2 2 2 2 2 2 2 2 1 2 2 2 2 2 1 1 1 1 1 2 2 2 1 2 2 2 2 2 2 1 2 1 1
## [6087] 1 2 2 2 1 1 2 1 2 1 2 1 1 2 2 2 1 2 2 1 1 2 1 2 1 1 1 2 2 1 1 1 2 2
## [6121] 2 1 2 2 2 2 1 1 2 2 2 2 2 1 2 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1
## [6155] 1 2 2 2 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 1 2 1 1 1 2 2 2 2 2 1 2 2 1 2
## [6189] 2 2 2 2 2 2 2 2 1 2 1 2 2 2 2 2 1 1 2 1 1 1 1 2 2 2 2 1 2 2 2 2 1 2
## [6223] 2 1 2 2 1 1 1 2 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 1 2 1 1 2 2 1 2 2 2
## [6257] 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 1 2 2 2 2 2 2 2 2
## [6291] 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 1 2 1 2 2 1 2 1 1 2 2 1 1 1 2 2 1
## [6325] 2 1 2 2 1 1 2 2 2 1 1 1 2 2 1 2 2 2 2 1 2 2 1 2 2 2 2 1 1 1 1 2 2 2
## [6359] 2 2 2 1 2 2 1 2 2 2 2 2 2 2 1 2 2 2 1 1 1 2 2 2 2 2 1 2 2 2 2 2 2 2
## [6393] 2 2 2 2 1 2 2 2 2 2 2 1 1 2 1 1 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2
## [6427] 1 2 1 2 2 2 1 1 1 1 2 2 1 2 2 1 2 2 1 2 1 2 2 2 2 1 1 1 2 2 2 2 2 2
## [6461] 1 1 1 2 1 2 1 2 1 2 1 2 2 2 1 2 1 1 2 2 2 2 2 2 2 2 1 2 2 1 2 2 1 2
## [6495] 2 1 1
## 
## Within cluster sum of squares by cluster:
## [1] 3296871 5297991
##  (between_SS / total_SS =  62.6 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
# View the clusters on a scatter plot of residual sugar against total sulfur
# dioxide, coloring each point by its k-means cluster id. The legend keeps
# the raw cluster ids (1, 2); they are not relabelled as wine colors.
ggplot(
  data = cbind(clus.data, cluster = factor(wine.cluster[["cluster"]])),
  mapping = aes(x = total.sulfur.dioxide, y = residual.sugar, color = cluster)
) +
  geom_point(shape = 1) +
  scale_color_manual(values = c("#800020", "#FEF9E7"), name = "label") +
  theme_dark()


Evaluate clustering

# Cross-tabulate the true wine color (rows) against the assigned cluster
# (columns).
table(wine[["wine.color"]], wine.cluster[["cluster"]])
##        
##            1    2
##   red   1515   84
##   white 1310 3588
# The same cross-tabulation as column proportions (margin = 2): the share of
# each wine color within each cluster.
color.by.cluster <- table(wine[["wine.color"]], wine.cluster[["cluster"]])
prop.table(color.by.cluster, margin = 2)
##        
##                  1          2
##   red   0.53628319 0.02287582
##   white 0.46371681 0.97712418

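Cluster 1 is much noisier than cluster 2: cluster 2 is about 98% white wine, whereas cluster 1 is a near-even mix of red (54%) and white (46%). A wine assigned to cluster 2 is therefore almost certainly white, while membership in cluster 1 says little about its color.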

Now check the cluster plot.

# Draw the two k-means clusters with cluster::clusplot().
# NOTE(review): see ?clusplot.default for the exact meaning of `labels = 5`.
clusplot(
  x = clus.data,
  clus = wine.cluster[["cluster"]],
  diss = FALSE,  # x holds observations, not a dissimilarity matrix
  main = "cluster plot",
  lines = 0,     # do not draw distance lines between the ellipses
  labels = 5,
  shade = TRUE,
  color = TRUE
)