This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

Predicting wine quality using Random Forests

This is the R code from the R-bloggers post Predicting wine quality using Random Forests.

This post uses the same white wine data that is used in the book.

The data

url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"
wine <- read.csv(url, sep = ";", header=TRUE)
head(wine)
barplot(table(wine$quality))

wine$taste <- ifelse(wine$quality < 6, 'bad', 'good')
wine$taste[wine$quality == 6] <- 'normal'
wine$taste <- as.factor(wine$taste)
table(wine$taste)

   bad   good normal 
  1640   1060   2198 
set.seed(123)
samp <- sample(nrow(wine), 0.6 * nrow(wine))
train <- wine[samp, ]
test <- wine[-samp, ]

Building the model

library(randomForest)
model <- randomForest(taste ~ . - quality, data = train, ntree=1000, mtry=5)
model

Call:
 randomForest(formula = taste ~ . - quality, data = train, ntree = 1000,      mtry = 5) 
               Type of random forest: classification
                     Number of trees: 1000
No. of variables tried at each split: 5

        OOB estimate of  error rate: 29.78%
Confusion matrix:
       bad good normal class.error
bad    679   14    280   0.3021583
good    18  396    235   0.3898305
normal 220  108    988   0.2492401
pred <- predict(model, newdata = test)

Confusion Matrix

table(pred, test$taste)
        
pred     bad good normal
  bad    476   11    129
  good    16  250     85
  normal 175  150    668

Accuracy

sum(pred==test$taste) / nrow(test)
[1] 0.7112245
LS0tCnRpdGxlOiAiUmFuZG9tIEZvcmVzdHMiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KClRoaXMgaXMgYW4gW1IgTWFya2Rvd25dKGh0dHA6Ly9ybWFya2Rvd24ucnN0dWRpby5jb20pIE5vdGVib29rLiBXaGVuIHlvdSBleGVjdXRlIGNvZGUgd2l0aGluIHRoZSBub3RlYm9vaywgdGhlIHJlc3VsdHMgYXBwZWFyIGJlbmVhdGggdGhlIGNvZGUuIAoKVHJ5IGV4ZWN1dGluZyB0aGlzIGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqUnVuKiBidXR0b24gd2l0aGluIHRoZSBjaHVuayBvciBieSBwbGFjaW5nIHlvdXIgY3Vyc29yIGluc2lkZSBpdCBhbmQgcHJlc3NpbmcgKkN0cmwrU2hpZnQrRW50ZXIqLiAKCiMgUHJlZGljdGluZyB3aW5lIHF1YWxpdHkgdXNpbmcgUmFuZG9tIEZvcmVzdHMKClRoaXMgaXMgdGhlIFIgY29kZSBmcm9tIHRoZSBbUi1ibG9nZ2Vyc10oaHR0cHM6Ly93d3cuci1ibG9nZ2Vycy5jb20vKSBwb3N0IFtQcmVkaWN0aW5nIHdpbmUgcXVhbGl0eSB1c2luZyBSYW5kb20gRm9yZXN0c10oaHR0cHM6Ly93d3cuci1ibG9nZ2Vycy5jb20vcHJlZGljdGluZy13aW5lLXF1YWxpdHktdXNpbmctcmFuZG9tLWZvcmVzdHMvKS4KClRoaXMgcG9zdCB1c2VzIHRoZSBzYW1lIHdoaXRlIHdpbmUgZGF0YSB0aGF0IGlzIHVzZWQgaW4gdGhlIGJvb2suCgojIyMgVGhlIGRhdGEKCmBgYHtyfQp1cmwgPC0gImh0dHBzOi8vYXJjaGl2ZS5pY3MudWNpLmVkdS9tbC9tYWNoaW5lLWxlYXJuaW5nLWRhdGFiYXNlcy93aW5lLXF1YWxpdHkvd2luZXF1YWxpdHktd2hpdGUuY3N2Igp3aW5lIDwtIHJlYWQuY3N2KHVybCwgc2VwID0gIjsiLCBoZWFkZXI9VFJVRSkKaGVhZCh3aW5lKQpgYGAKCmBgYHtyfQpiYXJwbG90KHRhYmxlKHdpbmUkcXVhbGl0eSkpCmBgYAoKYGBge3J9CndpbmUkdGFzdGUgPC0gaWZlbHNlKHdpbmUkcXVhbGl0eSA8IDYsICdiYWQnLCAnZ29vZCcpCndpbmUkdGFzdGVbd2luZSRxdWFsaXR5ID09IDZdIDwtICdub3JtYWwnCndpbmUkdGFzdGUgPC0gYXMuZmFjdG9yKHdpbmUkdGFzdGUpCmBgYAoKYGBge3J9CnRhYmxlKHdpbmUkdGFzdGUpCmBgYAoKYGBge3J9CnNldC5zZWVkKDEyMykKc2FtcCA8LSBzYW1wbGUobnJvdyh3aW5lKSwgMC42ICogbnJvdyh3aW5lKSkKdHJhaW4gPC0gd2luZVtzYW1wLCBdCnRlc3QgPC0gd2luZVstc2FtcCwgXQpgYGAKCiMjIyBCdWlsZGluZyB0aGUgbW9kZWwKCmBgYHtyfQpsaWJyYXJ5KHJhbmRvbUZvcmVzdCkKbW9kZWwgPC0gcmFuZG9tRm9yZXN0KHRhc3RlIH4gLiAtIHF1YWxpdHksIGRhdGEgPSB0cmFpbiwgbnRyZWU9MTAwMCwgbXRyeT01KQpgYGAKCmBgYHtyfQptb2RlbApgYGAKCmBgYHtyfQpwcmVkIDwtIHByZWRpY3QobW9kZWwsIG5ld2RhdGEgPSB0ZXN0KQpgYGAKCkNvbmZ1c2lvbiBNYXRyaXgKCmBgYHtyfQp0YWJsZShwcmVkLCB0ZXN0JHRhc3RlKQpgYGAKCkFjY3VyYWN5CgpgYGB7cn0Kc3VtKHByZWQ9PXRlc3QkdGFzdGUpIC8gbnJvdyh0ZXN0KQpgYGAKCg==