Example: Calculating SDR

# set up the data: the full set of target values (tee) and two candidate
# splits of it, A (at1 / at2) and B (bt1 / bt2)
tee <- c(1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 7, 7, 7)
at1 <- c(1, 1, 1, 2, 2, 3, 4, 5, 5)
at2 <- c(6, 6, 7, 7, 7, 7)
bt1 <- c(1, 1, 1, 2, 2, 3, 4)
bt2 <- c(5, 5, 6, 6, 7, 7, 7, 7)
# compute the SDR: the standard deviation before the split minus the
# size-weighted standard deviations of the two partitions after the split
n_tee <- length(tee)
sd_tee <- sd(tee)
sdr_a <- sd_tee -
  (length(at1) / n_tee * sd(at1) + length(at2) / n_tee * sd(at2))
sdr_b <- sd_tee -
  (length(bt1) / n_tee * sd(bt1) + length(bt2) / n_tee * sd(bt2))
# compare the SDR for each split
sdr_a
## [1] 1.202815
sdr_b
## [1] 1.392751

Exercise No 3: Estimating Wine Quality

Step 2: Exploring and preparing the data

# load the white wine data (path is relative to the working directory)
wine <- read.csv(file = "whitewines.csv")
# examine the structure of the wine data
str(wine)
## 'data.frame':    4898 obs. of  12 variables:
##  $ fixed.acidity       : num  6.7 5.7 5.9 5.3 6.4 7 7.9 6.6 7 6.5 ...
##  $ volatile.acidity    : num  0.62 0.22 0.19 0.47 0.29 0.14 0.12 0.38 0.16 0.37 ...
##  $ citric.acid         : num  0.24 0.2 0.26 0.1 0.21 0.41 0.49 0.28 0.3 0.33 ...
##  $ residual.sugar      : num  1.1 16 7.4 1.3 9.65 0.9 5.2 2.8 2.6 3.9 ...
##  $ chlorides           : num  0.039 0.044 0.034 0.036 0.041 0.037 0.049 0.043 0.043 0.027 ...
##  $ free.sulfur.dioxide : num  6 41 33 11 36 22 33 17 34 40 ...
##  $ total.sulfur.dioxide: num  62 113 123 74 119 95 152 67 90 130 ...
##  $ density             : num  0.993 0.999 0.995 0.991 0.993 ...
##  $ pH                  : num  3.41 3.22 3.49 3.48 2.99 3.25 3.18 3.21 2.88 3.28 ...
##  $ sulphates           : num  0.32 0.46 0.42 0.54 0.34 0.43 0.47 0.47 0.47 0.39 ...
##  $ alcohol             : num  10.4 8.9 10.1 11.2 10.9 ...
##  $ quality             : int  5 6 6 4 6 6 6 6 6 7 ...
# histogram of the quality ratings
hist(
  wine$quality,
  main = "Wine Quality Histogram",
  xlab = "Quality",
  ylab = "Count",
  col = "gold",
  border = "black"
)

# summary statistics of the wine data
summary(wine)
##  fixed.acidity    volatile.acidity  citric.acid     residual.sugar  
##  Min.   : 3.800   Min.   :0.0800   Min.   :0.0000   Min.   : 0.600  
##  1st Qu.: 6.300   1st Qu.:0.2100   1st Qu.:0.2700   1st Qu.: 1.700  
##  Median : 6.800   Median :0.2600   Median :0.3200   Median : 5.200  
##  Mean   : 6.855   Mean   :0.2782   Mean   :0.3342   Mean   : 6.391  
##  3rd Qu.: 7.300   3rd Qu.:0.3200   3rd Qu.:0.3900   3rd Qu.: 9.900  
##  Max.   :14.200   Max.   :1.1000   Max.   :1.6600   Max.   :65.800  
##    chlorides       free.sulfur.dioxide total.sulfur.dioxide    density      
##  Min.   :0.00900   Min.   :  2.00      Min.   :  9.0        Min.   :0.9871  
##  1st Qu.:0.03600   1st Qu.: 23.00      1st Qu.:108.0        1st Qu.:0.9917  
##  Median :0.04300   Median : 34.00      Median :134.0        Median :0.9937  
##  Mean   :0.04577   Mean   : 35.31      Mean   :138.4        Mean   :0.9940  
##  3rd Qu.:0.05000   3rd Qu.: 46.00      3rd Qu.:167.0        3rd Qu.:0.9961  
##  Max.   :0.34600   Max.   :289.00      Max.   :440.0        Max.   :1.0390  
##        pH          sulphates         alcohol         quality     
##  Min.   :2.720   Min.   :0.2200   Min.   : 8.00   Min.   :3.000  
##  1st Qu.:3.090   1st Qu.:0.4100   1st Qu.: 9.50   1st Qu.:5.000  
##  Median :3.180   Median :0.4700   Median :10.40   Median :6.000  
##  Mean   :3.188   Mean   :0.4898   Mean   :10.51   Mean   :5.878  
##  3rd Qu.:3.280   3rd Qu.:0.5500   3rd Qu.:11.40   3rd Qu.:6.000  
##  Max.   :3.820   Max.   :1.0800   Max.   :14.20   Max.   :9.000
# split the data by row order: the first n_train rows are used for training
# and all remaining rows for testing. The upper bound of the test set comes
# from nrow(wine) instead of a hard-coded row number, and the guard stops
# early if the file has too few rows (indexing past the end would otherwise
# silently produce rows of NA).
n_train <- 3750
stopifnot(nrow(wine) > n_train)
wine_train <- wine[seq_len(n_train), ]
wine_test <- wine[(n_train + 1):nrow(wine), ]

Step 3: Training a model on the data

# regression tree using rpart
library(rpart)
# model quality as a function of every other column in wine_train
# (the `.` on the right-hand side of the formula)
m.rpart <- rpart(quality ~ ., data = wine_train)
# get basic information about the tree
m.rpart
## n= 3750 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 3750 2945.53200 5.870933  
##    2) alcohol< 10.85 2372 1418.86100 5.604975  
##      4) volatile.acidity>=0.2275 1611  821.30730 5.432030  
##        8) volatile.acidity>=0.3025 688  278.97670 5.255814 *
##        9) volatile.acidity< 0.3025 923  505.04230 5.563380 *
##      5) volatile.acidity< 0.2275 761  447.36400 5.971091 *
##    3) alcohol>=10.85 1378 1070.08200 6.328737  
##      6) free.sulfur.dioxide< 10.5 84   95.55952 5.369048 *
##      7) free.sulfur.dioxide>=10.5 1294  892.13600 6.391036  
##       14) alcohol< 11.76667 629  430.11130 6.173291  
##         28) volatile.acidity>=0.465 11   10.72727 4.545455 *
##         29) volatile.acidity< 0.465 618  389.71680 6.202265 *
##       15) alcohol>=11.76667 665  403.99400 6.596992 *
# get more detailed information about the tree
summary(m.rpart)
## Call:
## rpart(formula = quality ~ ., data = wine_train)
##   n= 3750 
## 
##           CP nsplit rel error    xerror       xstd
## 1 0.15501053      0 1.0000000 1.0004996 0.02445439
## 2 0.05098911      1 0.8449895 0.8459058 0.02332725
## 3 0.02796998      2 0.7940004 0.8022026 0.02273808
## 4 0.01970128      3 0.7660304 0.7820275 0.02171426
## 5 0.01265926      4 0.7463291 0.7599030 0.02093256
## 6 0.01007193      5 0.7336698 0.7542015 0.02088634
## 7 0.01000000      6 0.7235979 0.7545502 0.02094457
## 
## Variable importance
##              alcohol              density     volatile.acidity 
##                   34                   21                   15 
##            chlorides total.sulfur.dioxide  free.sulfur.dioxide 
##                   11                    7                    6 
##       residual.sugar            sulphates          citric.acid 
##                    3                    1                    1 
## 
## Node number 1: 3750 observations,    complexity param=0.1550105
##   mean=5.870933, MSE=0.7854751 
##   left son=2 (2372 obs) right son=3 (1378 obs)
##   Primary splits:
##       alcohol              < 10.85    to the left,  improve=0.15501050, (0 missing)
##       density              < 0.992035 to the right, improve=0.10915940, (0 missing)
##       chlorides            < 0.0395   to the right, improve=0.07682258, (0 missing)
##       total.sulfur.dioxide < 158.5    to the right, improve=0.04089663, (0 missing)
##       citric.acid          < 0.235    to the left,  improve=0.03636458, (0 missing)
##   Surrogate splits:
##       density              < 0.991995 to the right, agree=0.869, adj=0.644, (0 split)
##       chlorides            < 0.0375   to the right, agree=0.757, adj=0.339, (0 split)
##       total.sulfur.dioxide < 103.5    to the right, agree=0.690, adj=0.155, (0 split)
##       residual.sugar       < 5.375    to the right, agree=0.667, adj=0.094, (0 split)
##       sulphates            < 0.345    to the right, agree=0.647, adj=0.038, (0 split)
## 
## Node number 2: 2372 observations,    complexity param=0.05098911
##   mean=5.604975, MSE=0.5981709 
##   left son=4 (1611 obs) right son=5 (761 obs)
##   Primary splits:
##       volatile.acidity    < 0.2275   to the right, improve=0.10585250, (0 missing)
##       free.sulfur.dioxide < 13.5     to the left,  improve=0.03390500, (0 missing)
##       citric.acid         < 0.235    to the left,  improve=0.03204075, (0 missing)
##       alcohol             < 10.11667 to the left,  improve=0.03136524, (0 missing)
##       chlorides           < 0.0585   to the right, improve=0.01633599, (0 missing)
##   Surrogate splits:
##       pH                   < 3.485    to the left,  agree=0.694, adj=0.047, (0 split)
##       sulphates            < 0.755    to the left,  agree=0.685, adj=0.020, (0 split)
##       total.sulfur.dioxide < 105.5    to the right, agree=0.683, adj=0.011, (0 split)
##       residual.sugar       < 0.75     to the right, agree=0.681, adj=0.007, (0 split)
##       chlorides            < 0.0285   to the right, agree=0.680, adj=0.003, (0 split)
## 
## Node number 3: 1378 observations,    complexity param=0.02796998
##   mean=6.328737, MSE=0.7765472 
##   left son=6 (84 obs) right son=7 (1294 obs)
##   Primary splits:
##       free.sulfur.dioxide  < 10.5     to the left,  improve=0.07699080, (0 missing)
##       alcohol              < 11.76667 to the left,  improve=0.06210660, (0 missing)
##       total.sulfur.dioxide < 67.5     to the left,  improve=0.04438619, (0 missing)
##       residual.sugar       < 1.375    to the left,  improve=0.02905351, (0 missing)
##       fixed.acidity        < 7.35     to the right, improve=0.02613259, (0 missing)
##   Surrogate splits:
##       total.sulfur.dioxide < 53.5     to the left,  agree=0.952, adj=0.214, (0 split)
##       volatile.acidity     < 0.875    to the right, agree=0.940, adj=0.024, (0 split)
## 
## Node number 4: 1611 observations,    complexity param=0.01265926
##   mean=5.43203, MSE=0.5098121 
##   left son=8 (688 obs) right son=9 (923 obs)
##   Primary splits:
##       volatile.acidity    < 0.3025   to the right, improve=0.04540111, (0 missing)
##       alcohol             < 10.05    to the left,  improve=0.03874403, (0 missing)
##       free.sulfur.dioxide < 13.5     to the left,  improve=0.03338886, (0 missing)
##       chlorides           < 0.0495   to the right, improve=0.02574623, (0 missing)
##       citric.acid         < 0.195    to the left,  improve=0.02327981, (0 missing)
##   Surrogate splits:
##       citric.acid          < 0.215    to the left,  agree=0.633, adj=0.141, (0 split)
##       free.sulfur.dioxide  < 20.5     to the left,  agree=0.600, adj=0.063, (0 split)
##       chlorides            < 0.0595   to the right, agree=0.593, adj=0.047, (0 split)
##       residual.sugar       < 1.15     to the left,  agree=0.583, adj=0.023, (0 split)
##       total.sulfur.dioxide < 219.25   to the right, agree=0.582, adj=0.022, (0 split)
## 
## Node number 5: 761 observations
##   mean=5.971091, MSE=0.5878633 
## 
## Node number 6: 84 observations
##   mean=5.369048, MSE=1.137613 
## 
## Node number 7: 1294 observations,    complexity param=0.01970128
##   mean=6.391036, MSE=0.6894405 
##   left son=14 (629 obs) right son=15 (665 obs)
##   Primary splits:
##       alcohol              < 11.76667 to the left,  improve=0.06504696, (0 missing)
##       chlorides            < 0.0395   to the right, improve=0.02758705, (0 missing)
##       fixed.acidity        < 7.35     to the right, improve=0.02750932, (0 missing)
##       pH                   < 3.055    to the left,  improve=0.02307356, (0 missing)
##       total.sulfur.dioxide < 191.5    to the right, improve=0.02186818, (0 missing)
##   Surrogate splits:
##       density              < 0.990885 to the right, agree=0.720, adj=0.424, (0 split)
##       volatile.acidity     < 0.2675   to the left,  agree=0.637, adj=0.253, (0 split)
##       chlorides            < 0.0365   to the right, agree=0.630, adj=0.238, (0 split)
##       residual.sugar       < 1.475    to the left,  agree=0.575, adj=0.126, (0 split)
##       total.sulfur.dioxide < 128.5    to the right, agree=0.574, adj=0.124, (0 split)
## 
## Node number 8: 688 observations
##   mean=5.255814, MSE=0.4054895 
## 
## Node number 9: 923 observations
##   mean=5.56338, MSE=0.5471747 
## 
## Node number 14: 629 observations,    complexity param=0.01007193
##   mean=6.173291, MSE=0.6838017 
##   left son=28 (11 obs) right son=29 (618 obs)
##   Primary splits:
##       volatile.acidity     < 0.465    to the right, improve=0.06897561, (0 missing)
##       total.sulfur.dioxide < 200      to the right, improve=0.04223066, (0 missing)
##       residual.sugar       < 0.975    to the left,  improve=0.03061714, (0 missing)
##       fixed.acidity        < 7.35     to the right, improve=0.02978501, (0 missing)
##       sulphates            < 0.575    to the left,  improve=0.02165970, (0 missing)
##   Surrogate splits:
##       citric.acid          < 0.045    to the left,  agree=0.986, adj=0.182, (0 split)
##       total.sulfur.dioxide < 279.25   to the right, agree=0.986, adj=0.182, (0 split)
## 
## Node number 15: 665 observations
##   mean=6.596992, MSE=0.6075098 
## 
## Node number 28: 11 observations
##   mean=4.545455, MSE=0.9752066 
## 
## Node number 29: 618 observations
##   mean=6.202265, MSE=0.6306098
# use the rpart.plot package to create a visualization;
# install it only when it is missing, so that re-running or knitting this
# script does not reinstall the package every time
if (!requireNamespace("rpart.plot", quietly = TRUE)) {
  install.packages("rpart.plot")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
library(rpart.plot)
# a basic decision tree diagram
rpart.plot(m.rpart, digits = 3)
This tree highlights alcohol as the primary driver of predicted wine quality: low-alcohol wines (alcohol < 10.85) generally receive lower scores. Within the low-alcohol branch, volatile acidity further separates poorer wines from slightly better ones, showing its strong influence on quality. For higher-alcohol wines, free sulfur dioxide and a second alcohol split refine the predictions, but some of the resulting nodes are small (as few as 11 wines), which makes those branches harder to interpret.
# a few adjustments to the diagram
rpart.plot(m.rpart, digits = 4, fallen.leaves = TRUE, type = 3, extra = 101)

Step 4: Evaluate model performance

# predict quality for the held-out test rows with the regression tree
p.rpart <- predict(object = m.rpart, newdata = wine_test)
# compare the distribution of predicted values vs. actual values
summary(p.rpart)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.545   5.563   5.971   5.893   6.202   6.597
summary(wine_test$quality)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.000   5.000   6.000   5.901   6.000   9.000
# compare the correlation
cor(p.rpart, wine_test$quality)
## [1] 0.5369525
# mean absolute error: the average size of the gap between the actual and
# the predicted values, ignoring the direction of each error
MAE <- function(actual, predicted) {
  abs_error <- abs(actual - predicted)
  mean(abs_error)
}
# mean absolute error between actual and predicted values
# (actual values first, predictions second, matching MAE's signature; the
# result is unchanged because the absolute error is symmetric)
MAE(wine_test$quality, p.rpart)
## [1] 0.5872652
# mean absolute error between actual values and mean value
mean(wine_train$quality) # result = 5.87
## [1] 5.870933
# baseline: actual values first, the constant prediction 5.87 second,
# matching MAE's (actual, predicted) signature
MAE(wine_test$quality, 5.87)
## [1] 0.6722474

Step 5: Improving model performance

# install.packages("plyr")  # left disabled, as in the original script
# install Cubist only when it is missing, so that re-running or knitting
# this script does not reinstall the package every time
if (!requireNamespace("Cubist", quietly = TRUE)) {
  install.packages("Cubist")
}
# train a Cubist Model Tree
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
library(Cubist)
## Loading required package: lattice
# fit the Cubist model: the predictors are wine_train without column 12
# (quality, the last column in the str() listing), and quality is the outcome
m.cubist <- cubist(x = wine_train[-12], y = wine_train$quality)
# display basic information about the model tree
m.cubist
## 
## Call:
## cubist.default(x = wine_train[-12], y = wine_train$quality)
## 
## Number of samples: 3750 
## Number of predictors: 11 
## 
## Number of committees: 1 
## Number of rules: 25
# display the tree itself
summary(m.cubist)
## 
## Call:
## cubist.default(x = wine_train[-12], y = wine_train$quality)
## 
## 
## Cubist [Release 2.07 GPL Edition]  Tue Feb 24 01:01:38 2026
## ---------------------------------
## 
##     Target attribute `outcome'
## 
## Read 3750 cases (12 attributes) from undefined.data
## 
## Model:
## 
##   Rule 1: [21 cases, mean 5.0, range 4 to 6, est err 0.5]
## 
##     if
##  free.sulfur.dioxide > 30
##  total.sulfur.dioxide > 195
##  total.sulfur.dioxide <= 235
##  sulphates > 0.64
##  alcohol > 9.1
##     then
##  outcome = 573.6 + 0.0478 total.sulfur.dioxide - 573 density
##            - 0.788 alcohol + 0.186 residual.sugar - 4.73 volatile.acidity
## 
##   Rule 2: [28 cases, mean 5.0, range 4 to 8, est err 0.7]
## 
##     if
##  volatile.acidity > 0.31
##  citric.acid <= 0.36
##  residual.sugar <= 1.45
##  total.sulfur.dioxide <= 97
##  alcohol > 9.1
##     then
##  outcome = 168.2 + 4.75 citric.acid + 0.0123 total.sulfur.dioxide
##            - 170 density + 0.057 residual.sugar - 6.4 chlorides + 0.84 pH
##            + 0.14 fixed.acidity
## 
##   Rule 3: [171 cases, mean 5.1, range 3 to 6, est err 0.3]
## 
##     if
##  volatile.acidity > 0.205
##  chlorides <= 0.054
##  density <= 0.99839
##  alcohol <= 9.1
##     then
##  outcome = 147.4 - 144 density + 0.08 residual.sugar + 0.117 alcohol
##            - 0.87 volatile.acidity - 0.09 pH - 0.01 fixed.acidity
## 
##   Rule 4: [37 cases, mean 5.3, range 3 to 6, est err 0.5]
## 
##     if
##  free.sulfur.dioxide > 30
##  total.sulfur.dioxide > 235
##  alcohol > 9.1
##     then
##  outcome = 19.5 - 0.013 total.sulfur.dioxide - 2.7 volatile.acidity
##            - 10 density + 0.005 residual.sugar + 0.008 alcohol
## 
##   Rule 5: [64 cases, mean 5.3, range 5 to 6, est err 0.3]
## 
##     if
##  volatile.acidity > 0.205
##  residual.sugar > 17.85
##     then
##  outcome = -23.6 + 0.233 alcohol - 5.2 chlorides - 0.75 citric.acid
##            + 28 density - 0.81 volatile.acidity - 0.19 pH
##            - 0.002 residual.sugar
## 
##   Rule 6: [56 cases, mean 5.3, range 4 to 7, est err 0.6]
## 
##     if
##  fixed.acidity <= 7.1
##  volatile.acidity > 0.205
##  chlorides > 0.054
##  density <= 0.99839
##  alcohol <= 9.1
##     then
##  outcome = 40.6 + 0.374 alcohol - 1.62 volatile.acidity
##            + 0.026 residual.sugar - 38 density - 0.21 pH
##            - 0.01 fixed.acidity
## 
##   Rule 7: [337 cases, mean 5.3, range 3 to 7, est err 0.4]
## 
##     if
##  fixed.acidity <= 7.8
##  volatile.acidity > 0.305
##  chlorides <= 0.09
##  free.sulfur.dioxide <= 82.5
##  total.sulfur.dioxide > 130
##  total.sulfur.dioxide <= 235
##  sulphates <= 0.64
##  alcohol <= 10.4
##     then
##  outcome = -32.1 + 0.233 alcohol - 9.7 chlorides
##            + 0.0038 total.sulfur.dioxide - 0.0081 free.sulfur.dioxide
##            + 35 density + 0.81 volatile.acidity
## 
##   Rule 8: [30 cases, mean 5.5, range 3 to 7, est err 0.5]
## 
##     if
##  fixed.acidity > 7.1
##  volatile.acidity > 0.205
##  chlorides > 0.054
##  density <= 0.99839
##  alcohol <= 9.1
##     then
##  outcome = 244 - 1.56 fixed.acidity - 228 density
##            + 0.0252 free.sulfur.dioxide - 7.3 chlorides
##            - 0.19 volatile.acidity + 0.003 residual.sugar
## 
##   Rule 9: [98 cases, mean 5.5, range 4 to 8, est err 0.5]
## 
##     if
##  volatile.acidity > 0.155
##  chlorides > 0.09
##  total.sulfur.dioxide <= 235
##  sulphates <= 0.64
##     then
##  outcome = 55.9 - 3.85 volatile.acidity - 52 density
##            + 0.023 residual.sugar + 0.092 alcohol + 0.35 pH
##            + 0.05 fixed.acidity + 0.3 sulphates
##            + 0.001 free.sulfur.dioxide
## 
##   Rule 10: [446 cases, mean 5.6, range 4 to 8, est err 0.5]
## 
##     if
##  fixed.acidity <= 7.8
##  volatile.acidity > 0.155
##  volatile.acidity <= 0.305
##  chlorides <= 0.09
##  free.sulfur.dioxide <= 82.5
##  total.sulfur.dioxide > 130
##  total.sulfur.dioxide <= 235
##  sulphates <= 0.64
##  alcohol > 9.1
##  alcohol <= 10.4
##     then
##  outcome = 15.1 + 0.35 alcohol - 3.09 volatile.acidity - 14.7 chlorides
##            + 1.16 sulphates - 0.0022 total.sulfur.dioxide
##            + 0.11 fixed.acidity + 0.45 pH + 0.5 citric.acid - 14 density
##            + 0.006 residual.sugar
## 
##   Rule 11: [31 cases, mean 5.6, range 3 to 8, est err 0.8]
## 
##     if
##  volatile.acidity > 0.31
##  citric.acid > 0.36
##  free.sulfur.dioxide <= 30
##  total.sulfur.dioxide <= 97
##     then
##  outcome = 3.2 + 0.0584 total.sulfur.dioxide + 7.77 volatile.acidity
##            + 0.328 alcohol - 9 density + 0.003 residual.sugar
## 
##   Rule 12: [20 cases, mean 5.7, range 3 to 8, est err 0.9]
## 
##     if
##  free.sulfur.dioxide > 82.5
##  total.sulfur.dioxide <= 235
##  sulphates <= 0.64
##  alcohol > 9.1
##     then
##  outcome = -8.9 + 109.3 chlorides + 0.948 alcohol
## 
##   Rule 13: [331 cases, mean 5.8, range 4 to 8, est err 0.5]
## 
##     if
##  volatile.acidity > 0.31
##  free.sulfur.dioxide <= 30
##  total.sulfur.dioxide > 97
##  alcohol > 9.1
##     then
##  outcome = 89.8 + 0.0234 free.sulfur.dioxide + 0.324 alcohol
##            + 0.07 residual.sugar - 90 density - 1.47 volatile.acidity
##            + 0.48 pH
## 
##   Rule 14: [116 cases, mean 5.8, range 3 to 8, est err 0.6]
## 
##     if
##  fixed.acidity > 7.8
##  volatile.acidity > 0.155
##  free.sulfur.dioxide > 30
##  total.sulfur.dioxide > 130
##  total.sulfur.dioxide <= 235
##  sulphates <= 0.64
##  alcohol > 9.1
##     then
##  outcome = 6 + 0.346 alcohol - 0.41 fixed.acidity - 1.69 volatile.acidity
##            - 2.9 chlorides + 0.19 sulphates + 0.07 pH
## 
##   Rule 15: [115 cases, mean 5.8, range 4 to 7, est err 0.5]
## 
##     if
##  volatile.acidity > 0.205
##  residual.sugar <= 17.85
##  density > 0.99839
##  alcohol <= 9.1
##     then
##  outcome = -110.2 + 120 density - 3.46 volatile.acidity - 0.97 pH
##            - 0.022 residual.sugar + 0.088 alcohol - 0.6 citric.acid
##            - 0.01 fixed.acidity
## 
##   Rule 16: [986 cases, mean 5.9, range 3 to 9, est err 0.6]
## 
##     if
##  volatile.acidity <= 0.31
##  free.sulfur.dioxide <= 30
##  alcohol > 9.1
##     then
##  outcome = 280.4 - 282 density + 0.128 residual.sugar
##            + 0.0264 free.sulfur.dioxide - 3 volatile.acidity + 1.2 pH
##            + 0.65 citric.acid + 0.09 fixed.acidity + 0.56 sulphates
##            + 0.015 alcohol
## 
##   Rule 17: [49 cases, mean 6.0, range 5 to 8, est err 0.5]
## 
##     if
##  volatile.acidity > 0.155
##  residual.sugar > 8.8
##  free.sulfur.dioxide > 30
##  total.sulfur.dioxide <= 130
##  pH <= 3.26
##  alcohol > 9.1
##     then
##  outcome = 173.5 - 169 density + 0.055 alcohol + 0.38 sulphates
##            + 0.002 residual.sugar
## 
##   Rule 18: [114 cases, mean 6.1, range 3 to 9, est err 0.6]
## 
##     if
##  volatile.acidity > 0.31
##  citric.acid <= 0.36
##  residual.sugar > 1.45
##  total.sulfur.dioxide <= 97
##  alcohol > 9.1
##     then
##  outcome = 302.3 - 305 density + 0.0128 total.sulfur.dioxide
##            + 0.096 residual.sugar + 1.94 citric.acid + 1.05 pH
##            + 0.17 fixed.acidity - 6.7 chlorides
##            + 0.0022 free.sulfur.dioxide - 0.21 volatile.acidity
##            + 0.013 alcohol + 0.09 sulphates
## 
##   Rule 19: [145 cases, mean 6.1, range 5 to 8, est err 0.6]
## 
##     if
##  volatile.acidity > 0.155
##  free.sulfur.dioxide > 30
##  total.sulfur.dioxide <= 195
##  sulphates > 0.64
##     then
##  outcome = 206 - 209 density + 0.069 residual.sugar + 0.38 fixed.acidity
##            + 2.79 sulphates + 0.0155 free.sulfur.dioxide
##            - 0.0051 total.sulfur.dioxide - 1.71 citric.acid + 1.04 pH
## 
##   Rule 20: [555 cases, mean 6.1, range 3 to 9, est err 0.6]
## 
##     if
##  total.sulfur.dioxide > 130
##  total.sulfur.dioxide <= 235
##  sulphates <= 0.64
##  alcohol > 10.4
##     then
##  outcome = 108 + 0.276 alcohol - 109 density + 0.05 residual.sugar
##            + 0.77 pH - 1.02 volatile.acidity - 4.2 chlorides
##            + 0.78 sulphates + 0.08 fixed.acidity
##            + 0.0016 free.sulfur.dioxide - 0.0003 total.sulfur.dioxide
## 
##   Rule 21: [73 cases, mean 6.2, range 4 to 8, est err 0.4]
## 
##     if
##  volatile.acidity > 0.155
##  citric.acid <= 0.28
##  residual.sugar <= 8.8
##  free.sulfur.dioxide > 30
##  total.sulfur.dioxide <= 130
##  pH <= 3.26
##  sulphates <= 0.64
##  alcohol > 9.1
##     then
##  outcome = 4.2 + 0.147 residual.sugar + 0.47 alcohol + 3.75 sulphates
##            - 2.5 volatile.acidity - 5 density
## 
##   Rule 22: [244 cases, mean 6.3, range 4 to 8, est err 0.6]
## 
##     if
##  citric.acid > 0.28
##  residual.sugar <= 8.8
##  free.sulfur.dioxide > 30
##  total.sulfur.dioxide <= 130
##  pH <= 3.26
##     then
##  outcome = 40.1 + 0.278 alcohol + 1.3 sulphates - 39 density
##            + 0.017 residual.sugar + 0.001 total.sulfur.dioxide + 0.17 pH
##            + 0.03 fixed.acidity
## 
##   Rule 23: [106 cases, mean 6.3, range 4 to 8, est err 0.6]
## 
##     if
##  volatile.acidity <= 0.155
##  free.sulfur.dioxide > 30
##     then
##  outcome = 139.1 - 138 density + 0.058 residual.sugar + 0.71 pH
##            + 0.92 sulphates + 0.11 fixed.acidity - 0.73 volatile.acidity
##            + 0.055 alcohol - 0.0012 total.sulfur.dioxide
##            + 0.0007 free.sulfur.dioxide
## 
##   Rule 24: [137 cases, mean 6.5, range 4 to 9, est err 0.6]
## 
##     if
##  volatile.acidity > 0.155
##  free.sulfur.dioxide > 30
##  total.sulfur.dioxide <= 130
##  pH > 3.26
##  sulphates <= 0.64
##  alcohol > 9.1
##     then
##  outcome = 114.2 + 0.0142 total.sulfur.dioxide - 107 density
##            - 11.8 chlorides - 1.57 pH + 0.124 alcohol + 1.21 sulphates
##            + 1.16 volatile.acidity + 0.021 residual.sugar
##            + 0.04 fixed.acidity
## 
##   Rule 25: [92 cases, mean 6.5, range 4 to 8, est err 0.6]
## 
##     if
##  volatile.acidity <= 0.205
##  alcohol <= 9.1
##     then
##  outcome = -200.7 + 210 density + 5.88 volatile.acidity + 23.9 chlorides
##            - 2.83 citric.acid - 1.17 pH
## 
## 
## Evaluation on training data (3750 cases):
## 
##     Average  |error|                0.5
##     Relative |error|               0.67
##     Correlation coefficient        0.66
## 
## 
##  Attribute usage:
##    Conds  Model
## 
##     84%    93%    alcohol
##     80%    89%    volatile.acidity
##     70%    61%    free.sulfur.dioxide
##     63%    50%    total.sulfur.dioxide
##     44%    70%    sulphates
##     26%    44%    chlorides
##     22%    76%    fixed.acidity
##     16%    87%    residual.sugar
##     11%    86%    pH
##     11%    45%    citric.acid
##      8%    97%    density
## 
## 
## Time: 0.2 secs
# predict quality for the held-out test rows with the Cubist model
p.cubist <- predict(object = m.cubist, newdata = wine_test)
# summary statistics about the predictions
summary(p.cubist)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.677   5.416   5.906   5.848   6.238   7.393
# correlation between the predicted and true values
cor(p.cubist, wine_test$quality)
## [1] 0.6201015
# mean absolute error of predicted and true values
# (uses a custom function defined above)
MAE(wine_test$quality, p.cubist) 
## [1] 0.5339725

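Overall, with an MAE of 0.53 on the test data, the Cubist model tree does a reasonably good job of predicting quality in white wines, improving on the regression tree (MAE 0.59) and on the baseline of always predicting 5.87, the rounded training mean (MAE 0.67). Its mean prediction (5.85) is close to a quality of 6. Also, the correlation between the actual wine quality and the predicted values is decent (0.62, up from 0.54 for the regression tree).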