## Step 1: Collecting data ----
# The data include examples of red and white Vinho Verde wines from Portugal, one of the world's leading wine-producing countries. Because the factors that contribute to a highly rated wine may differ between the red and white varieties, this analysis examines only the more popular white wines. The white wine data include information on 11 chemical properties of 4,898 wine samples. For each wine, a laboratory analysis measured characteristics such as acidity, sugar content, chlorides, sulfur, alcohol, pH, and density. The samples were then rated in a blind tasting by panels of no fewer than three judges on a quality scale ranging from 0 (very bad) to 10 (excellent). When the judges disagreed on a rating, the median value was used.

## Step 2: Exploring and preparing the data ----
# Download the white-wine samples as a data frame; the first row of the CSV
# supplies the column names (read.csv's default, spelled out here).
wine <- read.csv(
  file = "http://www.sci.csueastbay.edu/~esuess/classes/Statistics_6620/Presentations/ml10/whitewines.csv",
  header = TRUE
)

# Inspect the structure: column names, storage types, and leading values.
str(object = wine)
## 'data.frame':    4898 obs. of  12 variables:
##  $ fixed.acidity       : num  6.7 5.7 5.9 5.3 6.4 7 7.9 6.6 7 6.5 ...
##  $ volatile.acidity    : num  0.62 0.22 0.19 0.47 0.29 0.14 0.12 0.38 0.16 0.37 ...
##  $ citric.acid         : num  0.24 0.2 0.26 0.1 0.21 0.41 0.49 0.28 0.3 0.33 ...
##  $ residual.sugar      : num  1.1 16 7.4 1.3 9.65 0.9 5.2 2.8 2.6 3.9 ...
##  $ chlorides           : num  0.039 0.044 0.034 0.036 0.041 0.037 0.049 0.043 0.043 0.027 ...
##  $ free.sulfur.dioxide : num  6 41 33 11 36 22 33 17 34 40 ...
##  $ total.sulfur.dioxide: num  62 113 123 74 119 95 152 67 90 130 ...
##  $ density             : num  0.993 0.999 0.995 0.991 0.993 ...
##  $ pH                  : num  3.41 3.22 3.49 3.48 2.99 3.25 3.18 3.21 2.88 3.28 ...
##  $ sulphates           : num  0.32 0.46 0.42 0.54 0.34 0.43 0.47 0.47 0.47 0.39 ...
##  $ alcohol             : num  10.4 8.9 10.1 11.2 10.9 ...
##  $ quality             : int  5 6 6 4 6 6 6 6 6 7 ...
#The wine data include 11 features and the quality outcome
# Compared to other types of machine learning models, one of the advantages of trees is that they can handle many types of data without preprocessing. This means we do not need to normalize or standardize the features.
# Plot the distribution of the quality ratings, the outcome modeled later in
# this script.
hist(wine$quality)

# Per-column summary statistics (min, quartiles, mean, max) of the wine data.
summary(wine)
##  fixed.acidity    volatile.acidity  citric.acid     residual.sugar  
##  Min.   : 3.800   Min.   :0.0800   Min.   :0.0000   Min.   : 0.600  
##  1st Qu.: 6.300   1st Qu.:0.2100   1st Qu.:0.2700   1st Qu.: 1.700  
##  Median : 6.800   Median :0.2600   Median :0.3200   Median : 5.200  
##  Mean   : 6.855   Mean   :0.2782   Mean   :0.3342   Mean   : 6.391  
##  3rd Qu.: 7.300   3rd Qu.:0.3200   3rd Qu.:0.3900   3rd Qu.: 9.900  
##  Max.   :14.200   Max.   :1.1000   Max.   :1.6600   Max.   :65.800  
##    chlorides       free.sulfur.dioxide total.sulfur.dioxide
##  Min.   :0.00900   Min.   :  2.00      Min.   :  9.0       
##  1st Qu.:0.03600   1st Qu.: 23.00      1st Qu.:108.0       
##  Median :0.04300   Median : 34.00      Median :134.0       
##  Mean   :0.04577   Mean   : 35.31      Mean   :138.4       
##  3rd Qu.:0.05000   3rd Qu.: 46.00      3rd Qu.:167.0       
##  Max.   :0.34600   Max.   :289.00      Max.   :440.0       
##     density             pH          sulphates         alcohol     
##  Min.   :0.9871   Min.   :2.720   Min.   :0.2200   Min.   : 8.00  
##  1st Qu.:0.9917   1st Qu.:3.090   1st Qu.:0.4100   1st Qu.: 9.50  
##  Median :0.9937   Median :3.180   Median :0.4700   Median :10.40  
##  Mean   :0.9940   Mean   :3.188   Mean   :0.4898   Mean   :10.51  
##  3rd Qu.:0.9961   3rd Qu.:3.280   3rd Qu.:0.5500   3rd Qu.:11.40  
##  Max.   :1.0390   Max.   :3.820   Max.   :1.0800   Max.   :14.20  
##     quality     
##  Min.   :3.000  
##  1st Qu.:5.000  
##  Median :6.000  
##  Mean   :5.878  
##  3rd Qu.:6.000  
##  Max.   :9.000
#Our last step then is to divide into training and testing datasets. Since the wine data were already sorted into random order, we can partition into two sets of contiguous rows
# The first `n_train` rows form the training set; every remaining row is held
# out for testing. head()/tail() clamp to the rows that actually exist, so a
# file with a different row count can neither introduce all-NA rows (as a
# hard-coded 3751:4898 would for a shorter file) nor silently drop rows (for a
# longer one). For the 4898-row file this selects rows 1-3750 and 3751-4898.
n_train <- 3750
wine_train <- head(wine, n_train)
wine_test <- tail(wine, -n_train)
## Step 3: Training a model on the data ----
# Although almost any implementation of decision trees can be used to perform regression tree modeling, the rpart (recursive partitioning) package offers perhaps the most faithful implementation of regression trees as they were described by the CART team.
library(rpart)
# Fit a tree with rpart, modeling quality from every other column of the
# training set.
m.rpart <- rpart(
  formula = quality ~ .,
  data = wine_train
)

# Print the fitted tree: one line per node with its split, n, deviance, yval.
print(m.rpart)
## n= 3750 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 3750 2945.53200 5.870933  
##    2) alcohol< 10.85 2372 1418.86100 5.604975  
##      4) volatile.acidity>=0.2275 1611  821.30730 5.432030  
##        8) volatile.acidity>=0.3025 688  278.97670 5.255814 *
##        9) volatile.acidity< 0.3025 923  505.04230 5.563380 *
##      5) volatile.acidity< 0.2275 761  447.36400 5.971091 *
##    3) alcohol>=10.85 1378 1070.08200 6.328737  
##      6) free.sulfur.dioxide< 10.5 84   95.55952 5.369048 *
##      7) free.sulfur.dioxide>=10.5 1294  892.13600 6.391036  
##       14) alcohol< 11.76667 629  430.11130 6.173291  
##         28) volatile.acidity>=0.465 11   10.72727 4.545455 *
##         29) volatile.acidity< 0.465 618  389.71680 6.202265 *
##       15) alcohol>=11.76667 665  403.99400 6.596992 *
# Detailed report on the fitted tree: complexity-parameter table, variable
# importance, and the primary/surrogate splits considered at each node.
summary(object = m.rpart)
## Call:
## rpart(formula = quality ~ ., data = wine_train)
##   n= 3750 
## 
##           CP nsplit rel error    xerror       xstd
## 1 0.15501053      0 1.0000000 1.0008763 0.02446762
## 2 0.05098911      1 0.8449895 0.8458386 0.02333416
## 3 0.02796998      2 0.7940004 0.8038789 0.02277807
## 4 0.01970128      3 0.7660304 0.7862819 0.02194886
## 5 0.01265926      4 0.7463291 0.7625430 0.02092908
## 6 0.01007193      5 0.7336698 0.7535891 0.02050616
## 7 0.01000000      6 0.7235979 0.7447673 0.02025170
## 
## Variable importance
##              alcohol              density     volatile.acidity 
##                   34                   21                   15 
##            chlorides total.sulfur.dioxide  free.sulfur.dioxide 
##                   11                    7                    6 
##       residual.sugar            sulphates          citric.acid 
##                    3                    1                    1 
## 
## Node number 1: 3750 observations,    complexity param=0.1550105
##   mean=5.870933, MSE=0.7854751 
##   left son=2 (2372 obs) right son=3 (1378 obs)
##   Primary splits:
##       alcohol              < 10.85    to the left,  improve=0.15501050, (0 missing)
##       density              < 0.992035 to the right, improve=0.10915940, (0 missing)
##       chlorides            < 0.0395   to the right, improve=0.07682258, (0 missing)
##       total.sulfur.dioxide < 158.5    to the right, improve=0.04089663, (0 missing)
##       citric.acid          < 0.235    to the left,  improve=0.03636458, (0 missing)
##   Surrogate splits:
##       density              < 0.991995 to the right, agree=0.869, adj=0.644, (0 split)
##       chlorides            < 0.0375   to the right, agree=0.757, adj=0.339, (0 split)
##       total.sulfur.dioxide < 103.5    to the right, agree=0.690, adj=0.155, (0 split)
##       residual.sugar       < 5.375    to the right, agree=0.667, adj=0.094, (0 split)
##       sulphates            < 0.345    to the right, agree=0.647, adj=0.038, (0 split)
## 
## Node number 2: 2372 observations,    complexity param=0.05098911
##   mean=5.604975, MSE=0.5981709 
##   left son=4 (1611 obs) right son=5 (761 obs)
##   Primary splits:
##       volatile.acidity    < 0.2275   to the right, improve=0.10585250, (0 missing)
##       free.sulfur.dioxide < 13.5     to the left,  improve=0.03390500, (0 missing)
##       citric.acid         < 0.235    to the left,  improve=0.03204075, (0 missing)
##       alcohol             < 10.11667 to the left,  improve=0.03136524, (0 missing)
##       chlorides           < 0.0585   to the right, improve=0.01633599, (0 missing)
##   Surrogate splits:
##       pH                   < 3.485    to the left,  agree=0.694, adj=0.047, (0 split)
##       sulphates            < 0.755    to the left,  agree=0.685, adj=0.020, (0 split)
##       total.sulfur.dioxide < 105.5    to the right, agree=0.683, adj=0.011, (0 split)
##       residual.sugar       < 0.75     to the right, agree=0.681, adj=0.007, (0 split)
##       chlorides            < 0.0285   to the right, agree=0.680, adj=0.003, (0 split)
## 
## Node number 3: 1378 observations,    complexity param=0.02796998
##   mean=6.328737, MSE=0.7765472 
##   left son=6 (84 obs) right son=7 (1294 obs)
##   Primary splits:
##       free.sulfur.dioxide  < 10.5     to the left,  improve=0.07699080, (0 missing)
##       alcohol              < 11.76667 to the left,  improve=0.06210660, (0 missing)
##       total.sulfur.dioxide < 67.5     to the left,  improve=0.04438619, (0 missing)
##       residual.sugar       < 1.375    to the left,  improve=0.02905351, (0 missing)
##       fixed.acidity        < 7.35     to the right, improve=0.02613259, (0 missing)
##   Surrogate splits:
##       total.sulfur.dioxide < 53.5     to the left,  agree=0.952, adj=0.214, (0 split)
##       volatile.acidity     < 0.875    to the right, agree=0.940, adj=0.024, (0 split)
## 
## Node number 4: 1611 observations,    complexity param=0.01265926
##   mean=5.43203, MSE=0.5098121 
##   left son=8 (688 obs) right son=9 (923 obs)
##   Primary splits:
##       volatile.acidity    < 0.3025   to the right, improve=0.04540111, (0 missing)
##       alcohol             < 10.05    to the left,  improve=0.03874403, (0 missing)
##       free.sulfur.dioxide < 13.5     to the left,  improve=0.03338886, (0 missing)
##       chlorides           < 0.0495   to the right, improve=0.02574623, (0 missing)
##       citric.acid         < 0.195    to the left,  improve=0.02327981, (0 missing)
##   Surrogate splits:
##       citric.acid          < 0.215    to the left,  agree=0.633, adj=0.141, (0 split)
##       free.sulfur.dioxide  < 20.5     to the left,  agree=0.600, adj=0.063, (0 split)
##       chlorides            < 0.0595   to the right, agree=0.593, adj=0.047, (0 split)
##       residual.sugar       < 1.15     to the left,  agree=0.583, adj=0.023, (0 split)
##       total.sulfur.dioxide < 219.25   to the right, agree=0.582, adj=0.022, (0 split)
## 
## Node number 5: 761 observations
##   mean=5.971091, MSE=0.5878633 
## 
## Node number 6: 84 observations
##   mean=5.369048, MSE=1.137613 
## 
## Node number 7: 1294 observations,    complexity param=0.01970128
##   mean=6.391036, MSE=0.6894405 
##   left son=14 (629 obs) right son=15 (665 obs)
##   Primary splits:
##       alcohol              < 11.76667 to the left,  improve=0.06504696, (0 missing)
##       chlorides            < 0.0395   to the right, improve=0.02758705, (0 missing)
##       fixed.acidity        < 7.35     to the right, improve=0.02750932, (0 missing)
##       pH                   < 3.055    to the left,  improve=0.02307356, (0 missing)
##       total.sulfur.dioxide < 191.5    to the right, improve=0.02186818, (0 missing)
##   Surrogate splits:
##       density              < 0.990885 to the right, agree=0.720, adj=0.424, (0 split)
##       volatile.acidity     < 0.2675   to the left,  agree=0.637, adj=0.253, (0 split)
##       chlorides            < 0.0365   to the right, agree=0.630, adj=0.238, (0 split)
##       residual.sugar       < 1.475    to the left,  agree=0.575, adj=0.126, (0 split)
##       total.sulfur.dioxide < 128.5    to the right, agree=0.574, adj=0.124, (0 split)
## 
## Node number 8: 688 observations
##   mean=5.255814, MSE=0.4054895 
## 
## Node number 9: 923 observations
##   mean=5.56338, MSE=0.5471747 
## 
## Node number 14: 629 observations,    complexity param=0.01007193
##   mean=6.173291, MSE=0.6838017 
##   left son=28 (11 obs) right son=29 (618 obs)
##   Primary splits:
##       volatile.acidity     < 0.465    to the right, improve=0.06897561, (0 missing)
##       total.sulfur.dioxide < 200      to the right, improve=0.04223066, (0 missing)
##       residual.sugar       < 0.975    to the left,  improve=0.03061714, (0 missing)
##       fixed.acidity        < 7.35     to the right, improve=0.02978501, (0 missing)
##       sulphates            < 0.575    to the left,  improve=0.02165970, (0 missing)
##   Surrogate splits:
##       citric.acid          < 0.045    to the left,  agree=0.986, adj=0.182, (0 split)
##       total.sulfur.dioxide < 279.25   to the right, agree=0.986, adj=0.182, (0 split)
## 
## Node number 15: 665 observations
##   mean=6.596992, MSE=0.6075098 
## 
## Node number 28: 11 observations
##   mean=4.545455, MSE=0.9752066 
## 
## Node number 29: 618 observations
##   mean=6.202265, MSE=0.6306098
#Nodes indicated by * are terminal or leaf nodes, which means that they result in a prediction.


# use the rpart.plot package to create a visualization
library(rpart.plot)

# Basic tree diagram with values shown to three significant digits.
rpart.plot(x = m.rpart, digits = 3)

# Adjusted diagram: four significant digits, leaves aligned at the bottom of
# the plot, and alternative split-label / node-annotation settings
# (see the rpart.plot help page for the meaning of `type` and `extra` codes).
rpart.plot(
  x = m.rpart,
  type = 3,
  extra = 101,
  digits = 4,
  fallen.leaves = TRUE
)

## Step 4: Evaluate model performance ----

# Score the held-out wines with the regression tree. With no `type` argument,
# predict() returns the estimated numeric quality for each row.
p.rpart <- predict(object = m.rpart, newdata = wine_test)

# Distribution of the predictions, followed by that of the true ratings.
summary(object = p.rpart)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.545   5.563   5.971   5.893   6.202   6.597
summary(object = wine_test$quality)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.000   5.000   6.000   5.901   6.000   9.000
# Correlation between predicted and true quality.
cor(x = p.rpart, y = wine_test$quality)
## [1] 0.5369525
#A correlation of 0.54 is certainly acceptable. However, the correlation only measures how strongly the predictions are related to the true value; it is not a measure of how far off the predictions were from the true values.


#Another way to think about the model's performance is to consider how far, on average, its prediction was from the true value. This measurement is called the mean absolute error (MAE).
# Mean absolute error: the average size of the gap between observed values
# (`actual`) and estimates (`predicted`). Either argument may be a single
# number, in which case it is recycled against the other vector.
MAE <- function(actual, predicted) {
  abs_errors <- abs(actual - predicted)
  mean(abs_errors)
}

# Mean absolute error of the regression tree on the test set. Arguments are
# named to follow MAE's (actual, predicted) signature; the value is the same
# either way because the absolute error is symmetric.
MAE(actual = wine_test$quality, predicted = p.rpart)
## [1] 0.5872652
# Baseline for comparison: the error made by predicting the (rounded) mean
# training quality for every test wine.
mean(wine_train$quality) # result = 5.87
## [1] 5.870933
MAE(actual = wine_test$quality, predicted = 5.87)
## [1] 0.6722474
## Step 5: Improving model performance ----
#To improve the performance of our learner, let's try to build a model tree. Recall that a model tree improves on regression trees by replacing the leaf nodes with regression models. This often results in more accurate results than regression trees, which use only a single value for prediction at the leaf nodes.
# train a M5' Model Tree
library(RWeka)
# Fit an M5P model tree for quality using every other training-set column.
m.m5p <- M5P(formula = quality ~ ., data = wine_train)

# Print the tree structure and the linear model attached to each leaf.
print(m.m5p)
## M5 pruned model tree:
## (using smoothed linear models)
## 
## alcohol <= 10.85 : 
## |   volatile.acidity <= 0.237 : 
## |   |   fixed.acidity <= 6.85 : LM1 (406/66.024%)
## |   |   fixed.acidity >  6.85 : 
## |   |   |   free.sulfur.dioxide <= 24.5 : LM2 (113/87.697%)
## |   |   |   free.sulfur.dioxide >  24.5 : 
## |   |   |   |   alcohol <= 9.15 : 
## |   |   |   |   |   citric.acid <= 0.305 : 
## |   |   |   |   |   |   residual.sugar <= 14.45 : 
## |   |   |   |   |   |   |   residual.sugar <= 13.8 : 
## |   |   |   |   |   |   |   |   chlorides <= 0.052 : LM3 (6/77.537%)
## |   |   |   |   |   |   |   |   chlorides >  0.052 : LM4 (13/0%)
## |   |   |   |   |   |   |   residual.sugar >  13.8 : LM5 (11/0%)
## |   |   |   |   |   |   residual.sugar >  14.45 : LM6 (12/0%)
## |   |   |   |   |   citric.acid >  0.305 : 
## |   |   |   |   |   |   total.sulfur.dioxide <= 169.5 : 
## |   |   |   |   |   |   |   total.sulfur.dioxide <= 161.5 : 
## |   |   |   |   |   |   |   |   pH <= 3.355 : 
## |   |   |   |   |   |   |   |   |   volatile.acidity <= 0.215 : 
## |   |   |   |   |   |   |   |   |   |   free.sulfur.dioxide <= 44 : LM7 (3/53.19%)
## |   |   |   |   |   |   |   |   |   |   free.sulfur.dioxide >  44 : LM8 (8/48.858%)
## |   |   |   |   |   |   |   |   |   volatile.acidity >  0.215 : LM9 (3/0%)
## |   |   |   |   |   |   |   |   pH >  3.355 : LM10 (4/0%)
## |   |   |   |   |   |   |   total.sulfur.dioxide >  161.5 : LM11 (6/0%)
## |   |   |   |   |   |   total.sulfur.dioxide >  169.5 : 
## |   |   |   |   |   |   |   sulphates <= 0.56 : 
## |   |   |   |   |   |   |   |   free.sulfur.dioxide <= 48.5 : LM12 (7/0%)
## |   |   |   |   |   |   |   |   free.sulfur.dioxide >  48.5 : 
## |   |   |   |   |   |   |   |   |   fixed.acidity <= 7.3 : LM13 (5/0%)
## |   |   |   |   |   |   |   |   |   fixed.acidity >  7.3 : LM14 (4/0%)
## |   |   |   |   |   |   |   sulphates >  0.56 : LM15 (11/0%)
## |   |   |   |   alcohol >  9.15 : 
## |   |   |   |   |   density <= 0.996 : 
## |   |   |   |   |   |   sulphates <= 0.395 : LM16 (38/85.791%)
## |   |   |   |   |   |   sulphates >  0.395 : LM17 (120/71.353%)
## |   |   |   |   |   density >  0.996 : 
## |   |   |   |   |   |   residual.sugar <= 14.7 : LM18 (84/45.874%)
## |   |   |   |   |   |   residual.sugar >  14.7 : LM19 (24/62.764%)
## |   volatile.acidity >  0.237 : 
## |   |   alcohol <= 10.15 : 
## |   |   |   volatile.acidity <= 0.302 : 
## |   |   |   |   citric.acid <= 0.265 : 
## |   |   |   |   |   free.sulfur.dioxide <= 25.5 : LM20 (39/41.77%)
## |   |   |   |   |   free.sulfur.dioxide >  25.5 : LM21 (131/61.681%)
## |   |   |   |   citric.acid >  0.265 : 
## |   |   |   |   |   citric.acid <= 0.395 : LM22 (213/72.749%)
## |   |   |   |   |   citric.acid >  0.395 : LM23 (189/62.097%)
## |   |   |   volatile.acidity >  0.302 : LM24 (552/64.09%)
## |   |   alcohol >  10.15 : 
## |   |   |   free.sulfur.dioxide <= 26.5 : LM25 (151/75.998%)
## |   |   |   free.sulfur.dioxide >  26.5 : 
## |   |   |   |   total.sulfur.dioxide <= 161.5 : LM26 (142/74.4%)
## |   |   |   |   total.sulfur.dioxide >  161.5 : LM27 (77/77.736%)
## alcohol >  10.85 : 
## |   alcohol <= 11.767 : 
## |   |   free.sulfur.dioxide <= 21.5 : 
## |   |   |   free.sulfur.dioxide <= 11.5 : 
## |   |   |   |   density <= 0.992 : LM28 (19/84.403%)
## |   |   |   |   density >  0.992 : 
## |   |   |   |   |   fixed.acidity <= 6.85 : LM29 (6/108.029%)
## |   |   |   |   |   fixed.acidity >  6.85 : LM30 (21/69.935%)
## |   |   |   free.sulfur.dioxide >  11.5 : 
## |   |   |   |   volatile.acidity <= 0.195 : LM31 (36/61.98%)
## |   |   |   |   volatile.acidity >  0.195 : 
## |   |   |   |   |   chlorides <= 0.036 : LM32 (34/115.199%)
## |   |   |   |   |   chlorides >  0.036 : LM33 (59/78.207%)
## |   |   free.sulfur.dioxide >  21.5 : LM34 (495/84.229%)
## |   alcohol >  11.767 : 
## |   |   free.sulfur.dioxide <= 21.5 : LM35 (181/88.599%)
## |   |   free.sulfur.dioxide >  21.5 : LM36 (527/81.837%)
## 
## LM num: 1
## quality = 
##  0.266 * fixed.acidity 
##  - 2.3082 * volatile.acidity 
##  - 0.012 * citric.acid 
##  + 0.0421 * residual.sugar 
##  + 0.1126 * chlorides 
##  + 0 * free.sulfur.dioxide 
##  - 0.0015 * total.sulfur.dioxide 
##  - 109.8813 * density 
##  + 0.035 * pH 
##  + 1.4122 * sulphates 
##  - 0.0046 * alcohol 
##  + 113.1021
## 
## LM num: 2
## quality = 
##  -0.2557 * fixed.acidity 
##  - 0.8082 * volatile.acidity 
##  - 0.1062 * citric.acid 
##  + 0.0738 * residual.sugar 
##  + 0.0973 * chlorides 
##  + 0.0006 * free.sulfur.dioxide 
##  + 0.0003 * total.sulfur.dioxide 
##  - 210.1018 * density 
##  + 0.0323 * pH 
##  - 0.9604 * sulphates 
##  - 0.0231 * alcohol 
##  + 216.8857
## 
## LM num: 3
## quality = 
##  0.0725 * fixed.acidity 
##  - 1.0921 * volatile.acidity 
##  - 0.6118 * citric.acid 
##  + 0.0294 * residual.sugar 
##  + 105.3735 * chlorides 
##  - 0.0027 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 31.5856 * density 
##  + 0.0323 * pH 
##  + 0.1199 * sulphates 
##  - 0.0373 * alcohol 
##  + 32.2345
## 
## LM num: 4
## quality = 
##  0.0725 * fixed.acidity 
##  - 1.0921 * volatile.acidity 
##  - 0.6118 * citric.acid 
##  + 0.0294 * residual.sugar 
##  + 99.4295 * chlorides 
##  - 0.0027 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 31.5856 * density 
##  + 0.0323 * pH 
##  + 0.1199 * sulphates 
##  - 0.0373 * alcohol 
##  + 32.6786
## 
## LM num: 5
## quality = 
##  0.0944 * fixed.acidity 
##  - 1.0921 * volatile.acidity 
##  - 0.6118 * citric.acid 
##  + 0.0255 * residual.sugar 
##  + 95.8527 * chlorides 
##  - 0.0027 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 31.5856 * density 
##  + 0.0323 * pH 
##  + 0.1199 * sulphates 
##  - 0.0373 * alcohol 
##  + 32.9544
## 
## LM num: 6
## quality = 
##  0.0012 * fixed.acidity 
##  - 1.0921 * volatile.acidity 
##  - 0.6118 * citric.acid 
##  + 0.0491 * residual.sugar 
##  + 54.3184 * chlorides 
##  - 0.0027 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 31.5856 * density 
##  + 0.0323 * pH 
##  + 0.1199 * sulphates 
##  - 0.0373 * alcohol 
##  + 35.4429
## 
## LM num: 7
## quality = 
##  0.0012 * fixed.acidity 
##  - 2.7131 * volatile.acidity 
##  - 1.0049 * citric.acid 
##  + 0.0297 * residual.sugar 
##  + 5.7935 * chlorides 
##  - 0.0147 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 31.5856 * density 
##  + 0.633 * pH 
##  + 0.1199 * sulphates 
##  - 0.0373 * alcohol 
##  + 36.9235
## 
## LM num: 8
## quality = 
##  0.0012 * fixed.acidity 
##  - 2.7131 * volatile.acidity 
##  - 1.0049 * citric.acid 
##  + 0.0297 * residual.sugar 
##  + 5.7935 * chlorides 
##  - 0.0141 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 31.5856 * density 
##  + 0.633 * pH 
##  + 0.1199 * sulphates 
##  - 0.0373 * alcohol 
##  + 36.8808
## 
## LM num: 9
## quality = 
##  0.0012 * fixed.acidity 
##  - 3.4336 * volatile.acidity 
##  - 1.0049 * citric.acid 
##  + 0.0297 * residual.sugar 
##  + 5.7935 * chlorides 
##  - 0.0146 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 31.5856 * density 
##  + 0.633 * pH 
##  + 0.1199 * sulphates 
##  - 0.0373 * alcohol 
##  + 37.0118
## 
## LM num: 10
## quality = 
##  0.0012 * fixed.acidity 
##  - 1.0921 * volatile.acidity 
##  - 1.0049 * citric.acid 
##  + 0.0297 * residual.sugar 
##  + 5.7935 * chlorides 
##  - 0.0065 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 31.5856 * density 
##  + 0.8211 * pH 
##  + 0.1199 * sulphates 
##  - 0.0373 * alcohol 
##  + 35.686
## 
## LM num: 11
## quality = 
##  0.0012 * fixed.acidity 
##  - 1.0921 * volatile.acidity 
##  - 1.0049 * citric.acid 
##  + 0.0297 * residual.sugar 
##  + 5.7935 * chlorides 
##  - 0.0065 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 31.5856 * density 
##  + 0.2757 * pH 
##  + 0.1199 * sulphates 
##  - 0.0373 * alcohol 
##  + 37.5168
## 
## LM num: 12
## quality = 
##  -0.0571 * fixed.acidity 
##  - 1.0921 * volatile.acidity 
##  - 1.534 * citric.acid 
##  + 0.0297 * residual.sugar 
##  + 5.7935 * chlorides 
##  - 0.0098 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 31.5856 * density 
##  + 0.2583 * pH 
##  + 0.3345 * sulphates 
##  - 0.0373 * alcohol 
##  + 38.0548
## 
## LM num: 13
## quality = 
##  -0.304 * fixed.acidity 
##  - 1.0921 * volatile.acidity 
##  + 0.3698 * citric.acid 
##  + 0.0297 * residual.sugar 
##  + 5.7935 * chlorides 
##  - 0.0097 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 31.5856 * density 
##  + 0.2583 * pH 
##  + 0.3345 * sulphates 
##  - 0.0373 * alcohol 
##  + 39.1208
## 
## LM num: 14
## quality = 
##  -0.317 * fixed.acidity 
##  - 1.0921 * volatile.acidity 
##  - 1.5116 * citric.acid 
##  + 0.0297 * residual.sugar 
##  + 5.7935 * chlorides 
##  - 0.0097 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 31.5856 * density 
##  + 0.2583 * pH 
##  + 0.3345 * sulphates 
##  - 0.0373 * alcohol 
##  + 39.9144
## 
## LM num: 15
## quality = 
##  -0.0683 * fixed.acidity 
##  - 1.0921 * volatile.acidity 
##  - 1.3217 * citric.acid 
##  + 0.0297 * residual.sugar 
##  + 5.7935 * chlorides 
##  - 0.0088 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 31.5856 * density 
##  + 0.2583 * pH 
##  + 0.3758 * sulphates 
##  - 0.0373 * alcohol 
##  + 37.9875
## 
## LM num: 16
## quality = 
##  -0.4138 * fixed.acidity 
##  - 2.4188 * volatile.acidity 
##  - 0.1001 * citric.acid 
##  + 0.0519 * residual.sugar 
##  + 1.2445 * chlorides 
##  + 0.0002 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  + 146.7811 * density 
##  + 0.5635 * pH 
##  + 0.3884 * sulphates 
##  + 0.7403 * alcohol 
##  - 145.8266
## 
## LM num: 17
## quality = 
##  0.2744 * fixed.acidity 
##  - 3.6766 * volatile.acidity 
##  - 0.1001 * citric.acid 
##  + 0.0846 * residual.sugar 
##  + 0.5477 * chlorides 
##  + 0.0002 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 239.7241 * density 
##  + 1.5648 * pH 
##  + 0.8289 * sulphates 
##  - 0.0207 * alcohol 
##  + 237.4198
## 
## LM num: 18
## quality = 
##  0.0178 * fixed.acidity 
##  - 1.19 * volatile.acidity 
##  - 0.1001 * citric.acid 
##  + 0.041 * residual.sugar 
##  + 0.0973 * chlorides 
##  + 0.0002 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 46.7151 * density 
##  + 0.1443 * pH 
##  + 0.1669 * sulphates 
##  - 0.0207 * alcohol 
##  + 51.8086
## 
## LM num: 19
## quality = 
##  0.0178 * fixed.acidity 
##  - 11.6553 * volatile.acidity 
##  - 0.1001 * citric.acid 
##  + 0.0199 * residual.sugar 
##  + 0.0973 * chlorides 
##  + 0.0002 * free.sulfur.dioxide 
##  + 0.0044 * total.sulfur.dioxide 
##  - 46.7151 * density 
##  + 2.2855 * pH 
##  + 0.1669 * sulphates 
##  - 0.0207 * alcohol 
##  + 46.4726
## 
## LM num: 20
## quality = 
##  -0.0389 * fixed.acidity 
##  - 0.2704 * volatile.acidity 
##  + 0.6445 * citric.acid 
##  + 0.0043 * residual.sugar 
##  - 11.7525 * chlorides 
##  + 0.0148 * free.sulfur.dioxide 
##  + 13.1536 * density 
##  - 0.2235 * pH 
##  + 0.0154 * sulphates 
##  + 0.1335 * alcohol 
##  - 8.119
## 
## LM num: 21
## quality = 
##  -0.0139 * fixed.acidity 
##  - 0.2704 * volatile.acidity 
##  + 2.7911 * citric.acid 
##  + 0.0043 * residual.sugar 
##  - 16.394 * chlorides 
##  - 0.0093 * free.sulfur.dioxide 
##  - 0.0028 * total.sulfur.dioxide 
##  + 2.2255 * density 
##  - 0.088 * pH 
##  + 0.0154 * sulphates 
##  + 0.285 * alcohol 
##  + 1.9775
## 
## LM num: 22
## quality = 
##  0.0008 * fixed.acidity 
##  - 3.3571 * volatile.acidity 
##  - 0.0474 * citric.acid 
##  + 0.0167 * residual.sugar 
##  + 0.0002 * free.sulfur.dioxide 
##  - 0.0001 * total.sulfur.dioxide 
##  - 2.6496 * density 
##  + 0.0071 * pH 
##  + 0.0154 * sulphates 
##  + 0.0295 * alcohol 
##  + 8.7127
## 
## LM num: 23
## quality = 
##  0.0008 * fixed.acidity 
##  - 0.1682 * volatile.acidity 
##  - 0.0533 * citric.acid 
##  + 0.0034 * residual.sugar 
##  + 0.0112 * free.sulfur.dioxide 
##  - 0.004 * total.sulfur.dioxide 
##  - 2.4685 * density 
##  + 0.0071 * pH 
##  + 0.0154 * sulphates 
##  + 0.3099 * alcohol 
##  + 5.1272
## 
## LM num: 24
## quality = 
##  -0.1011 * fixed.acidity 
##  - 0.8767 * volatile.acidity 
##  + 0.0025 * citric.acid 
##  + 0.0183 * residual.sugar 
##  - 1.5815 * chlorides 
##  + 0 * free.sulfur.dioxide 
##  + 0.0015 * total.sulfur.dioxide 
##  - 4.1889 * density 
##  + 0.0195 * pH 
##  + 0.0154 * sulphates 
##  + 0.2656 * alcohol 
##  + 7.556
## 
## LM num: 25
## quality = 
##  0.1885 * fixed.acidity 
##  - 1.6681 * volatile.acidity 
##  + 0.0075 * citric.acid 
##  + 0.1434 * residual.sugar 
##  + 0.0181 * free.sulfur.dioxide 
##  - 438.9263 * density 
##  + 1.5263 * pH 
##  + 1.5041 * sulphates 
##  + 0.0067 * alcohol 
##  + 434.1083
## 
## LM num: 26
## quality = 
##  0.3156 * fixed.acidity 
##  - 0.3103 * volatile.acidity 
##  + 0.0075 * citric.acid 
##  + 0.0769 * residual.sugar 
##  + 0.0157 * free.sulfur.dioxide 
##  - 0.0006 * total.sulfur.dioxide 
##  - 224.3886 * density 
##  + 2.8971 * pH 
##  + 1.4123 * sulphates 
##  + 0.0067 * alcohol 
##  + 215.8849
## 
## LM num: 27
## quality = 
##  0.0704 * fixed.acidity 
##  - 1.6931 * volatile.acidity 
##  + 0.0075 * citric.acid 
##  + 0.0268 * residual.sugar 
##  + 0 * free.sulfur.dioxide 
##  - 0.0058 * total.sulfur.dioxide 
##  - 69.0546 * density 
##  + 0.5221 * pH 
##  + 0.3033 * sulphates 
##  + 0.0067 * alcohol 
##  + 73.2245
## 
## LM num: 28
## quality = 
##  -0.0359 * fixed.acidity 
##  - 2.1355 * volatile.acidity 
##  + 0.0312 * residual.sugar 
##  - 0.7007 * chlorides 
##  + 0.0139 * free.sulfur.dioxide 
##  - 3.9257 * density 
##  + 0.1002 * pH 
##  + 0.0883 * sulphates 
##  + 0.0057 * alcohol 
##  + 9.0802
## 
## LM num: 29
## quality = 
##  -0.1622 * fixed.acidity 
##  - 1.936 * volatile.acidity 
##  + 0.0312 * residual.sugar 
##  - 0.7007 * chlorides 
##  + 0.0139 * free.sulfur.dioxide 
##  - 8.2054 * density 
##  + 0.5998 * pH 
##  + 0.0883 * sulphates 
##  + 0.0057 * alcohol 
##  + 13.1705
## 
## LM num: 30
## quality = 
##  -0.1095 * fixed.acidity 
##  - 1.936 * volatile.acidity 
##  + 0.0312 * residual.sugar 
##  - 0.7007 * chlorides 
##  + 0.0139 * free.sulfur.dioxide 
##  - 8.2054 * density 
##  + 0.8708 * pH 
##  + 0.0883 * sulphates 
##  + 0.0057 * alcohol 
##  + 11.7475
## 
## LM num: 31
## quality = 
##  -0.2583 * fixed.acidity 
##  - 1.4215 * volatile.acidity 
##  - 1.371 * citric.acid 
##  + 0.0305 * residual.sugar 
##  - 3.2137 * chlorides 
##  + 0.0063 * free.sulfur.dioxide 
##  - 18.7292 * density 
##  + 0.1002 * pH 
##  + 0.0883 * sulphates 
##  + 0.1232 * alcohol 
##  + 25.7445
## 
## LM num: 32
## quality = 
##  -0.0968 * fixed.acidity 
##  - 0.9855 * volatile.acidity 
##  + 0.0245 * residual.sugar 
##  - 4.6936 * chlorides 
##  + 0.0063 * free.sulfur.dioxide 
##  - 18.7292 * density 
##  - 0.2017 * pH 
##  + 0.0883 * sulphates 
##  + 0.0612 * alcohol 
##  + 25.5306
## 
## LM num: 33
## quality = 
##  -0.0764 * fixed.acidity 
##  - 0.9855 * volatile.acidity 
##  + 0.0461 * residual.sugar 
##  - 3.7456 * chlorides 
##  + 0.0063 * free.sulfur.dioxide 
##  - 18.7292 * density 
##  - 0.0997 * pH 
##  + 0.0883 * sulphates 
##  + 0.4563 * alcohol 
##  + 20.1476
## 
## LM num: 34
## quality = 
##  0.0026 * fixed.acidity 
##  - 1.5467 * volatile.acidity 
##  + 0.5902 * citric.acid 
##  + 0.0796 * residual.sugar 
##  - 7.6293 * chlorides 
##  + 0.0004 * free.sulfur.dioxide 
##  - 0.002 * total.sulfur.dioxide 
##  - 105.9188 * density 
##  + 0.9409 * pH 
##  + 1.1632 * sulphates 
##  + 0.0057 * alcohol 
##  + 108.0478
## 
## LM num: 35
## quality = 
##  0.1974 * fixed.acidity 
##  - 1.5244 * volatile.acidity 
##  - 1.1342 * citric.acid 
##  + 0.1108 * residual.sugar 
##  - 0.5309 * chlorides 
##  + 0.0345 * free.sulfur.dioxide 
##  + 0.0002 * total.sulfur.dioxide 
##  - 306.9205 * density 
##  + 1.162 * pH 
##  + 0.0755 * sulphates 
##  - 0.0054 * alcohol 
##  + 305.176
## 
## LM num: 36
## quality = 
##  0.2738 * fixed.acidity 
##  - 0.0442 * volatile.acidity 
##  + 0.1664 * residual.sugar 
##  - 7.6486 * chlorides 
##  + 0.0005 * free.sulfur.dioxide 
##  + 0.0001 * total.sulfur.dioxide 
##  - 350.199 * density 
##  + 1.7781 * pH 
##  + 1.0583 * sulphates 
##  - 0.1722 * alcohol 
##  + 347.3058
## 
## Number of Rules : 36
# Report the model tree's fit statistics (correlation, MAE, RMSE, relative
# errors).
summary(object = m.m5p)
## 
## === Summary ===
## 
## Correlation coefficient                  0.6666
## Mean absolute error                      0.5151
## Root mean squared error                  0.6614
## Relative absolute error                 76.4921 %
## Root relative squared error             74.6259 %
## Total Number of Instances             3750
# Score the held-out wines with the model tree.
p.m5p <- predict(object = m.m5p, newdata = wine_test)

# Distribution of the model-tree predictions.
summary(object = p.m5p)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.389   5.430   5.863   5.874   6.305   7.437
# Correlation between predicted and true quality.
cor(x = p.m5p, y = wine_test$quality)
## [1] 0.6272973
# Mean absolute error of the predictions against the true quality
# (MAE is the helper function defined earlier in this script).
MAE(actual = wine_test$quality, predicted = p.m5p)
## [1] 0.5463023

# The correlation is also substantially higher than the regression tree's (0.63 vs. 0.54), and the model tree slightly improved the mean absolute error (0.55 vs. 0.59).