Chapter 6: Regression Methods

##Chapter 6: Regression Methods #Part 1: Linear Regression #Understanding Regression

getwd()

[1] "/cloud/project"

launch <- read.csv("challenger.csv")

# Estimate beta manually
b <- cov(launch$temperature, launch$distress_ct) / var(launch$temperature)
b

[1] -0.04753968

# Estimate alpha manually
a <- mean(launch$distress_ct) - b*mean(launch$temperature)
a

[1] 3.698413

#Calculate the correlation of Launch Data
r <- cov(launch$temperature, launch$distress_ct) / (sd(launch$temperature)*sd(launch$distress_ct))
r

[1] -0.5111264

cor(launch$temperature, launch$distress_ct)

[1] -0.5111264

# Computing the slope using correlation
r * (sd(launch$distress_ct) / sd(launch$temperature))

[1] -0.04753968

#Confirming the regression line using the lm function (not in text)
model <- lm(distress_ct ~ temperature, data = launch)
model


Call:
lm(formula = distress_ct ~ temperature, data = launch)

Coefficients:
(Intercept)  temperature  
    3.69841     -0.04754

summary(model)


Call:
lm(formula = distress_ct ~ temperature, data = launch)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.5608 -0.3944 -0.0854  0.1056  1.8671 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)  3.69841    1.21951   3.033  0.00633 **
temperature -0.04754    0.01744  -2.725  0.01268 * 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.5774 on 21 degrees of freedom
Multiple R-squared:  0.2613,    Adjusted R-squared:  0.2261 
F-statistic: 7.426 on 1 and 21 DF,  p-value: 0.01268

#Creating a simple multiple regression function
reg <- function(y,x) {
  x <- as.matrix(x)
  x <- cbind(Interpret = 1,x)
  b <- solve(t(x) %*% x) %*% t(x) %*% y
  colnames(b) <- "estimate"
  print(b)
}

#Examine the launch Data
str(launch)

'data.frame':   23 obs. of  4 variables:
 $ distress_ct         : int  0 1 0 0 0 0 0 0 1 1 ...
 $ temperature         : int  66 70 69 68 67 72 73 70 57 63 ...
 $ field_check_pressure: int  50 50 50 50 50 50 100 100 200 200 ...
 $ flight_num          : int  1 2 3 4 5 6 7 8 9 10 ...

#Test regression model with Simple Linear Regression
reg(y = launch$distress_ct, x = launch[2])

               estimate
Interpret    3.69841270
temperature -0.04753968

# Use Regression Model with Multiple Regression
reg(y = launch$distress_ct, x = launch[2:4])

                         estimate
Interpret             3.527093383
temperature          -0.051385940
field_check_pressure  0.001757009
flight_num            0.014292843

#Confirming the multiple Regression result using the lm function (no t in text)
model <- lm(distress_ct ~ temperature + field_check_pressure + flight_num, data = launch)
model


Call:
lm(formula = distress_ct ~ temperature + field_check_pressure + 
    flight_num, data = launch)

Coefficients:
         (Intercept)           temperature  field_check_pressure  
            3.527093             -0.051386              0.001757  
          flight_num  
            0.014293

summary(model)


Call:
lm(formula = distress_ct ~ temperature + field_check_pressure + 
    flight_num, data = launch)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.65003 -0.24414 -0.11219  0.01279  1.67530 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)  
(Intercept)           3.527093   1.307024   2.699   0.0142 *
temperature          -0.051386   0.018341  -2.802   0.0114 *
field_check_pressure  0.001757   0.003402   0.517   0.6115  
flight_num            0.014293   0.035138   0.407   0.6887  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.565 on 19 degrees of freedom
Multiple R-squared:   0.36, Adjusted R-squared:  0.259 
F-statistic: 3.563 on 3 and 19 DF,  p-value: 0.03371

##Predicting Medical Expenses

## Step 2: Exploring and preparing the data
insurance <- read.csv("insurance.csv", stringsAsFactors = TRUE)
str(insurance)

'data.frame':   1338 obs. of  7 variables:
 $ age     : int  19 18 28 33 32 31 46 37 37 60 ...
 $ sex     : Factor w/ 2 levels "female","male": 1 2 2 2 2 1 1 1 2 1 ...
 $ bmi     : num  27.9 33.8 33 22.7 28.9 25.7 33.4 27.7 29.8 25.8 ...
 $ children: int  0 1 3 0 0 0 1 3 2 0 ...
 $ smoker  : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
 $ region  : Factor w/ 4 levels "northeast","northwest",..: 4 3 3 2 2 3 3 2 1 2 ...
 $ expenses: num  16885 1726 4449 21984 3867 ...

#Summarize the charges variable
summary(insurance$expenses)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1122    4740    9382   13270   16640   63770

# Histogram of insurance charges
hist(insurance$expenses)

table(insurance$region)


northeast northwest southeast southwest 
      324       325       364       325

# Exploring relationships among features: correlation matrix
cor(insurance[c("age","bmi","children","expenses")])

               age        bmi   children   expenses
age      1.0000000 0.10934101 0.04246900 0.29900819
bmi      0.1093410 1.00000000 0.01264471 0.19857626
children 0.0424690 0.01264471 1.00000000 0.06799823
expenses 0.2990082 0.19857626 0.06799823 1.00000000

#Visualizing relationships among features: Scatterplot Matrix
pairs(insurance[c("age","bmi","children","expenses")])

#Step 3: Training a model on the data ---
ins_model <- lm(expenses ~ age + children + bmi + sex + smoker + region,
                data = insurance)
ins_model <- lm(expenses ~ ., data = insurance) # this is equivalent to above

# see the estimated beta coefficients
ins_model


Call:
lm(formula = expenses ~ ., data = insurance)

Coefficients:
    (Intercept)              age          sexmale  
       -11941.6            256.8           -131.4  
            bmi         children        smokeryes  
          339.3            475.7          23847.5  
regionnorthwest  regionsoutheast  regionsouthwest  
         -352.8          -1035.6           -959.3

##Step 4: Evaluating Model Performance

# see more detail about the estimated beta coefficients
summary(ins_model)


Call:
lm(formula = expenses ~ ., data = insurance)

Residuals:
     Min       1Q   Median       3Q      Max 
-11302.7  -2850.9   -979.6   1383.9  29981.7 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     -11941.6      987.8 -12.089  < 2e-16 ***
age                256.8       11.9  21.586  < 2e-16 ***
sexmale           -131.3      332.9  -0.395 0.693255    
bmi                339.3       28.6  11.864  < 2e-16 ***
children           475.7      137.8   3.452 0.000574 ***
smokeryes        23847.5      413.1  57.723  < 2e-16 ***
regionnorthwest   -352.8      476.3  -0.741 0.458976    
regionsoutheast  -1035.6      478.7  -2.163 0.030685 *  
regionsouthwest   -959.3      477.9  -2.007 0.044921 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6062 on 1329 degrees of freedom
Multiple R-squared:  0.7509,    Adjusted R-squared:  0.7494 
F-statistic: 500.9 on 8 and 1329 DF,  p-value: < 2.2e-16

##Step 5: Improving Model Performance

# add a higher-order "age" term
insurance$age2 <- insurance$age^2

# add an indicator for BMI >= 30
insurance$bmi30 <- ifelse(insurance$bmi >= 30,1,0)

# Create Final model
ins_model2 <- lm(expenses ~ age + age2 + children + bmi + sex + bmi30*smoker + region, data = insurance)

summary(ins_model2)


Call:
lm(formula = expenses ~ age + age2 + children + bmi + sex + bmi30 * 
    smoker + region, data = insurance)

Residuals:
     Min       1Q   Median       3Q      Max 
-17297.1  -1656.0  -1262.7   -727.8  24161.6 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)       139.0053  1363.1359   0.102 0.918792    
age               -32.6181    59.8250  -0.545 0.585690    
age2                3.7307     0.7463   4.999 6.54e-07 ***
children          678.6017   105.8855   6.409 2.03e-10 ***
bmi               119.7715    34.2796   3.494 0.000492 ***
sexmale          -496.7690   244.3713  -2.033 0.042267 *  
bmi30            -997.9355   422.9607  -2.359 0.018449 *  
smokeryes       13404.5952   439.9591  30.468  < 2e-16 ***
regionnorthwest  -279.1661   349.2826  -0.799 0.424285    
regionsoutheast  -828.0345   351.6484  -2.355 0.018682 *  
regionsouthwest -1222.1619   350.5314  -3.487 0.000505 ***
bmi30:smokeryes 19810.1534   604.6769  32.762  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 4445 on 1326 degrees of freedom
Multiple R-squared:  0.8664,    Adjusted R-squared:  0.8653 
F-statistic: 781.7 on 11 and 1326 DF,  p-value: < 2.2e-16

We have a lot of variable for this model. The R-squared is 0.86 means 86% so all of those variables are significantly important to predict the model.

# Making predictions with the Regression Model
insurance$pred <- predict(ins_model2, insurance)
cor(insurance$pred, insurance$expenses)

[1] 0.9307999

plot(insurance$pred, insurance$expenses)
abline(a = 0, b = 1, col = "red", lwd = 3, lty = 2)

predict(ins_model2,
        data.frame(age=21, age2=21^2,children=3,
                   bmi=24,sex="female",bmi30=0,
                   smoker="no",region="northwest"))

       1 
5730.437

predict(ins_model2,
        data.frame(age=22,age2=22^2,children=1,
                   bmi=27,sex="male",bmi30=0,
                   smoker="yes",region="southeast"))

       1 
17219.31

Part 2: Regression Trees and Model Trees

##Understanding Regression Trees and Model Trees

##Example: Calculating SDR

# Set up the data
tee <- c(1,1,1,2,2,3,4,5,5,6,6,7,7,7,7)
at1 <- c(1,1,1,2,2,3,4,5,5)
at2 <- c(6,6,7,7,7,7)
bt1 <- c(1,1,1,2,2,3,4)
bt2 <- c(5,5,6,6,7,7,7,7)

#Compute the SDR
sdr_a <- sd(tee) - (length(at1) / length(tee) * sd(at1) + length(at2) / length(tee) * sd(at2))
sdr_b <- sd(tee) - (length(bt1) / length(tee) * sd(bt1) + length(bt2) / length(tee) * sd(bt2))

#Compare the SDR for each split
sdr_a

[1] 1.202815

sdr_b

[1] 1.392751

##Exercise No 3: Estimating Wine Quality

##Step 2: Exploring and Preparing the Data

wine <- read.csv("winequality-red.csv")

# Examine the wine Data
str(wine)

'data.frame':   1599 obs. of  12 variables:
 $ fixed.acidity       : num  7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
 $ volatile.acidity    : num  0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
 $ citric.acid         : num  0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
 $ residual.sugar      : num  1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
 $ chlorides           : num  0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
 $ free.sulfur.dioxide : num  11 25 15 17 11 13 15 15 9 17 ...
 $ total.sulfur.dioxide: num  34 67 54 60 34 40 59 21 18 102 ...
 $ density             : num  0.998 0.997 0.997 0.998 0.998 ...
 $ pH                  : num  3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
 $ sulphates           : num  0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
 $ alcohol             : num  9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
 $ quality             : int  5 5 5 6 5 5 5 7 7 5 ...

# The Distribution of Quality Ratings
hist(wine$quality)

summary(wine)

 fixed.acidity   volatile.acidity  citric.acid   
 Min.   : 4.60   Min.   :0.1200   Min.   :0.000  
 1st Qu.: 7.10   1st Qu.:0.3900   1st Qu.:0.090  
 Median : 7.90   Median :0.5200   Median :0.260  
 Mean   : 8.32   Mean   :0.5278   Mean   :0.271  
 3rd Qu.: 9.20   3rd Qu.:0.6400   3rd Qu.:0.420  
 Max.   :15.90   Max.   :1.5800   Max.   :1.000  
 residual.sugar     chlorides       free.sulfur.dioxide
 Min.   : 0.900   Min.   :0.01200   Min.   : 1.00      
 1st Qu.: 1.900   1st Qu.:0.07000   1st Qu.: 7.00      
 Median : 2.200   Median :0.07900   Median :14.00      
 Mean   : 2.539   Mean   :0.08747   Mean   :15.87      
 3rd Qu.: 2.600   3rd Qu.:0.09000   3rd Qu.:21.00      
 Max.   :15.500   Max.   :0.61100   Max.   :72.00      
 total.sulfur.dioxide    density             pH       
 Min.   :  6.00       Min.   :0.9901   Min.   :2.740  
 1st Qu.: 22.00       1st Qu.:0.9956   1st Qu.:3.210  
 Median : 38.00       Median :0.9968   Median :3.310  
 Mean   : 46.47       Mean   :0.9967   Mean   :3.311  
 3rd Qu.: 62.00       3rd Qu.:0.9978   3rd Qu.:3.400  
 Max.   :289.00       Max.   :1.0037   Max.   :4.010  
   sulphates         alcohol         quality     
 Min.   :0.3300   Min.   : 8.40   Min.   :3.000  
 1st Qu.:0.5500   1st Qu.: 9.50   1st Qu.:5.000  
 Median :0.6200   Median :10.20   Median :6.000  
 Mean   :0.6581   Mean   :10.42   Mean   :5.636  
 3rd Qu.:0.7300   3rd Qu.:11.10   3rd Qu.:6.000  
 Max.   :2.0000   Max.   :14.90   Max.   :8.000

wine_train <- wine[1:3750, ]
wine_test <- wine[3751:4898, ]

##Step 3: Training a model on the data

#Regression Tree using rpart
library(rpart)
m.rpart <- rpart(quality ~ ., data = wine_train)

# get basic information about the tree
m.rpart

n=1599 (2151 observations deleted due to missingness)

node), split, n, deviance, yval
      * denotes terminal node

 1) root 1599 1042.16500 5.636023  
   2) alcohol< 10.525 983  424.15870 5.366226  
     4) sulphates< 0.575 391  128.09720 5.150895 *
     5) sulphates>=0.575 592  265.95780 5.508446  
      10) volatile.acidity>=0.405 448  175.87280 5.404018 *
      11) volatile.acidity< 0.405 144   70.00000 5.833333 *
   3) alcohol>=10.525 616  432.27110 6.066558  
     6) sulphates< 0.645 272  191.86760 5.727941  
      12) volatile.acidity>=1.015 10    6.00000 4.000000 *
      13) volatile.acidity< 1.015 262  154.87020 5.793893  
        26) volatile.acidity>=0.495 146   73.67123 5.575342 *
        27) volatile.acidity< 0.495 116   65.44828 6.068966 *
     7) sulphates>=0.645 344  184.55520 6.334302  
      14) alcohol< 11.55 206  101.96600 6.121359  
        28) volatile.acidity>=0.395 111   37.42342 5.927928 *
        29) volatile.acidity< 0.395 95   55.53684 6.347368  
          58) pH>=3.255 59   29.72881 6.067797 *
          59) pH< 3.255 36   13.63889 6.805556 *
      15) alcohol>=11.55 138   59.30435 6.652174 *

# get more detailed information about the tree
summary(m.rpart)

Call:
rpart(formula = quality ~ ., data = wine_train)
  n=1599 (2151 observations deleted due to missingness)

          CP nsplit rel error    xerror       xstd
1 0.17822061      0 1.0000000 1.0008755 0.03787612
2 0.05358865      1 0.8217794 0.8279516 0.03589512
3 0.02974329      2 0.7681907 0.7896176 0.03310122
4 0.02888577      3 0.7384474 0.7905807 0.03318766
5 0.02234278      4 0.7095617 0.7523823 0.03103167
6 0.01927238      5 0.6872189 0.7375748 0.03060662
7 0.01511346      6 0.6679465 0.7282584 0.02982301
8 0.01015909      7 0.6528331 0.7123437 0.02865414
9 0.01000000      9 0.6325149 0.7086338 0.02828159

Variable importance
             alcohol     volatile.acidity              density 
                  31                   17                   13 
           sulphates        fixed.acidity            chlorides 
                  13                    7                    6 
         citric.acid                   pH total.sulfur.dioxide 
                   5                    4                    3 
 free.sulfur.dioxide 
                   1 

Node number 1: 1599 observations,    complexity param=0.1782206
  mean=5.636023, MSE=0.6517605 
  left son=2 (983 obs) right son=3 (616 obs)
  Primary splits:
      alcohol          < 10.525   to the left,  improve=0.17822060, (0 missing)
      sulphates        < 0.645    to the left,  improve=0.12565160, (0 missing)
      volatile.acidity < 0.425    to the right, improve=0.11400620, (0 missing)
      citric.acid      < 0.295    to the left,  improve=0.07225368, (0 missing)
      density          < 0.99539  to the right, improve=0.06402980, (0 missing)
  Surrogate splits:
      density              < 0.995575 to the right, agree=0.762, adj=0.383, (0 split)
      chlorides            < 0.0685   to the right, agree=0.690, adj=0.195, (0 split)
      volatile.acidity     < 0.3675   to the right, agree=0.662, adj=0.123, (0 split)
      fixed.acidity        < 6.75     to the right, agree=0.654, adj=0.101, (0 split)
      total.sulfur.dioxide < 17.5     to the right, agree=0.641, adj=0.068, (0 split)

Node number 2: 983 observations,    complexity param=0.02888577
  mean=5.366226, MSE=0.4314941 
  left son=4 (391 obs) right son=5 (592 obs)
  Primary splits:
      sulphates            < 0.575    to the left,  improve=0.07097282, (0 missing)
      volatile.acidity     < 0.335    to the right, improve=0.06388554, (0 missing)
      alcohol              < 9.85     to the left,  improve=0.05212216, (0 missing)
      fixed.acidity        < 10.85    to the left,  improve=0.03084011, (0 missing)
      total.sulfur.dioxide < 83.5     to the right, improve=0.02749674, (0 missing)
  Surrogate splits:
      density              < 0.996225 to the left,  agree=0.662, adj=0.151, (0 split)
      volatile.acidity     < 0.6525   to the right, agree=0.636, adj=0.084, (0 split)
      fixed.acidity        < 6.05     to the left,  agree=0.609, adj=0.018, (0 split)
      citric.acid          < 0.115    to the left,  agree=0.609, adj=0.018, (0 split)
      total.sulfur.dioxide < 9.5      to the left,  agree=0.608, adj=0.015, (0 split)

Node number 3: 616 observations,    complexity param=0.05358865
  mean=6.066558, MSE=0.7017388 
  left son=6 (272 obs) right son=7 (344 obs)
  Primary splits:
      sulphates        < 0.645    to the left,  improve=0.12919720, (0 missing)
      volatile.acidity < 0.87     to the right, improve=0.11482610, (0 missing)
      citric.acid      < 0.295    to the left,  improve=0.10819510, (0 missing)
      alcohol          < 11.55    to the left,  improve=0.10309310, (0 missing)
      pH               < 3.355    to the right, improve=0.07557599, (0 missing)
  Surrogate splits:
      citric.acid      < 0.245    to the left,  agree=0.683, adj=0.283, (0 split)
      fixed.acidity    < 7.85     to the left,  agree=0.666, adj=0.243, (0 split)
      volatile.acidity < 0.5875   to the right, agree=0.653, adj=0.213, (0 split)
      density          < 0.994915 to the left,  agree=0.635, adj=0.173, (0 split)
      pH               < 3.405    to the right, agree=0.630, adj=0.162, (0 split)

Node number 4: 391 observations
  mean=5.150895, MSE=0.3276143 

Node number 5: 592 observations,    complexity param=0.01927238
  mean=5.508446, MSE=0.449253 
  left son=10 (448 obs) right son=11 (144 obs)
  Primary splits:
      volatile.acidity     < 0.405    to the right, improve=0.07551952, (0 missing)
      total.sulfur.dioxide < 81.5     to the right, improve=0.05845854, (0 missing)
      alcohol              < 9.85     to the left,  improve=0.05386312, (0 missing)
      fixed.acidity        < 10.95    to the left,  improve=0.05335172, (0 missing)
      chlorides            < 0.0975   to the right, improve=0.03262428, (0 missing)
  Surrogate splits:
      fixed.acidity       < 10.45    to the left,  agree=0.787, adj=0.125, (0 split)
      chlorides           < 0.0565   to the right, agree=0.765, adj=0.035, (0 split)
      citric.acid         < 0.365    to the left,  agree=0.764, adj=0.028, (0 split)
      free.sulfur.dioxide < 2.5      to the right, agree=0.764, adj=0.028, (0 split)
      alcohol             < 8.6      to the right, agree=0.758, adj=0.007, (0 split)

Node number 6: 272 observations,    complexity param=0.02974329
  mean=5.727941, MSE=0.7053958 
  left son=12 (10 obs) right son=13 (262 obs)
  Primary splits:
      volatile.acidity < 1.015    to the right, improve=0.16155630, (0 missing)
      alcohol          < 11.45    to the left,  improve=0.11901850, (0 missing)
      citric.acid      < 0.255    to the left,  improve=0.11313180, (0 missing)
      pH               < 3.365    to the right, improve=0.09055459, (0 missing)
      sulphates        < 0.585    to the left,  improve=0.04970438, (0 missing)

Node number 7: 344 observations,    complexity param=0.02234278
  mean=6.334302, MSE=0.5364978 
  left son=14 (206 obs) right son=15 (138 obs)
  Primary splits:
      alcohol              < 11.55    to the left,  improve=0.12616750, (0 missing)
      chlorides            < 0.0785   to the right, improve=0.05765389, (0 missing)
      total.sulfur.dioxide < 101.5    to the right, improve=0.05496021, (0 missing)
      density              < 0.99537  to the right, improve=0.04412990, (0 missing)
      volatile.acidity     < 0.425    to the right, improve=0.04136603, (0 missing)
  Surrogate splits:
      density        < 0.994875 to the right, agree=0.701, adj=0.254, (0 split)
      chlorides      < 0.053    to the right, agree=0.651, adj=0.130, (0 split)
      fixed.acidity  < 5.55     to the right, agree=0.640, adj=0.101, (0 split)
      residual.sugar < 4.25     to the left,  agree=0.628, adj=0.072, (0 split)
      citric.acid    < 0.635    to the left,  agree=0.622, adj=0.058, (0 split)

Node number 10: 448 observations
  mean=5.404018, MSE=0.3925731 

Node number 11: 144 observations
  mean=5.833333, MSE=0.4861111 

Node number 12: 10 observations
  mean=4, MSE=0.6 

Node number 13: 262 observations,    complexity param=0.01511346
  mean=5.793893, MSE=0.5911077 
  left son=26 (146 obs) right son=27 (116 obs)
  Primary splits:
      volatile.acidity < 0.495    to the right, improve=0.10170270, (0 missing)
      alcohol          < 11.45    to the left,  improve=0.09838534, (0 missing)
      citric.acid      < 0.255    to the left,  improve=0.09415346, (0 missing)
      pH               < 3.295    to the right, improve=0.07618253, (0 missing)
      density          < 0.995155 to the right, improve=0.05214905, (0 missing)
  Surrogate splits:
      citric.acid          < 0.235    to the left,  agree=0.866, adj=0.698, (0 split)
      pH                   < 3.305    to the right, agree=0.733, adj=0.397, (0 split)
      fixed.acidity        < 7.85     to the left,  agree=0.691, adj=0.302, (0 split)
      alcohol              < 11.85    to the left,  agree=0.641, adj=0.190, (0 split)
      total.sulfur.dioxide < 12.5     to the right, agree=0.637, adj=0.181, (0 split)

Node number 14: 206 observations,    complexity param=0.01015909
  mean=6.121359, MSE=0.4949807 
  left son=28 (111 obs) right son=29 (95 obs)
  Primary splits:
      volatile.acidity     < 0.395    to the right, improve=0.08832113, (0 missing)
      total.sulfur.dioxide < 49.5     to the right, improve=0.06808035, (0 missing)
      chlorides            < 0.0945   to the right, improve=0.05079896, (0 missing)
      citric.acid          < 0.295    to the left,  improve=0.05051307, (0 missing)
      free.sulfur.dioxide  < 25.5     to the right, improve=0.03611908, (0 missing)
  Surrogate splits:
      citric.acid    < 0.285    to the left,  agree=0.733, adj=0.421, (0 split)
      sulphates      < 0.765    to the left,  agree=0.655, adj=0.253, (0 split)
      chlorides      < 0.0675   to the right, agree=0.617, adj=0.168, (0 split)
      residual.sugar < 1.85     to the right, agree=0.612, adj=0.158, (0 split)
      fixed.acidity  < 7.05     to the left,  agree=0.597, adj=0.126, (0 split)

Node number 15: 138 observations
  mean=6.652174, MSE=0.4297417 

Node number 26: 146 observations
  mean=5.575342, MSE=0.5045975 

Node number 27: 116 observations
  mean=6.068966, MSE=0.5642093 

Node number 28: 111 observations
  mean=5.927928, MSE=0.337148 

Node number 29: 95 observations,    complexity param=0.01015909
  mean=6.347368, MSE=0.5845983 
  left son=58 (59 obs) right son=59 (36 obs)
  Primary splits:
      pH                   < 3.255    to the right, improve=0.21911830, (0 missing)
      total.sulfur.dioxide < 56.5     to the right, improve=0.18528400, (0 missing)
      fixed.acidity        < 10       to the left,  improve=0.12899290, (0 missing)
      free.sulfur.dioxide  < 24.5     to the right, improve=0.11666000, (0 missing)
      alcohol              < 10.75    to the left,  improve=0.05498168, (0 missing)
  Surrogate splits:
      fixed.acidity        < 9.7      to the left,  agree=0.737, adj=0.306, (0 split)
      total.sulfur.dioxide < 28.5     to the right, agree=0.737, adj=0.306, (0 split)
      free.sulfur.dioxide  < 9.5      to the right, agree=0.716, adj=0.250, (0 split)
      chlorides            < 0.0635   to the right, agree=0.663, adj=0.111, (0 split)
      sulphates            < 0.935    to the left,  agree=0.663, adj=0.111, (0 split)

Node number 58: 59 observations
  mean=6.067797, MSE=0.5038782 

Node number 59: 36 observations
  mean=6.805556, MSE=0.378858

install.packages("rpart.plot")

trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/rpart.plot_3.1.3.tar.gz'
Content type 'application/x-gzip' length 1014419 bytes (990 KB)
==================================================
downloaded 990 KB


The downloaded source packages are in
    ‘/tmp/Rtmp8iYKHc/downloaded_packages’

#Use the rpart.plot package to create a visualization
library(rpart.plot)

# a basic decision tree diagram
rpart.plot(m.rpart, digits = 3)

# a few adjustments to the diagram
rpart.plot(m.rpart, digits=4, fallen.leaves = TRUE, type=3, extra=101)

##Step 4: Evaluate Model Performance

# Generate Predictions for the testing dataset
p.rpart <- predict(m.rpart, wine_test)

#Compare the distribution of predicted values vs. actual values
summary(p.rpart)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  5.404   5.404   5.404   5.404   5.404   5.404

summary(wine_test$quality)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
     NA      NA      NA     NaN      NA      NA    1148

#Compare the correlation
cor(p.rpart, wine_test$quality)

[1] NA

#Function to calculate the mean absolute error
MAE <- function(actual,predicted) {
  mean(abs(actual-predicted))
}

# Mean Absolute Error between predicted and actual values
MAE(p.rpart, wine_test$quality)

[1] NA

# Mean Absolute Error between actual values and mean value
mean(wine_train$quality) # result = 5.87

[1] NA

MAE(5.87, wine_test$quality)

[1] NA

##Step 5: Improving Model Performance

install.packages("plyr")

install.packages("Cubist")

trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/plyr_1.8.9.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/reshape2_1.4.5.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/Cubist_0.5.1.tar.gz'

The downloaded source packages are in
    ‘/tmp/Rtmp8iYKHc/downloaded_packages’

#train a Cubist Model Tree
library(Cubist)

m.cubist <- cubist(x = wine_train[-12], y=wine_train$quality)

#Display Basic information about the model tree
m.cubist


Call:
cubist.default(x = wine_train[-12], y = wine_train$quality)

Number of samples: 3750 
Number of predictors: 11 

Number of committees: 1 
Number of rules: 7

# Display the tree itself
summary(m.cubist)


Call:
cubist.default(x = wine_train[-12], y = wine_train$quality)


Cubist [Release 2.07 GPL Edition]  Tue Feb  3 01:25:38 2026
---------------------------------

    Target attribute `outcome'

*** Ignoring cases with unknown or N/A target value

Read 1599 cases (12 attributes) from undefined.data

Model:

  Rule 1: [630 cases, mean 5.3, range 3 to 8, est err 0.4]

    if
    alcohol <= 9.8
    then
    outcome = 5 - 0.79 volatile.acidity - 0.099 alcohol
              + 0.052 fixed.acidity - 0.31 citric.acid + 0.33 sulphates
              + 0.29 pH - 0.0031 free.sulfur.dioxide
              - 0.0007 total.sulfur.dioxide - 0.4 chlorides

  Rule 2: [589 cases, mean 5.3, range 3 to 8, est err 0.4]

    if
    sulphates <= 0.92
    alcohol <= 9.8
    then
    outcome = 5.5 + 1.28 sulphates - 0.9 volatile.acidity - 0.33 citric.acid
              + 0.029 fixed.acidity - 0.033 alcohol
              - 0.0008 total.sulfur.dioxide - 0.0023 free.sulfur.dioxide
              - 0.4 chlorides - 0.1 pH

  Rule 3: [80 cases, mean 5.3, range 3 to 7, est err 0.7]

    if
    volatile.acidity > 0.31
    total.sulfur.dioxide <= 14
    sulphates <= 0.63
    alcohol > 9.8
    then
    outcome = 0.5 + 0.549 alcohol - 1.61 volatile.acidity + 0.36 sulphates
              - 0.18 pH - 0.0005 total.sulfur.dioxide - 0.07 citric.acid
              + 0.001 free.sulfur.dioxide

  Rule 4: [340 cases, mean 5.6, range 4 to 7, est err 0.5]

    if
    volatile.acidity > 0.31
    total.sulfur.dioxide > 14
    sulphates <= 0.63
    alcohol > 9.8
    then
    outcome = 5.1 + 2.85 sulphates + 0.19 alcohol - 0.74 citric.acid
              - 0.69 volatile.acidity - 0.74 pH
              - 0.0027 total.sulfur.dioxide + 0.0013 free.sulfur.dioxide

  Rule 5: [407 cases, mean 6.1, range 3 to 8, est err 0.6]

    if
    volatile.acidity > 0.31
    sulphates > 0.63
    alcohol > 9.8
    then
    outcome = 7.6 + 0.309 alcohol - 0.0073 total.sulfur.dioxide - 1.12 pH
              - 0.81 volatile.acidity - 0.079 fixed.acidity + 0.22 sulphates
              + 0.002 free.sulfur.dioxide

  Rule 6: [71 cases, mean 6.2, range 5 to 8, est err 0.5]

    if
    volatile.acidity <= 0.31
    sulphates <= 0.73
    alcohol > 9.8
    then
    outcome = 131.4 + 4.85 volatile.acidity - 124 density - 1.35 pH
              + 0.056 fixed.acidity + 0.54 sulphates + 0.036 alcohol
              + 0.021 residual.sugar

  Rule 7: [85 cases, mean 6.5, range 5 to 8, est err 0.4]

    if
    volatile.acidity <= 0.31
    sulphates > 0.73
    then
    outcome = 17 + 0.39 alcohol + 0.113 fixed.acidity
              + 0.25 volatile.acidity - 16 density + 0.14 sulphates


Evaluation on training data (1599 cases):

    Average  |error|                0.4
    Relative |error|               0.62
    Correlation coefficient        0.62


    Attribute usage:
      Conds  Model

       96%   100%    alcohol
       71%   100%    sulphates
       45%   100%    volatile.acidity
       19%    93%    total.sulfur.dioxide
              96%    pH
              93%    free.sulfur.dioxide
              81%    fixed.acidity
              74%    citric.acid
              55%    chlorides
               7%    density
               3%    residual.sugar


Time: 0.0 secs

# Generate predictions for the model
p.cubist <- predict(m.cubist, wine_test)

# sumarry statistic about the predictions
summary(p.cubist)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  5.865   5.865   5.865   5.865   5.865   5.865

# correlation between the predicted and true values
cor(p.cubist, wine_test$quality)

[1] NA

# mean absolute error of predicted and true values
# (uses a custom function defined above)
MAE(wine_test$quality, p.cubist)

[1] NA

LS0tCnRpdGxlOiAiQ2hhcHRlciA2OiBSZWdyZXNzaW9uIE1ldGhvZHMiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMjQ2hhcHRlciA2OiBSZWdyZXNzaW9uIE1ldGhvZHMKI1BhcnQgMTogTGluZWFyIFJlZ3Jlc3Npb24KI1VuZGVyc3RhbmRpbmcgUmVncmVzc2lvbgoKYGBge3J9CmdldHdkKCkKYGBgCgpgYGB7cn0KIyBFeGFtcGxlOiBTcGFjZSBTaHV0dGxlIExhdW5jaCBEYXRhCmxhdW5jaCA8LSByZWFkLmNzdigiY2hhbGxlbmdlci5jc3YiKQpgYGAKCmBgYHtyfQojIEVzdGltYXRlIGJldGEgbWFudWFsbHkKYiA8LSBjb3YobGF1bmNoJHRlbXBlcmF0dXJlLCBsYXVuY2gkZGlzdHJlc3NfY3QpIC8gdmFyKGxhdW5jaCR0ZW1wZXJhdHVyZSkKYgpgYGAKCmBgYHtyfQojIEVzdGltYXRlIGFscGhhIG1hbnVhbGx5CmEgPC0gbWVhbihsYXVuY2gkZGlzdHJlc3NfY3QpIC0gYiptZWFuKGxhdW5jaCR0ZW1wZXJhdHVyZSkKYQpgYGAKCmBgYHtyfQojQ2FsY3VsYXRlIHRoZSBjb3JyZWxhdGlvbiBvZiBMYXVuY2ggRGF0YQpyIDwtIGNvdihsYXVuY2gkdGVtcGVyYXR1cmUsIGxhdW5jaCRkaXN0cmVzc19jdCkgLyAoc2QobGF1bmNoJHRlbXBlcmF0dXJlKSpzZChsYXVuY2gkZGlzdHJlc3NfY3QpKQpyCmBgYAoKYGBge3J9CmNvcihsYXVuY2gkdGVtcGVyYXR1cmUsIGxhdW5jaCRkaXN0cmVzc19jdCkKYGBgCgpgYGB7cn0KIyBDb21wdXRpbmcgdGhlIHNsb3BlIHVzaW5nIGNvcnJlbGF0aW9uCnIgKiAoc2QobGF1bmNoJGRpc3RyZXNzX2N0KSAvIHNkKGxhdW5jaCR0ZW1wZXJhdHVyZSkpCmBgYAoKYGBge3J9CiNDb25maXJtaW5nIHRoZSByZWdyZXNzaW9uIGxpbmUgdXNpbmcgdGhlIGxtIGZ1bmN0aW9uIChub3QgaW4gdGV4dCkKbW9kZWwgPC0gbG0oZGlzdHJlc3NfY3QgfiB0ZW1wZXJhdHVyZSwgZGF0YSA9IGxhdW5jaCkKbW9kZWwKYGBgCgpgYGB7cn0Kc3VtbWFyeShtb2RlbCkKYGBgCgpgYGB7cn0KI0NyZWF0aW5nIGEgc2ltcGxlIG11bHRpcGxlIHJlZ3Jlc3Npb24gZnVuY3Rpb24KcmVnIDwtIGZ1bmN0aW9uKHkseCkgewogIHggPC0gYXMubWF0cml4KHgpCiAgeCA8LSBjYmluZChJbnRlcnByZXQgPSAxLHgpCiAgYiA8LSBzb2x2ZSh0KHgpICUqJSB4KSAlKiUgdCh4KSAlKiUgeQogIGNvbG5hbWVzKGIpIDwtICJlc3RpbWF0ZSIKICBwcmludChiKQp9CmBgYAoKYGBge3J9CiNFeGFtaW5lIHRoZSBsYXVuY2ggRGF0YQpzdHIobGF1bmNoKQpgYGAKCmBgYHtyfQojVGVzdCByZWdyZXNzaW9uIG1vZGVsIHdpdGggU2ltcGxlIExpbmVhciBSZWdyZXNzaW9uCnJlZyh5ID0gbGF1bmNoJGRpc3RyZXNzX2N0LCB4ID0gbGF1bmNoWzJdKQpgYGAKCmBgYHtyfQojIFVzZSBSZWdyZXNzaW9uIE1vZGVsIHdpdGggTXVsdGlwbGUgUmVncmVzc2lvbgpyZWcoeSA9IGxhdW5jaCRkaXN0cmVzc19jdCwgeCA9IGxhdW5jaFsyOjRdKQpgYGAKCmBgYHtyfQojQ29uZmlybWluZyB0aGUgbXVsdGlwbGUgUmVncmVzc2lvbiByZXN1bHQgdXNpbmcgdGhlIGxtIGZ1bmN0aW9uIChubyB0IGluIHRleHQpCm1vZGVsIDwtIGxtKGRpc3RyZXNzX2N0IH4gdGVtcGVyYXR1cmUgKyBmaWVsZF9jaGVja19wcmVzc3VyZSArIGZsaWdodF9udW0sIGRhdGEgPSBsYXVuY2gpCm1vZGVsCmBgYAoKYGBge3J9CnN1bW1hcnkobW9kZWwpCmBgYAoKIyNQcmVkaWN0aW5nIE1lZGljYWwgRXhwZW5zZXMKCmBgYHtyfQojIyBTdGVwIDI6IEV4cGxvcmluZyBhbmQgcHJlcGFyaW5nIHRoZSBkYXRhCmluc3VyYW5jZSA8LSByZWFkLmNzdigiaW5zdXJhbmNlLmNzdiIsIHN0cmluZ3NBc0ZhY3RvcnMgPSBUUlVFKQpzdHIoaW5zdXJhbmNlKQpgYGAKCmBgYHtyfQojU3VtbWFyaXplIHRoZSBjaGFyZ2VzIHZhcmlhYmxlCnN1bW1hcnkoaW5zdXJhbmNlJGV4cGVuc2VzKQpgYGAKCmBgYHtyfQojIEhpc3RvZ3JhbSBvZiBpbnN1cmFuY2UgY2hhcmdlcwpoaXN0KGluc3VyYW5jZSRleHBlbnNlcykKYGBgCgpgYGB7cn0KdGFibGUoaW5zdXJhbmNlJHJlZ2lvbikKYGBgCgpgYGB7cn0KIyBFeHBsb3JpbmcgcmVsYXRpb25zaGlwcyBhbW9uZyBmZWF0dXJlczogY29ycmVsYXRpb24gbWF0cml4CmNvcihpbnN1cmFuY2VbYygiYWdlIiwiYm1pIiwiY2hpbGRyZW4iLCJleHBlbnNlcyIpXSkKYGBgCgpgYGB7cn0KI1Zpc3VhbGl6aW5nIHJlbGF0aW9uc2hpcHMgYW1vbmcgZmVhdHVyZXM6IFNjYXR0ZXJwbG90IE1hdHJpeApwYWlycyhpbnN1cmFuY2VbYygiYWdlIiwiYm1pIiwiY2hpbGRyZW4iLCJleHBlbnNlcyIpXSkKYGBgCgpgYGB7cn0KI1N0ZXAgMzogVHJhaW5pbmcgYSBtb2RlbCBvbiB0aGUgZGF0YSAtLS0KaW5zX21vZGVsIDwtIGxtKGV4cGVuc2VzIH4gYWdlICsgY2hpbGRyZW4gKyBibWkgKyBzZXggKyBzbW9rZXIgKyByZWdpb24sCiAgICAgICAgICAgICAgICBkYXRhID0gaW5zdXJhbmNlKQppbnNfbW9kZWwgPC0gbG0oZXhwZW5zZXMgfiAuLCBkYXRhID0gaW5zdXJhbmNlKSAjIHRoaXMgaXMgZXF1aXZhbGVudCB0byBhYm92ZQoKIyBzZWUgdGhlIGVzdGltYXRlZCBiZXRhIGNvZWZmaWNpZW50cwppbnNfbW9kZWwKYGBgCgoKIyNTdGVwIDQ6IEV2YWx1YXRpbmcgTW9kZWwgUGVyZm9ybWFuY2UKCmBgYHtyfQojIHNlZSBtb3JlIGRldGFpbCBhYm91dCB0aGUgZXN0aW1hdGVkIGJldGEgY29lZmZpY2llbnRzCnN1bW1hcnkoaW5zX21vZGVsKQpgYGAKCgojI1N0ZXAgNTogSW1wcm92aW5nIE1vZGVsIFBlcmZvcm1hbmNlCgoKYGBge3J9CiMgYWRkIGEgaGlnaGVyLW9yZGVyICJhZ2UiIHRlcm0KaW5zdXJhbmNlJGFnZTIgPC0gaW5zdXJhbmNlJGFnZV4yCmBgYAoKCmBgYHtyfQojIGFkZCBhbiBpbmRpY2F0b3IgZm9yIEJNSSA+PSAzMAppbnN1cmFuY2UkYm1pMzAgPC0gaWZlbHNlKGluc3VyYW5jZSRibWkgPj0gMzAsMSwwKQpgYGAKCmBgYHtyfQojIENyZWF0ZSBGaW5hbCBtb2RlbAppbnNfbW9kZWwyIDwtIGxtKGV4cGVuc2VzIH4gYWdlICsgYWdlMiArIGNoaWxkcmVuICsgYm1pICsgc2V4ICsgYm1pMzAqc21va2VyICsgcmVnaW9uLCBkYXRhID0gaW5zdXJhbmNlKQpgYGAKCmBgYHtyfQpzdW1tYXJ5KGluc19tb2RlbDIpCmBgYAoKIyMjIFdlIGhhdmUgYSBsb3Qgb2YgdmFyaWFibGUgZm9yIHRoaXMgbW9kZWwuIFRoZSBSLXNxdWFyZWQgaXMgMC44NiBtZWFucyA4NiUgc28gYWxsIG9mIHRob3NlIHZhcmlhYmxlcyBhcmUgc2lnbmlmaWNhbnRseSBpbXBvcnRhbnQgdG8gcHJlZGljdCB0aGUgbW9kZWwuCgpgYGB7cn0KIyBNYWtpbmcgcHJlZGljdGlvbnMgd2l0aCB0aGUgUmVncmVzc2lvbiBNb2RlbAppbnN1cmFuY2UkcHJlZCA8LSBwcmVkaWN0KGluc19tb2RlbDIsIGluc3VyYW5jZSkKY29yKGluc3VyYW5jZSRwcmVkLCBpbnN1cmFuY2UkZXhwZW5zZXMpCmBgYAoKCmBgYHtyfQpwbG90KGluc3VyYW5jZSRwcmVkLCBpbnN1cmFuY2UkZXhwZW5zZXMpCmFibGluZShhID0gMCwgYiA9IDEsIGNvbCA9ICJyZWQiLCBsd2QgPSAzLCBsdHkgPSAyKQpgYGAKCmBgYHtyfQpwcmVkaWN0KGluc19tb2RlbDIsCiAgICAgICAgZGF0YS5mcmFtZShhZ2U9MjEsIGFnZTI9MjFeMixjaGlsZHJlbj0zLAogICAgICAgICAgICAgICAgICAgYm1pPTI0LHNleD0iZmVtYWxlIixibWkzMD0wLAogICAgICAgICAgICAgICAgICAgc21va2VyPSJubyIscmVnaW9uPSJub3J0aHdlc3QiKSkKYGBgCgpgYGB7cn0KcHJlZGljdChpbnNfbW9kZWwyLAogICAgICAgIGRhdGEuZnJhbWUoYWdlPTIyLGFnZTI9MjJeMixjaGlsZHJlbj0xLAogICAgICAgICAgICAgICAgICAgYm1pPTI3LHNleD0ibWFsZSIsYm1pMzA9MCwKICAgICAgICAgICAgICAgICAgIHNtb2tlcj0ieWVzIixyZWdpb249InNvdXRoZWFzdCIpKQpgYGAKCiMjIyBQYXJ0IDI6IFJlZ3Jlc3Npb24gVHJlZXMgYW5kIE1vZGVsIFRyZWVzCgojI1VuZGVyc3RhbmRpbmcgUmVncmVzc2lvbiBUcmVlcyBhbmQgTW9kZWwgVHJlZXMKCiMjRXhhbXBsZTogQ2FsY3VsYXRpbmcgU0RSCgpgYGB7cn0KIyBTZXQgdXAgdGhlIGRhdGEKdGVlIDwtIGMoMSwxLDEsMiwyLDMsNCw1LDUsNiw2LDcsNyw3LDcpCmF0MSA8LSBjKDEsMSwxLDIsMiwzLDQsNSw1KQphdDIgPC0gYyg2LDYsNyw3LDcsNykKYnQxIDwtIGMoMSwxLDEsMiwyLDMsNCkKYnQyIDwtIGMoNSw1LDYsNiw3LDcsNyw3KQpgYGAKCmBgYHtyfQojQ29tcHV0ZSB0aGUgU0RSCnNkcl9hIDwtIHNkKHRlZSkgLSAobGVuZ3RoKGF0MSkgLyBsZW5ndGgodGVlKSAqIHNkKGF0MSkgKyBsZW5ndGgoYXQyKSAvIGxlbmd0aCh0ZWUpICogc2QoYXQyKSkKc2RyX2IgPC0gc2QodGVlKSAtIChsZW5ndGgoYnQxKSAvIGxlbmd0aCh0ZWUpICogc2QoYnQxKSArIGxlbmd0aChidDIpIC8gbGVuZ3RoKHRlZSkgKiBzZChidDIpKQpgYGAKCmBgYHtyfQojQ29tcGFyZSB0aGUgU0RSIGZvciBlYWNoIHNwbGl0CnNkcl9hCmBgYAoKYGBge3J9CnNkcl9iCmBgYAoKCiMjRXhlcmNpc2UgTm8gMzogRXN0aW1hdGluZyBXaW5lIFF1YWxpdHkKCiMjU3RlcCAyOiBFeHBsb3JpbmcgYW5kIFByZXBhcmluZyB0aGUgRGF0YQoKCmBgYHtyfQp3aW5lIDwtIHJlYWQuY3N2KCJ3aW5lcXVhbGl0eS1yZWQuY3N2IikKYGBgCgpgYGB7cn0KIyBFeGFtaW5lIHRoZSB3aW5lIERhdGEKc3RyKHdpbmUpCmBgYAoKYGBge3J9CiMgVGhlIERpc3RyaWJ1dGlvbiBvZiBRdWFsaXR5IFJhdGluZ3MKaGlzdCh3aW5lJHF1YWxpdHkpCmBgYAoKCmBgYHtyfQpzdW1tYXJ5KHdpbmUpCmBgYAoKYGBge3J9CndpbmVfdHJhaW4gPC0gd2luZVsxOjM3NTAsIF0Kd2luZV90ZXN0IDwtIHdpbmVbMzc1MTo0ODk4LCBdCmBgYAoKIyNTdGVwIDM6IFRyYWluaW5nIGEgbW9kZWwgb24gdGhlIGRhdGEKCmBgYHtyfQojUmVncmVzc2lvbiBUcmVlIHVzaW5nIHJwYXJ0CmxpYnJhcnkocnBhcnQpCm0ucnBhcnQgPC0gcnBhcnQocXVhbGl0eSB+IC4sIGRhdGEgPSB3aW5lX3RyYWluKQpgYGAKCgpgYGB7cn0KIyBnZXQgYmFzaWMgaW5mb3JtYXRpb24gYWJvdXQgdGhlIHRyZWUKbS5ycGFydApgYGAKCmBgYHtyfQojIGdldCBtb3JlIGRldGFpbGVkIGluZm9ybWF0aW9uIGFib3V0IHRoZSB0cmVlCnN1bW1hcnkobS5ycGFydCkKYGBgCgoKYGBge3J9Cmluc3RhbGwucGFja2FnZXMoInJwYXJ0LnBsb3QiKQpgYGAKCgpgYGB7cn0KI1VzZSB0aGUgcnBhcnQucGxvdCBwYWNrYWdlIHRvIGNyZWF0ZSBhIHZpc3VhbGl6YXRpb24KbGlicmFyeShycGFydC5wbG90KQpgYGAKCmBgYHtyfQojIGEgYmFzaWMgZGVjaXNpb24gdHJlZSBkaWFncmFtCnJwYXJ0LnBsb3QobS5ycGFydCwgZGlnaXRzID0gMykKYGBgCgpgYGB7cn0KIyBhIGZldyBhZGp1c3RtZW50cyB0byB0aGUgZGlhZ3JhbQpycGFydC5wbG90KG0ucnBhcnQsIGRpZ2l0cz00LCBmYWxsZW4ubGVhdmVzID0gVFJVRSwgdHlwZT0zLCBleHRyYT0xMDEpCmBgYAoKIyNTdGVwIDQ6IEV2YWx1YXRlIE1vZGVsIFBlcmZvcm1hbmNlCgpgYGB7cn0KIyBHZW5lcmF0ZSBQcmVkaWN0aW9ucyBmb3IgdGhlIHRlc3RpbmcgZGF0YXNldApwLnJwYXJ0IDwtIHByZWRpY3QobS5ycGFydCwgd2luZV90ZXN0KQpgYGAKCmBgYHtyfQojQ29tcGFyZSB0aGUgZGlzdHJpYnV0aW9uIG9mIHByZWRpY3RlZCB2YWx1ZXMgdnMuIGFjdHVhbCB2YWx1ZXMKc3VtbWFyeShwLnJwYXJ0KQpgYGAKCmBgYHtyfQpzdW1tYXJ5KHdpbmVfdGVzdCRxdWFsaXR5KQpgYGAKCmBgYHtyfQojQ29tcGFyZSB0aGUgY29ycmVsYXRpb24KY29yKHAucnBhcnQsIHdpbmVfdGVzdCRxdWFsaXR5KQpgYGAKCgpgYGB7cn0KI0Z1bmN0aW9uIHRvIGNhbGN1bGF0ZSB0aGUgbWVhbiBhYnNvbHV0ZSBlcnJvcgpNQUUgPC0gZnVuY3Rpb24oYWN0dWFsLHByZWRpY3RlZCkgewogIG1lYW4oYWJzKGFjdHVhbC1wcmVkaWN0ZWQpKQp9CmBgYAoKCmBgYHtyfQojIE1lYW4gQWJzb2x1dGUgRXJyb3IgYmV0d2VlbiBwcmVkaWN0ZWQgYW5kIGFjdHVhbCB2YWx1ZXMKTUFFKHAucnBhcnQsIHdpbmVfdGVzdCRxdWFsaXR5KQpgYGAKCgpgYGB7cn0KIyBNZWFuIEFic29sdXRlIEVycm9yIGJldHdlZW4gYWN0dWFsIHZhbHVlcyBhbmQgbWVhbiB2YWx1ZQptZWFuKHdpbmVfdHJhaW4kcXVhbGl0eSkgIyByZXN1bHQgPSA1Ljg3CmBgYAoKCmBgYHtyfQpNQUUoNS44Nywgd2luZV90ZXN0JHF1YWxpdHkpCmBgYAoKCiMjU3RlcCA1OiBJbXByb3ZpbmcgTW9kZWwgUGVyZm9ybWFuY2UKCgpgYGB7cn0KaW5zdGFsbC5wYWNrYWdlcygicGx5ciIpCmBgYAoKCmBgYHtyfQppbnN0YWxsLnBhY2thZ2VzKCJDdWJpc3QiKQpgYGAKCgpgYGB7cn0KI3RyYWluIGEgQ3ViaXN0IE1vZGVsIFRyZWUKbGlicmFyeShDdWJpc3QpCmBgYAoKCmBgYHtyfQptLmN1YmlzdCA8LSBjdWJpc3QoeCA9IHdpbmVfdHJhaW5bLTEyXSwgeT13aW5lX3RyYWluJHF1YWxpdHkpCmBgYAoKCmBgYHtyfQojRGlzcGxheSBCYXNpYyBpbmZvcm1hdGlvbiBhYm91dCB0aGUgbW9kZWwgdHJlZQptLmN1YmlzdApgYGAKCgpgYGB7cn0KIyBEaXNwbGF5IHRoZSB0cmVlIGl0c2VsZgpzdW1tYXJ5KG0uY3ViaXN0KQpgYGAKCgpgYGB7cn0KIyBHZW5lcmF0ZSBwcmVkaWN0aW9ucyBmb3IgdGhlIG1vZGVsCnAuY3ViaXN0IDwtIHByZWRpY3QobS5jdWJpc3QsIHdpbmVfdGVzdCkKYGBgCgpgYGB7cn0KIyBzdW1hcnJ5IHN0YXRpc3RpYyBhYm91dCB0aGUgcHJlZGljdGlvbnMKc3VtbWFyeShwLmN1YmlzdCkKYGBgCgoKYGBge3J9CiMgY29ycmVsYXRpb24gYmV0d2VlbiB0aGUgcHJlZGljdGVkIGFuZCB0cnVlIHZhbHVlcwpjb3IocC5jdWJpc3QsIHdpbmVfdGVzdCRxdWFsaXR5KQpgYGAKCmBgYHtyfQojIG1lYW4gYWJzb2x1dGUgZXJyb3Igb2YgcHJlZGljdGVkIGFuZCB0cnVlIHZhbHVlcwojICh1c2VzIGEgY3VzdG9tIGZ1bmN0aW9uIGRlZmluZWQgYWJvdmUpCk1BRSh3aW5lX3Rlc3QkcXVhbGl0eSwgcC5jdWJpc3QpCmBgYAoK