getwd()
[1] "/cloud/project"
# Set the working directory to the folder containing the file
# Read the CSV file
launch <- read.csv("challenger2.csv")
View(launch)
# estimate beta manually
b <- cov(launch$temperature, launch$distress_ct) / var(launch$temperature)
b
[1] -0.03364796
#This value suggests a negative relationship between temperature and distress count.
# estimate alpha manually
a <- mean(launch$distress_ct) - b * mean(launch$temperature)
a
[1] 2.814585
# calculate the correlation of launch data
r <- cov(launch$temperature, launch$distress_ct) /
(sd(launch$temperature) * sd(launch$distress_ct))
r
[1] -0.3359996
# calculate the correlation between temperature and distress. we did it directly using this code because it was the same number
cor(launch$temperature, launch$distress_ct)
[1] -0.3359996
#this is a negative correlation
# computing the slope using correlation
r * (sd(launch$distress_ct) / sd(launch$temperature))
[1] -0.03364796
r
[1] -0.3359996
# confirming the regression line using the lm function (not in text)
model <- lm(distress_ct ~ temperature, data = launch)
model
Call:
lm(formula = distress_ct ~ temperature, data = launch)
Coefficients:
(Intercept) temperature
2.81458 -0.03365
#Hence, we can see that the values got through the linear regression model are very similar to the one that we got manually
summary(model)
Call:
lm(formula = distress_ct ~ temperature, data = launch)
Residuals:
Min 1Q Median 3Q Max
-1.0649 -0.4929 -0.2573 0.3052 1.7090
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.81458 1.24629 2.258 0.0322 *
temperature -0.03365 0.01815 -1.854 0.0747 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7076 on 27 degrees of freedom
Multiple R-squared: 0.1129, Adjusted R-squared: 0.08004
F-statistic: 3.436 on 1 and 27 DF, p-value: 0.07474
# creating a simple multiple regression function
reg <- function(y, x) {
x <- as.matrix(x)
x <- cbind(Intercept = 1, x)
b <- solve(t(x) %*% x) %*% t(x) %*% y
colnames(b) <- "estimate"
print(b)
}
# examine the launch data
str(launch)
'data.frame': 29 obs. of 4 variables:
$ distress_ct : int 0 1 0 0 0 0 0 0 1 1 ...
$ temperature : int 66 70 69 68 67 72 73 70 57 63 ...
$ field_check_pressure: int 50 50 50 50 50 50 100 100 200 200 ...
$ flight_num : int 1 2 3 4 5 6 7 8 9 10 ...
# test regression model with simple linear regression
reg(y = launch$distress_ct, x = launch[2])
estimate
Intercept 2.81458456
temperature -0.03364796
# use regression model with multiple regression
reg(y = launch$distress_ct, x = launch[2:4])
estimate
Intercept 2.239817e+00
temperature -3.124185e-02
field_check_pressure -2.586765e-05
flight_num 2.762455e-02
# confirming the multiple regression result using the lm function (not in text)
model <- lm(distress_ct ~ temperature + field_check_pressure + flight_num, data = launch)
model
Call:
lm(formula = distress_ct ~ temperature + field_check_pressure +
flight_num, data = launch)
Coefficients:
(Intercept) temperature field_check_pressure flight_num
2.240e+00 -3.124e-02 -2.587e-05 2.762e-02
summary(model)
Call:
lm(formula = distress_ct ~ temperature + field_check_pressure +
flight_num, data = launch)
Residuals:
Min 1Q Median 3Q Max
-1.2744 -0.3335 -0.1657 0.2975 1.5284
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.240e+00 1.267e+00 1.767 0.0894 .
temperature -3.124e-02 1.787e-02 -1.748 0.0927 .
field_check_pressure -2.587e-05 2.383e-03 -0.011 0.9914
flight_num 2.762e-02 1.798e-02 1.537 0.1369
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.6926 on 25 degrees of freedom
Multiple R-squared: 0.2132, Adjusted R-squared: 0.1188
F-statistic: 2.259 on 3 and 25 DF, p-value: 0.1063
#Display the summary of the multiple regression model
#In class, we discussed and realized that flight number and field check pressure are not significant to us
#Predicting Medical Expenses
## Step 2: Exploring and preparing the data ----
insurance <- read.csv("insurance.csv", stringsAsFactors = TRUE)
str(insurance)
'data.frame': 1338 obs. of 7 variables:
$ age : int 19 18 28 33 32 31 46 37 37 60 ...
$ sex : Factor w/ 2 levels "female","male": 1 2 2 2 2 1 1 1 2 1 ...
$ bmi : num 27.9 33.8 33 22.7 28.9 25.7 33.4 27.7 29.8 25.8 ...
$ children: int 0 1 3 0 0 0 1 3 2 0 ...
$ smoker : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
$ region : Factor w/ 4 levels "northeast","northwest",..: 4 3 3 2 2 3 3 2 1 2 ...
$ expenses: num 16885 1726 4449 21984 3867 ...
# summarize the charges variable
summary(insurance$expenses)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1122 4740 9382 13270 16640 63770
# histogram of insurance charges
hist(insurance$expenses)

# table of region
table(insurance$region)
northeast northwest southeast southwest
324 325 364 325
# exploring relationships among features: correlation matrix
cor(insurance[c("age", "bmi", "children", "expenses")])
age bmi children expenses
age 1.0000000 0.10934101 0.04246900 0.29900819
bmi 0.1093410 1.00000000 0.01264471 0.19857626
children 0.0424690 0.01264471 1.00000000 0.06799823
expenses 0.2990082 0.19857626 0.06799823 1.00000000
# visualing relationships among features: scatterplot matrix
pairs(insurance[c("age", "bmi", "children", "expenses")])

## Step 3: Training a model on the data ----
ins_model <- lm(expenses ~ age + children + bmi + sex + smoker + region,
data = insurance)
ins_model <- lm(expenses ~ ., data = insurance) # this is equivalent to above
# see the estimated beta coefficients
ins_model
Call:
lm(formula = expenses ~ ., data = insurance)
Coefficients:
(Intercept) age sexmale bmi children smokeryes
-11941.6 256.8 -131.4 339.3 475.7 23847.5
regionnorthwest regionsoutheast regionsouthwest
-352.8 -1035.6 -959.3
#Step 4: Evaluating model performance
# see more detail about the estimated beta coefficients
summary(ins_model)
Call:
lm(formula = expenses ~ ., data = insurance)
Residuals:
Min 1Q Median 3Q Max
-11302.7 -2850.9 -979.6 1383.9 29981.7
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -11941.6 987.8 -12.089 < 2e-16 ***
age 256.8 11.9 21.586 < 2e-16 ***
sexmale -131.3 332.9 -0.395 0.693255
bmi 339.3 28.6 11.864 < 2e-16 ***
children 475.7 137.8 3.452 0.000574 ***
smokeryes 23847.5 413.1 57.723 < 2e-16 ***
regionnorthwest -352.8 476.3 -0.741 0.458976
regionsoutheast -1035.6 478.7 -2.163 0.030685 *
regionsouthwest -959.3 477.9 -2.007 0.044921 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 6062 on 1329 degrees of freedom
Multiple R-squared: 0.7509, Adjusted R-squared: 0.7494
F-statistic: 500.9 on 8 and 1329 DF, p-value: < 2.2e-16
#Step 5: Improving model performance
# add a higher-order "age" term
insurance$age2 <- insurance$age^2
# add an indicator for BMI >= 30
insurance$bmi30 <- ifelse(insurance$bmi >= 30, 1, 0)
# create final model
ins_model2 <- lm(expenses ~ age + age2 + children + bmi + sex +
bmi30*smoker + region, data = insurance)
summary(ins_model2)
Call:
lm(formula = expenses ~ age + age2 + children + bmi + sex + bmi30 *
smoker + region, data = insurance)
Residuals:
Min 1Q Median 3Q Max
-17297.1 -1656.0 -1262.7 -727.8 24161.6
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 139.0053 1363.1359 0.102 0.918792
age -32.6181 59.8250 -0.545 0.585690
age2 3.7307 0.7463 4.999 6.54e-07 ***
children 678.6017 105.8855 6.409 2.03e-10 ***
bmi 119.7715 34.2796 3.494 0.000492 ***
sexmale -496.7690 244.3713 -2.033 0.042267 *
bmi30 -997.9355 422.9607 -2.359 0.018449 *
smokeryes 13404.5952 439.9591 30.468 < 2e-16 ***
regionnorthwest -279.1661 349.2826 -0.799 0.424285
regionsoutheast -828.0345 351.6484 -2.355 0.018682 *
regionsouthwest -1222.1619 350.5314 -3.487 0.000505 ***
bmi30:smokeryes 19810.1534 604.6769 32.762 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4445 on 1326 degrees of freedom
Multiple R-squared: 0.8664, Adjusted R-squared: 0.8653
F-statistic: 781.7 on 11 and 1326 DF, p-value: < 2.2e-16
# making predictions with the regression model
insurance$pred <- predict(ins_model2, insurance)
cor(insurance$pred, insurance$expenses)
[1] 0.9307999
plot(insurance$pred, insurance$expenses)
abline(a = 0, b = 1, col = "red", lwd = 3, lty = 2)

predict(ins_model2,
data.frame(age = 30, age2 = 30^2, children = 2,
bmi = 30, sex = "male", bmi30 = 1,
smoker = "no", region = "northeast"))
1
5973.774
predict(ins_model2,
data.frame(age = 30, age2 = 30^2, children = 2,
bmi = 30, sex = "female", bmi30 = 1,
smoker = "no", region = "northeast"))
1
6470.543
predict(ins_model2,
data.frame(age = 30, age2 = 30^2, children = 0,
bmi = 30, sex = "female", bmi30 = 1,
smoker = "no", region = "northeast"))
1
5113.34
#Part 2: Regression Trees and Model Trees
# set up the data
tee <- c(1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 7, 7, 7)
at1 <- c(1, 1, 1, 2, 2, 3, 4, 5, 5)
at2 <- c(6, 6, 7, 7, 7, 7)
bt1 <- c(1, 1, 1, 2, 2, 3, 4)
bt2 <- c(5, 5, 6, 6, 7, 7, 7, 7)
# compute the SDR
sdr_a <- sd(tee) - (length(at1) / length(tee) * sd(at1) + length(at2) / length(tee) * sd(at2))
sdr_b <- sd(tee) - (length(bt1) / length(tee) * sd(bt1) + length(bt2) / length(tee) * sd(bt2))
# compare the SDR for each split
sdr_a
[1] 1.202815
sdr_b
[1] 1.392751
#Exercise No 3: Estimating Wine Quality
#Step 2: Exploring and preparing the data
wine <- read.csv("whitewines.csv")
# examine the wine data
str(wine)
'data.frame': 4898 obs. of 12 variables:
$ fixed.acidity : num 6.7 5.7 5.9 5.3 6.4 7 7.9 6.6 7 6.5 ...
$ volatile.acidity : num 0.62 0.22 0.19 0.47 0.29 0.14 0.12 0.38 0.16 0.37 ...
$ citric.acid : num 0.24 0.2 0.26 0.1 0.21 0.41 0.49 0.28 0.3 0.33 ...
$ residual.sugar : num 1.1 16 7.4 1.3 9.65 0.9 5.2 2.8 2.6 3.9 ...
$ chlorides : num 0.039 0.044 0.034 0.036 0.041 0.037 0.049 0.043 0.043 0.027 ...
$ free.sulfur.dioxide : num 6 41 33 11 36 22 33 17 34 40 ...
$ total.sulfur.dioxide: num 62 113 123 74 119 95 152 67 90 130 ...
$ density : num 0.993 0.999 0.995 0.991 0.993 ...
$ pH : num 3.41 3.22 3.49 3.48 2.99 3.25 3.18 3.21 2.88 3.28 ...
$ sulphates : num 0.32 0.46 0.42 0.54 0.34 0.43 0.47 0.47 0.47 0.39 ...
$ alcohol : num 10.4 8.9 10.1 11.2 10.9 ...
$ quality : int 5 6 6 4 6 6 6 6 6 7 ...
# the distribution of quality ratings
hist(wine$quality)

# summary statistics of the wine data
summary(wine)
fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
Min. : 3.800 Min. :0.0800 Min. :0.0000 Min. : 0.600 Min. :0.00900
1st Qu.: 6.300 1st Qu.:0.2100 1st Qu.:0.2700 1st Qu.: 1.700 1st Qu.:0.03600
Median : 6.800 Median :0.2600 Median :0.3200 Median : 5.200 Median :0.04300
Mean : 6.855 Mean :0.2782 Mean :0.3342 Mean : 6.391 Mean :0.04577
3rd Qu.: 7.300 3rd Qu.:0.3200 3rd Qu.:0.3900 3rd Qu.: 9.900 3rd Qu.:0.05000
Max. :14.200 Max. :1.1000 Max. :1.6600 Max. :65.800 Max. :0.34600
free.sulfur.dioxide total.sulfur.dioxide density pH sulphates
Min. : 2.00 Min. : 9.0 Min. :0.9871 Min. :2.720 Min. :0.2200
1st Qu.: 23.00 1st Qu.:108.0 1st Qu.:0.9917 1st Qu.:3.090 1st Qu.:0.4100
Median : 34.00 Median :134.0 Median :0.9937 Median :3.180 Median :0.4700
Mean : 35.31 Mean :138.4 Mean :0.9940 Mean :3.188 Mean :0.4898
3rd Qu.: 46.00 3rd Qu.:167.0 3rd Qu.:0.9961 3rd Qu.:3.280 3rd Qu.:0.5500
Max. :289.00 Max. :440.0 Max. :1.0390 Max. :3.820 Max. :1.0800
alcohol quality
Min. : 8.00 Min. :3.000
1st Qu.: 9.50 1st Qu.:5.000
Median :10.40 Median :6.000
Mean :10.51 Mean :5.878
3rd Qu.:11.40 3rd Qu.:6.000
Max. :14.20 Max. :9.000
wine_train <- wine[1:3750, ]
wine_test <- wine[3751:4898, ]
#Step 3: Training a model on the data
# regression tree using rpart
library(rpart)
m.rpart <- rpart(quality ~ ., data = wine_train)
# get basic information about the tree
m.rpart
n= 3750
node), split, n, deviance, yval
* denotes terminal node
1) root 3750 2945.53200 5.870933
2) alcohol< 10.85 2372 1418.86100 5.604975
4) volatile.acidity>=0.2275 1611 821.30730 5.432030
8) volatile.acidity>=0.3025 688 278.97670 5.255814 *
9) volatile.acidity< 0.3025 923 505.04230 5.563380 *
5) volatile.acidity< 0.2275 761 447.36400 5.971091 *
3) alcohol>=10.85 1378 1070.08200 6.328737
6) free.sulfur.dioxide< 10.5 84 95.55952 5.369048 *
7) free.sulfur.dioxide>=10.5 1294 892.13600 6.391036
14) alcohol< 11.76667 629 430.11130 6.173291
28) volatile.acidity>=0.465 11 10.72727 4.545455 *
29) volatile.acidity< 0.465 618 389.71680 6.202265 *
15) alcohol>=11.76667 665 403.99400 6.596992 *
# get more detailed information about the tree
summary(m.rpart)
Call:
rpart(formula = quality ~ ., data = wine_train)
n= 3750
CP nsplit rel error xerror xstd
1 0.15501053 0 1.0000000 1.0006389 0.02447599
2 0.05098911 1 0.8449895 0.8508355 0.02350883
3 0.02796998 2 0.7940004 0.8064159 0.02286371
4 0.01970128 3 0.7660304 0.7924235 0.02209010
5 0.01265926 4 0.7463291 0.7737728 0.02147832
6 0.01007193 5 0.7336698 0.7557598 0.02099234
7 0.01000000 6 0.7235979 0.7493923 0.02076632
Variable importance
alcohol density volatile.acidity chlorides
34 21 15 11
total.sulfur.dioxide free.sulfur.dioxide residual.sugar sulphates
7 6 3 1
citric.acid
1
Node number 1: 3750 observations, complexity param=0.1550105
mean=5.870933, MSE=0.7854751
left son=2 (2372 obs) right son=3 (1378 obs)
Primary splits:
alcohol < 10.85 to the left, improve=0.15501050, (0 missing)
density < 0.992035 to the right, improve=0.10915940, (0 missing)
chlorides < 0.0395 to the right, improve=0.07682258, (0 missing)
total.sulfur.dioxide < 158.5 to the right, improve=0.04089663, (0 missing)
citric.acid < 0.235 to the left, improve=0.03636458, (0 missing)
Surrogate splits:
density < 0.991995 to the right, agree=0.869, adj=0.644, (0 split)
chlorides < 0.0375 to the right, agree=0.757, adj=0.339, (0 split)
total.sulfur.dioxide < 103.5 to the right, agree=0.690, adj=0.155, (0 split)
residual.sugar < 5.375 to the right, agree=0.667, adj=0.094, (0 split)
sulphates < 0.345 to the right, agree=0.647, adj=0.038, (0 split)
Node number 2: 2372 observations, complexity param=0.05098911
mean=5.604975, MSE=0.5981709
left son=4 (1611 obs) right son=5 (761 obs)
Primary splits:
volatile.acidity < 0.2275 to the right, improve=0.10585250, (0 missing)
free.sulfur.dioxide < 13.5 to the left, improve=0.03390500, (0 missing)
citric.acid < 0.235 to the left, improve=0.03204075, (0 missing)
alcohol < 10.11667 to the left, improve=0.03136524, (0 missing)
chlorides < 0.0585 to the right, improve=0.01633599, (0 missing)
Surrogate splits:
pH < 3.485 to the left, agree=0.694, adj=0.047, (0 split)
sulphates < 0.755 to the left, agree=0.685, adj=0.020, (0 split)
total.sulfur.dioxide < 105.5 to the right, agree=0.683, adj=0.011, (0 split)
residual.sugar < 0.75 to the right, agree=0.681, adj=0.007, (0 split)
chlorides < 0.0285 to the right, agree=0.680, adj=0.003, (0 split)
Node number 3: 1378 observations, complexity param=0.02796998
mean=6.328737, MSE=0.7765472
left son=6 (84 obs) right son=7 (1294 obs)
Primary splits:
free.sulfur.dioxide < 10.5 to the left, improve=0.07699080, (0 missing)
alcohol < 11.76667 to the left, improve=0.06210660, (0 missing)
total.sulfur.dioxide < 67.5 to the left, improve=0.04438619, (0 missing)
residual.sugar < 1.375 to the left, improve=0.02905351, (0 missing)
fixed.acidity < 7.35 to the right, improve=0.02613259, (0 missing)
Surrogate splits:
total.sulfur.dioxide < 53.5 to the left, agree=0.952, adj=0.214, (0 split)
volatile.acidity < 0.875 to the right, agree=0.940, adj=0.024, (0 split)
Node number 4: 1611 observations, complexity param=0.01265926
mean=5.43203, MSE=0.5098121
left son=8 (688 obs) right son=9 (923 obs)
Primary splits:
volatile.acidity < 0.3025 to the right, improve=0.04540111, (0 missing)
alcohol < 10.05 to the left, improve=0.03874403, (0 missing)
free.sulfur.dioxide < 13.5 to the left, improve=0.03338886, (0 missing)
chlorides < 0.0495 to the right, improve=0.02574623, (0 missing)
citric.acid < 0.195 to the left, improve=0.02327981, (0 missing)
Surrogate splits:
citric.acid < 0.215 to the left, agree=0.633, adj=0.141, (0 split)
free.sulfur.dioxide < 20.5 to the left, agree=0.600, adj=0.063, (0 split)
chlorides < 0.0595 to the right, agree=0.593, adj=0.047, (0 split)
residual.sugar < 1.15 to the left, agree=0.583, adj=0.023, (0 split)
total.sulfur.dioxide < 219.25 to the right, agree=0.582, adj=0.022, (0 split)
Node number 5: 761 observations
mean=5.971091, MSE=0.5878633
Node number 6: 84 observations
mean=5.369048, MSE=1.137613
Node number 7: 1294 observations, complexity param=0.01970128
mean=6.391036, MSE=0.6894405
left son=14 (629 obs) right son=15 (665 obs)
Primary splits:
alcohol < 11.76667 to the left, improve=0.06504696, (0 missing)
chlorides < 0.0395 to the right, improve=0.02758705, (0 missing)
fixed.acidity < 7.35 to the right, improve=0.02750932, (0 missing)
pH < 3.055 to the left, improve=0.02307356, (0 missing)
total.sulfur.dioxide < 191.5 to the right, improve=0.02186818, (0 missing)
Surrogate splits:
density < 0.990885 to the right, agree=0.720, adj=0.424, (0 split)
volatile.acidity < 0.2675 to the left, agree=0.637, adj=0.253, (0 split)
chlorides < 0.0365 to the right, agree=0.630, adj=0.238, (0 split)
residual.sugar < 1.475 to the left, agree=0.575, adj=0.126, (0 split)
total.sulfur.dioxide < 128.5 to the right, agree=0.574, adj=0.124, (0 split)
Node number 8: 688 observations
mean=5.255814, MSE=0.4054895
Node number 9: 923 observations
mean=5.56338, MSE=0.5471747
Node number 14: 629 observations, complexity param=0.01007193
mean=6.173291, MSE=0.6838017
left son=28 (11 obs) right son=29 (618 obs)
Primary splits:
volatile.acidity < 0.465 to the right, improve=0.06897561, (0 missing)
total.sulfur.dioxide < 200 to the right, improve=0.04223066, (0 missing)
residual.sugar < 0.975 to the left, improve=0.03061714, (0 missing)
fixed.acidity < 7.35 to the right, improve=0.02978501, (0 missing)
sulphates < 0.575 to the left, improve=0.02165970, (0 missing)
Surrogate splits:
citric.acid < 0.045 to the left, agree=0.986, adj=0.182, (0 split)
total.sulfur.dioxide < 279.25 to the right, agree=0.986, adj=0.182, (0 split)
Node number 15: 665 observations
mean=6.596992, MSE=0.6075098
Node number 28: 11 observations
mean=4.545455, MSE=0.9752066
Node number 29: 618 observations
mean=6.202265, MSE=0.6306098
install.packages("rpart.plot")
Error in install.packages : Updating loaded packages
# use the rpart.plot package to create a visualization
library(rpart.plot)
# a basic decision tree diagram
rpart.plot(m.rpart, digits = 3)

# a few adjustments to the diagram
rpart.plot(m.rpart, digits = 4, fallen.leaves = TRUE, type = 3, extra = 101)

#Step 4: Evaluate model performance
# generate predictions for the testing dataset
p.rpart <- predict(m.rpart, wine_test)
# compare the distribution of predicted values vs. actual values
summary(p.rpart)
Min. 1st Qu. Median Mean 3rd Qu. Max.
4.545 5.563 5.971 5.893 6.202 6.597
summary(wine_test$quality)
Min. 1st Qu. Median Mean 3rd Qu. Max.
3.000 5.000 6.000 5.901 6.000 9.000
# compare the correlation
cor(p.rpart, wine_test$quality)
[1] 0.5369525
# function to calculate the mean absolute error
MAE <- function(actual, predicted) {
mean(abs(actual - predicted))
}
# mean absolute error between predicted and actual values
MAE(p.rpart, wine_test$quality)
[1] 0.5872652
# mean absolute error between actual values and mean value
mean(wine_train$quality) # result = 5.87
[1] 5.870933
MAE(5.87, wine_test$quality)
[1] 0.6722474
#Step 5: Improving model performance
install.packages("plyr")
Error in install.packages : Updating loaded packages
install.packages("Cubist")
Error in install.packages : Updating loaded packages
# train a Cubist Model Tree
library(Cubist)
m.cubist <- cubist(x = wine_train[-12], y = wine_train$quality)
# display basic information about the model tree
m.cubist
Call:
cubist.default(x = wine_train[-12], y = wine_train$quality)
Number of samples: 3750
Number of predictors: 11
Number of committees: 1
Number of rules: 25
# display the tree itself
summary(m.cubist)
Call:
cubist.default(x = wine_train[-12], y = wine_train$quality)
Cubist [Release 2.07 GPL Edition] Wed Feb 26 21:06:40 2025
---------------------------------
Target attribute `outcome'
Read 3750 cases (12 attributes) from undefined.data
Model:
Rule 1: [21 cases, mean 5.0, range 4 to 6, est err 0.5]
if
free.sulfur.dioxide > 30
total.sulfur.dioxide > 195
total.sulfur.dioxide <= 235
sulphates > 0.64
alcohol > 9.1
then
outcome = 573.6 + 0.0478 total.sulfur.dioxide - 573 density
- 0.788 alcohol + 0.186 residual.sugar - 4.73 volatile.acidity
Rule 2: [28 cases, mean 5.0, range 4 to 8, est err 0.7]
if
volatile.acidity > 0.31
citric.acid <= 0.36
residual.sugar <= 1.45
total.sulfur.dioxide <= 97
alcohol > 9.1
then
outcome = 168.2 + 4.75 citric.acid + 0.0123 total.sulfur.dioxide
- 170 density + 0.057 residual.sugar - 6.4 chlorides + 0.84 pH
+ 0.14 fixed.acidity
Rule 3: [171 cases, mean 5.1, range 3 to 6, est err 0.3]
if
volatile.acidity > 0.205
chlorides <= 0.054
density <= 0.99839
alcohol <= 9.1
then
outcome = 147.4 - 144 density + 0.08 residual.sugar + 0.117 alcohol
- 0.87 volatile.acidity - 0.09 pH - 0.01 fixed.acidity
Rule 4: [37 cases, mean 5.3, range 3 to 6, est err 0.5]
if
free.sulfur.dioxide > 30
total.sulfur.dioxide > 235
alcohol > 9.1
then
outcome = 19.5 - 0.013 total.sulfur.dioxide - 2.7 volatile.acidity
- 10 density + 0.005 residual.sugar + 0.008 alcohol
Rule 5: [64 cases, mean 5.3, range 5 to 6, est err 0.3]
if
volatile.acidity > 0.205
residual.sugar > 17.85
then
outcome = -23.6 + 0.233 alcohol - 5.2 chlorides - 0.75 citric.acid
+ 28 density - 0.81 volatile.acidity - 0.19 pH
- 0.002 residual.sugar
Rule 6: [56 cases, mean 5.3, range 4 to 7, est err 0.6]
if
fixed.acidity <= 7.1
volatile.acidity > 0.205
chlorides > 0.054
density <= 0.99839
alcohol <= 9.1
then
outcome = 40.6 + 0.374 alcohol - 1.62 volatile.acidity
+ 0.026 residual.sugar - 38 density - 0.21 pH
- 0.01 fixed.acidity
Rule 7: [337 cases, mean 5.3, range 3 to 7, est err 0.4]
if
fixed.acidity <= 7.8
volatile.acidity > 0.305
chlorides <= 0.09
free.sulfur.dioxide <= 82.5
total.sulfur.dioxide > 130
total.sulfur.dioxide <= 235
sulphates <= 0.64
alcohol <= 10.4
then
outcome = -32.1 + 0.233 alcohol - 9.7 chlorides
+ 0.0038 total.sulfur.dioxide - 0.0081 free.sulfur.dioxide
+ 35 density + 0.81 volatile.acidity
Rule 8: [30 cases, mean 5.5, range 3 to 7, est err 0.5]
if
fixed.acidity > 7.1
volatile.acidity > 0.205
chlorides > 0.054
density <= 0.99839
alcohol <= 9.1
then
outcome = 244 - 1.56 fixed.acidity - 228 density
+ 0.0252 free.sulfur.dioxide - 7.3 chlorides
- 0.19 volatile.acidity + 0.003 residual.sugar
Rule 9: [98 cases, mean 5.5, range 4 to 8, est err 0.5]
if
volatile.acidity > 0.155
chlorides > 0.09
total.sulfur.dioxide <= 235
sulphates <= 0.64
then
outcome = 55.9 - 3.85 volatile.acidity - 52 density
+ 0.023 residual.sugar + 0.092 alcohol + 0.35 pH
+ 0.05 fixed.acidity + 0.3 sulphates
+ 0.001 free.sulfur.dioxide
Rule 10: [446 cases, mean 5.6, range 4 to 8, est err 0.5]
if
fixed.acidity <= 7.8
volatile.acidity > 0.155
volatile.acidity <= 0.305
chlorides <= 0.09
free.sulfur.dioxide <= 82.5
total.sulfur.dioxide > 130
total.sulfur.dioxide <= 235
sulphates <= 0.64
alcohol > 9.1
alcohol <= 10.4
then
outcome = 15.1 + 0.35 alcohol - 3.09 volatile.acidity - 14.7 chlorides
+ 1.16 sulphates - 0.0022 total.sulfur.dioxide
+ 0.11 fixed.acidity + 0.45 pH + 0.5 citric.acid - 14 density
+ 0.006 residual.sugar
Rule 11: [31 cases, mean 5.6, range 3 to 8, est err 0.8]
if
volatile.acidity > 0.31
citric.acid > 0.36
free.sulfur.dioxide <= 30
total.sulfur.dioxide <= 97
then
outcome = 3.2 + 0.0584 total.sulfur.dioxide + 7.77 volatile.acidity
+ 0.328 alcohol - 9 density + 0.003 residual.sugar
Rule 12: [20 cases, mean 5.7, range 3 to 8, est err 0.9]
if
free.sulfur.dioxide > 82.5
total.sulfur.dioxide <= 235
sulphates <= 0.64
alcohol > 9.1
then
outcome = -8.9 + 109.3 chlorides + 0.948 alcohol
Rule 13: [331 cases, mean 5.8, range 4 to 8, est err 0.5]
if
volatile.acidity > 0.31
free.sulfur.dioxide <= 30
total.sulfur.dioxide > 97
alcohol > 9.1
then
outcome = 89.8 + 0.0234 free.sulfur.dioxide + 0.324 alcohol
+ 0.07 residual.sugar - 90 density - 1.47 volatile.acidity
+ 0.48 pH
Rule 14: [116 cases, mean 5.8, range 3 to 8, est err 0.6]
if
fixed.acidity > 7.8
volatile.acidity > 0.155
free.sulfur.dioxide > 30
total.sulfur.dioxide > 130
total.sulfur.dioxide <= 235
sulphates <= 0.64
alcohol > 9.1
then
outcome = 6 + 0.346 alcohol - 0.41 fixed.acidity - 1.69 volatile.acidity
- 2.9 chlorides + 0.19 sulphates + 0.07 pH
Rule 15: [115 cases, mean 5.8, range 4 to 7, est err 0.5]
if
volatile.acidity > 0.205
residual.sugar <= 17.85
density > 0.99839
alcohol <= 9.1
then
outcome = -110.2 + 120 density - 3.46 volatile.acidity - 0.97 pH
- 0.022 residual.sugar + 0.088 alcohol - 0.6 citric.acid
- 0.01 fixed.acidity
Rule 16: [986 cases, mean 5.9, range 3 to 9, est err 0.6]
if
volatile.acidity <= 0.31
free.sulfur.dioxide <= 30
alcohol > 9.1
then
outcome = 280.4 - 282 density + 0.128 residual.sugar
+ 0.0264 free.sulfur.dioxide - 3 volatile.acidity + 1.2 pH
+ 0.65 citric.acid + 0.09 fixed.acidity + 0.56 sulphates
+ 0.015 alcohol
Rule 17: [49 cases, mean 6.0, range 5 to 8, est err 0.5]
if
volatile.acidity > 0.155
residual.sugar > 8.8
free.sulfur.dioxide > 30
total.sulfur.dioxide <= 130
pH <= 3.26
alcohol > 9.1
then
outcome = 173.5 - 169 density + 0.055 alcohol + 0.38 sulphates
+ 0.002 residual.sugar
Rule 18: [114 cases, mean 6.1, range 3 to 9, est err 0.6]
if
volatile.acidity > 0.31
citric.acid <= 0.36
residual.sugar > 1.45
total.sulfur.dioxide <= 97
alcohol > 9.1
then
outcome = 302.3 - 305 density + 0.0128 total.sulfur.dioxide
+ 0.096 residual.sugar + 1.94 citric.acid + 1.05 pH
+ 0.17 fixed.acidity - 6.7 chlorides
+ 0.0022 free.sulfur.dioxide - 0.21 volatile.acidity
+ 0.013 alcohol + 0.09 sulphates
Rule 19: [145 cases, mean 6.1, range 5 to 8, est err 0.6]
if
volatile.acidity > 0.155
free.sulfur.dioxide > 30
total.sulfur.dioxide <= 195
sulphates > 0.64
then
outcome = 206 - 209 density + 0.069 residual.sugar + 0.38 fixed.acidity
+ 2.79 sulphates + 0.0155 free.sulfur.dioxide
- 0.0051 total.sulfur.dioxide - 1.71 citric.acid + 1.04 pH
Rule 20: [555 cases, mean 6.1, range 3 to 9, est err 0.6]
if
total.sulfur.dioxide > 130
total.sulfur.dioxide <= 235
sulphates <= 0.64
alcohol > 10.4
then
outcome = 108 + 0.276 alcohol - 109 density + 0.05 residual.sugar
+ 0.77 pH - 1.02 volatile.acidity - 4.2 chlorides
+ 0.78 sulphates + 0.08 fixed.acidity
+ 0.0016 free.sulfur.dioxide - 0.0003 total.sulfur.dioxide
Rule 21: [73 cases, mean 6.2, range 4 to 8, est err 0.4]
if
volatile.acidity > 0.155
citric.acid <= 0.28
residual.sugar <= 8.8
free.sulfur.dioxide > 30
total.sulfur.dioxide <= 130
pH <= 3.26
sulphates <= 0.64
alcohol > 9.1
then
outcome = 4.2 + 0.147 residual.sugar + 0.47 alcohol + 3.75 sulphates
- 2.5 volatile.acidity - 5 density
Rule 22: [244 cases, mean 6.3, range 4 to 8, est err 0.6]
if
citric.acid > 0.28
residual.sugar <= 8.8
free.sulfur.dioxide > 30
total.sulfur.dioxide <= 130
pH <= 3.26
then
outcome = 40.1 + 0.278 alcohol + 1.3 sulphates - 39 density
+ 0.017 residual.sugar + 0.001 total.sulfur.dioxide + 0.17 pH
+ 0.03 fixed.acidity
Rule 23: [106 cases, mean 6.3, range 4 to 8, est err 0.6]
if
volatile.acidity <= 0.155
free.sulfur.dioxide > 30
then
outcome = 139.1 - 138 density + 0.058 residual.sugar + 0.71 pH
+ 0.92 sulphates + 0.11 fixed.acidity - 0.73 volatile.acidity
+ 0.055 alcohol - 0.0012 total.sulfur.dioxide
+ 0.0007 free.sulfur.dioxide
Rule 24: [137 cases, mean 6.5, range 4 to 9, est err 0.6]
if
volatile.acidity > 0.155
free.sulfur.dioxide > 30
total.sulfur.dioxide <= 130
pH > 3.26
sulphates <= 0.64
alcohol > 9.1
then
outcome = 114.2 + 0.0142 total.sulfur.dioxide - 107 density
- 11.8 chlorides - 1.57 pH + 0.124 alcohol + 1.21 sulphates
+ 1.16 volatile.acidity + 0.021 residual.sugar
+ 0.04 fixed.acidity
Rule 25: [92 cases, mean 6.5, range 4 to 8, est err 0.6]
if
volatile.acidity <= 0.205
alcohol <= 9.1
then
outcome = -200.7 + 210 density + 5.88 volatile.acidity + 23.9 chlorides
- 2.83 citric.acid - 1.17 pH
Evaluation on training data (3750 cases):
Average |error| 0.5
Relative |error| 0.67
Correlation coefficient 0.66
Attribute usage:
Conds Model
84% 93% alcohol
80% 89% volatile.acidity
70% 61% free.sulfur.dioxide
63% 50% total.sulfur.dioxide
44% 70% sulphates
26% 44% chlorides
22% 76% fixed.acidity
16% 87% residual.sugar
11% 86% pH
11% 45% citric.acid
8% 97% density
Time: 0.2 secs
# generate predictions for the model
p.cubist <- predict(m.cubist, wine_test)
# summary statistics about the predictions
summary(p.cubist)
Min. 1st Qu. Median Mean 3rd Qu. Max.
3.677 5.416 5.906 5.848 6.238 7.393
# correlation between the predicted and true values
cor(p.cubist, wine_test$quality)
[1] 0.6201015
# mean absolute error of predicted and true values
# (uses a custom function defined above)
MAE(wine_test$quality, p.cubist)
[1] 0.5339725
#Conclusion:
#By recreating the linear regression solution using the Challenger2 and insurance datasets, I discovered a negative correlation between temperature and distress count, highlighting the importance of validating results with different methods. In Part 2, we delved into regression trees and model trees using the whitewines dataset, learning how these models capture non-linear relationships and enhance prediction accuracy. This activity not only improved my data analysis skills but also broadened my understanding of tree-based methods in predictive modeling. Overall, I enjoyed this class activity because I now better understand how tree-based methods can effectively capture non-linear relationships and improve prediction accuracy.
LS0tCnRpdGxlOiAiTGluZWFyIFJlZ3Jlc3Npb24gUGFydCAxIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCmBgYHtyfQpnZXR3ZCgpCiMgU2V0IHRoZSB3b3JraW5nIGRpcmVjdG9yeSB0byB0aGUgZm9sZGVyIGNvbnRhaW5pbmcgdGhlIGZpbGUKIyBSZWFkIHRoZSBDU1YgZmlsZQogIGxhdW5jaCA8LSByZWFkLmNzdigiY2hhbGxlbmdlcjIuY3N2IikKICBWaWV3KGxhdW5jaCkKICAjIGVzdGltYXRlIGJldGEgbWFudWFsbHkKYiA8LSBjb3YobGF1bmNoJHRlbXBlcmF0dXJlLCBsYXVuY2gkZGlzdHJlc3NfY3QpIC8gdmFyKGxhdW5jaCR0ZW1wZXJhdHVyZSkKYgojVGhpcyB2YWx1ZSBzdWdnZXN0cyBhIG5lZ2F0aXZlIHJlbGF0aW9uc2hpcCBiZXR3ZWVuIHRlbXBlcmF0dXJlIGFuZCBkaXN0cmVzcyBjb3VudC4KIyBlc3RpbWF0ZSBhbHBoYSBtYW51YWxseQphIDwtIG1lYW4obGF1bmNoJGRpc3RyZXNzX2N0KSAtIGIgKiBtZWFuKGxhdW5jaCR0ZW1wZXJhdHVyZSkKYQojIGNhbGN1bGF0ZSB0aGUgY29ycmVsYXRpb24gb2YgbGF1bmNoIGRhdGEKciA8LSBjb3YobGF1bmNoJHRlbXBlcmF0dXJlLCBsYXVuY2gkZGlzdHJlc3NfY3QpIC8KICAgICAgIChzZChsYXVuY2gkdGVtcGVyYXR1cmUpICogc2QobGF1bmNoJGRpc3RyZXNzX2N0KSkKcgojIGNhbGN1bGF0ZSB0aGUgY29ycmVsYXRpb24gYmV0d2VlbiB0ZW1wZXJhdHVyZSBhbmQgZGlzdHJlc3MuIHdlIGRpZCBpdCBkaXJlY3RseSB1c2luZyB0aGlzIGNvZGUgYmVjYXVzZSBpdCB3YXMgdGhlIHNhbWUgbnVtYmVyIApjb3IobGF1bmNoJHRlbXBlcmF0dXJlLCBsYXVuY2gkZGlzdHJlc3NfY3QpCiN0aGlzIGlzIGEgbmVnYXRpdmUgY29ycmVsYXRpb24KIyBjb21wdXRpbmcgdGhlIHNsb3BlIHVzaW5nIGNvcnJlbGF0aW9uCnIgKiAoc2QobGF1bmNoJGRpc3RyZXNzX2N0KSAvIHNkKGxhdW5jaCR0ZW1wZXJhdHVyZSkpCnIKIyBjb25maXJtaW5nIHRoZSByZWdyZXNzaW9uIGxpbmUgdXNpbmcgdGhlIGxtIGZ1bmN0aW9uIChub3QgaW4gdGV4dCkKbW9kZWwgPC0gbG0oZGlzdHJlc3NfY3QgfiB0ZW1wZXJhdHVyZSwgZGF0YSA9IGxhdW5jaCkKbW9kZWwKI0hlbmNlLCB3ZSBjYW4gc2VlIHRoYXQgdGhlIHZhbHVlcyBnb3QgdGhyb3VnaCB0aGUgbGluZWFyIHJlZ3Jlc3Npb24gbW9kZWwgYXJlIHZlcnkgc2ltaWxhciB0byB0aGUgb25lIHRoYXQgd2UgZ290IG1hbnVhbGx5CnN1bW1hcnkobW9kZWwpCiMgY3JlYXRpbmcgYSBzaW1wbGUgbXVsdGlwbGUgcmVncmVzc2lvbiBmdW5jdGlvbgpyZWcgPC0gZnVuY3Rpb24oeSwgeCkgewogIHggPC0gYXMubWF0cml4KHgpCiAgeCA8LSBjYmluZChJbnRlcmNlcHQgPSAxLCB4KQogIGIgPC0gc29sdmUodCh4KSAlKiUgeCkgJSolIHQoeCkgJSolIHkKICBjb2xuYW1lcyhiKSA8LSAiZXN0aW1hdGUiCiAgcHJpbnQoYikKfQojIGV4YW1pbmUgdGhlIGxhdW5jaCBkYXRhCnN0cihsYXVuY2gpCiMgdGVzdCByZWdyZXNzaW9uIG1vZGVsIHdpdGggc2ltcGxlIGxpbmVhciByZWdyZXNzaW9uCnJlZyh5ID0gbGF1bmNoJGRpc3RyZXNzX2N0LCB4ID0gbGF1bmNoWzJdKQojIHVzZSByZWdyZXNzaW9uIG1vZGVsIHdpdGggbXVsdGlwbGUgcmVncmVzc2lvbgpyZWcoeSA9IGxhdW5jaCRkaXN0cmVzc19jdCwgeCA9IGxhdW5jaFsyOjRdKQojIGNvbmZpcm1pbmcgdGhlIG11bHRpcGxlIHJlZ3Jlc3Npb24gcmVzdWx0IHVzaW5nIHRoZSBsbSBmdW5jdGlvbiAobm90IGluIHRleHQpCm1vZGVsIDwtIGxtKGRpc3RyZXNzX2N0IH4gdGVtcGVyYXR1cmUgKyBmaWVsZF9jaGVja19wcmVzc3VyZSArIGZsaWdodF9udW0sIGRhdGEgPSBsYXVuY2gpCm1vZGVsCnN1bW1hcnkobW9kZWwpCiNEaXNwbGF5IHRoZSBzdW1tYXJ5IG9mIHRoZSBtdWx0aXBsZSByZWdyZXNzaW9uIG1vZGVsCiNJbiBjbGFzcywgd2UgZGlzY3Vzc2VkIGFuZCByZWFsaXplZCB0aGF0IGZsaWdodCBudW1iZXIgYW5kIGZpZWxkIGNoZWNrIHByZXNzdXJlIGFyZSBub3Qgc2lnbmlmaWNhbnQgdG8gdXMKCiNQcmVkaWN0aW5nIE1lZGljYWwgRXhwZW5zZXMKIyMgU3RlcCAyOiBFeHBsb3JpbmcgYW5kIHByZXBhcmluZyB0aGUgZGF0YSAtLS0tCmluc3VyYW5jZSA8LSByZWFkLmNzdigiaW5zdXJhbmNlLmNzdiIsIHN0cmluZ3NBc0ZhY3RvcnMgPSBUUlVFKQpzdHIoaW5zdXJhbmNlKQojIHN1bW1hcml6ZSB0aGUgY2hhcmdlcyB2YXJpYWJsZQpzdW1tYXJ5KGluc3VyYW5jZSRleHBlbnNlcykKIyBoaXN0b2dyYW0gb2YgaW5zdXJhbmNlIGNoYXJnZXMKaGlzdChpbnN1cmFuY2UkZXhwZW5zZXMpCiMgdGFibGUgb2YgcmVnaW9uCnRhYmxlKGluc3VyYW5jZSRyZWdpb24pCiMgZXhwbG9yaW5nIHJlbGF0aW9uc2hpcHMgYW1vbmcgZmVhdHVyZXM6IGNvcnJlbGF0aW9uIG1hdHJpeApjb3IoaW5zdXJhbmNlW2MoImFnZSIsICJibWkiLCAiY2hpbGRyZW4iLCAiZXhwZW5zZXMiKV0pCiMgdmlzdWFsaW5nIHJlbGF0aW9uc2hpcHMgYW1vbmcgZmVhdHVyZXM6IHNjYXR0ZXJwbG90IG1hdHJpeApwYWlycyhpbnN1cmFuY2VbYygiYWdlIiwgImJtaSIsICJjaGlsZHJlbiIsICJleHBlbnNlcyIpXSkKIyMgU3RlcCAzOiBUcmFpbmluZyBhIG1vZGVsIG9uIHRoZSBkYXRhIC0tLS0KaW5zX21vZGVsIDwtIGxtKGV4cGVuc2VzIH4gYWdlICsgY2hpbGRyZW4gKyBibWkgKyBzZXggKyBzbW9rZXIgKyByZWdpb24sCiAgICAgICAgICAgICAgICBkYXRhID0gaW5zdXJhbmNlKQppbnNfbW9kZWwgPC0gbG0oZXhwZW5zZXMgfiAuLCBkYXRhID0gaW5zdXJhbmNlKSAjIHRoaXMgaXMgZXF1aXZhbGVudCB0byBhYm92ZQoKIyBzZWUgdGhlIGVzdGltYXRlZCBiZXRhIGNvZWZmaWNpZW50cwppbnNfbW9kZWwKI1N0ZXAgNDogRXZhbHVhdGluZyBtb2RlbCBwZXJmb3JtYW5jZQojIHNlZSBtb3JlIGRldGFpbCBhYm91dCB0aGUgZXN0aW1hdGVkIGJldGEgY29lZmZpY2llbnRzCnN1bW1hcnkoaW5zX21vZGVsKQojU3RlcCA1OiBJbXByb3ZpbmcgbW9kZWwgcGVyZm9ybWFuY2UKIyBhZGQgYSBoaWdoZXItb3JkZXIgImFnZSIgdGVybQppbnN1cmFuY2UkYWdlMiA8LSBpbnN1cmFuY2UkYWdlXjIKIyBhZGQgYW4gaW5kaWNhdG9yIGZvciBCTUkgPj0gMzAKaW5zdXJhbmNlJGJtaTMwIDwtIGlmZWxzZShpbnN1cmFuY2UkYm1pID49IDMwLCAxLCAwKQojIGNyZWF0ZSBmaW5hbCBtb2RlbAppbnNfbW9kZWwyIDwtIGxtKGV4cGVuc2VzIH4gYWdlICsgYWdlMiArIGNoaWxkcmVuICsgYm1pICsgc2V4ICsKICAgICAgICAgICAgICAgICAgIGJtaTMwKnNtb2tlciArIHJlZ2lvbiwgZGF0YSA9IGluc3VyYW5jZSkKc3VtbWFyeShpbnNfbW9kZWwyKQojIG1ha2luZyBwcmVkaWN0aW9ucyB3aXRoIHRoZSByZWdyZXNzaW9uIG1vZGVsCmluc3VyYW5jZSRwcmVkIDwtIHByZWRpY3QoaW5zX21vZGVsMiwgaW5zdXJhbmNlKQpjb3IoaW5zdXJhbmNlJHByZWQsIGluc3VyYW5jZSRleHBlbnNlcykKcGxvdChpbnN1cmFuY2UkcHJlZCwgaW5zdXJhbmNlJGV4cGVuc2VzKQphYmxpbmUoYSA9IDAsIGIgPSAxLCBjb2wgPSAicmVkIiwgbHdkID0gMywgbHR5ID0gMikKcHJlZGljdChpbnNfbW9kZWwyLAogICAgICAgIGRhdGEuZnJhbWUoYWdlID0gMzAsIGFnZTIgPSAzMF4yLCBjaGlsZHJlbiA9IDIsCiAgICAgICAgICAgICAgICAgICBibWkgPSAzMCwgc2V4ID0gIm1hbGUiLCBibWkzMCA9IDEsCiAgICAgICAgICAgICAgICAgICBzbW9rZXIgPSAibm8iLCByZWdpb24gPSAibm9ydGhlYXN0IikpCnByZWRpY3QoaW5zX21vZGVsMiwKICAgICAgICBkYXRhLmZyYW1lKGFnZSA9IDMwLCBhZ2UyID0gMzBeMiwgY2hpbGRyZW4gPSAyLAogICAgICAgICAgICAgICAgICAgYm1pID0gMzAsIHNleCA9ICJmZW1hbGUiLCBibWkzMCA9IDEsCiAgICAgICAgICAgICAgICAgICBzbW9rZXIgPSAibm8iLCByZWdpb24gPSAibm9ydGhlYXN0IikpCnByZWRpY3QoaW5zX21vZGVsMiwKICAgICAgICBkYXRhLmZyYW1lKGFnZSA9IDMwLCBhZ2UyID0gMzBeMiwgY2hpbGRyZW4gPSAwLAogICAgICAgICAgICAgICAgICAgYm1pID0gMzAsIHNleCA9ICJmZW1hbGUiLCBibWkzMCA9IDEsCiAgICAgICAgICAgICAgICAgICBzbW9rZXIgPSAibm8iLCByZWdpb24gPSAibm9ydGhlYXN0IikpCmBgYAoKYGBge3J9CiNQYXJ0IDI6IFJlZ3Jlc3Npb24gVHJlZXMgYW5kIE1vZGVsIFRyZWVzCiMgc2V0IHVwIHRoZSBkYXRhCnRlZSA8LSBjKDEsIDEsIDEsIDIsIDIsIDMsIDQsIDUsIDUsIDYsIDYsIDcsIDcsIDcsIDcpCmF0MSA8LSBjKDEsIDEsIDEsIDIsIDIsIDMsIDQsIDUsIDUpCmF0MiA8LSBjKDYsIDYsIDcsIDcsIDcsIDcpCmJ0MSA8LSBjKDEsIDEsIDEsIDIsIDIsIDMsIDQpCmJ0MiA8LSBjKDUsIDUsIDYsIDYsIDcsIDcsIDcsIDcpCiMgY29tcHV0ZSB0aGUgU0RSCnNkcl9hIDwtIHNkKHRlZSkgLSAobGVuZ3RoKGF0MSkgLyBsZW5ndGgodGVlKSAqIHNkKGF0MSkgKyBsZW5ndGgoYXQyKSAvIGxlbmd0aCh0ZWUpICogc2QoYXQyKSkKc2RyX2IgPC0gc2QodGVlKSAtIChsZW5ndGgoYnQxKSAvIGxlbmd0aCh0ZWUpICogc2QoYnQxKSArIGxlbmd0aChidDIpIC8gbGVuZ3RoKHRlZSkgKiBzZChidDIpKQojIGNvbXBhcmUgdGhlIFNEUiBmb3IgZWFjaCBzcGxpdApzZHJfYQpzZHJfYgojRXhlcmNpc2UgTm8gMzogRXN0aW1hdGluZyBXaW5lIFF1YWxpdHkKI1N0ZXAgMjogRXhwbG9yaW5nIGFuZCBwcmVwYXJpbmcgdGhlIGRhdGEKd2luZSA8LSByZWFkLmNzdigid2hpdGV3aW5lcy5jc3YiKQojIGV4YW1pbmUgdGhlIHdpbmUgZGF0YQpzdHIod2luZSkKIyB0aGUgZGlzdHJpYnV0aW9uIG9mIHF1YWxpdHkgcmF0aW5ncwpoaXN0KHdpbmUkcXVhbGl0eSkKIyBzdW1tYXJ5IHN0YXRpc3RpY3Mgb2YgdGhlIHdpbmUgZGF0YQpzdW1tYXJ5KHdpbmUpCndpbmVfdHJhaW4gPC0gd2luZVsxOjM3NTAsIF0Kd2luZV90ZXN0IDwtIHdpbmVbMzc1MTo0ODk4LCBdCiNTdGVwIDM6IFRyYWluaW5nIGEgbW9kZWwgb24gdGhlIGRhdGEKIyByZWdyZXNzaW9uIHRyZWUgdXNpbmcgcnBhcnQKbGlicmFyeShycGFydCkKbS5ycGFydCA8LSBycGFydChxdWFsaXR5IH4gLiwgZGF0YSA9IHdpbmVfdHJhaW4pCiMgZ2V0IGJhc2ljIGluZm9ybWF0aW9uIGFib3V0IHRoZSB0cmVlCm0ucnBhcnQKIyBnZXQgbW9yZSBkZXRhaWxlZCBpbmZvcm1hdGlvbiBhYm91dCB0aGUgdHJlZQpzdW1tYXJ5KG0ucnBhcnQpCmluc3RhbGwucGFja2FnZXMoInJwYXJ0LnBsb3QiKQojIHVzZSB0aGUgcnBhcnQucGxvdCBwYWNrYWdlIHRvIGNyZWF0ZSBhIHZpc3VhbGl6YXRpb24KbGlicmFyeShycGFydC5wbG90KQojIGEgYmFzaWMgZGVjaXNpb24gdHJlZSBkaWFncmFtCnJwYXJ0LnBsb3QobS5ycGFydCwgZGlnaXRzID0gMykKIyBhIGZldyBhZGp1c3RtZW50cyB0byB0aGUgZGlhZ3JhbQpycGFydC5wbG90KG0ucnBhcnQsIGRpZ2l0cyA9IDQsIGZhbGxlbi5sZWF2ZXMgPSBUUlVFLCB0eXBlID0gMywgZXh0cmEgPSAxMDEpCiNTdGVwIDQ6IEV2YWx1YXRlIG1vZGVsIHBlcmZvcm1hbmNlCiMgZ2VuZXJhdGUgcHJlZGljdGlvbnMgZm9yIHRoZSB0ZXN0aW5nIGRhdGFzZXQKcC5ycGFydCA8LSBwcmVkaWN0KG0ucnBhcnQsIHdpbmVfdGVzdCkKIyBjb21wYXJlIHRoZSBkaXN0cmlidXRpb24gb2YgcHJlZGljdGVkIHZhbHVlcyB2cy4gYWN0dWFsIHZhbHVlcwpzdW1tYXJ5KHAucnBhcnQpCnN1bW1hcnkod2luZV90ZXN0JHF1YWxpdHkpCiMgY29tcGFyZSB0aGUgY29ycmVsYXRpb24KY29yKHAucnBhcnQsIHdpbmVfdGVzdCRxdWFsaXR5KQojIGZ1bmN0aW9uIHRvIGNhbGN1bGF0ZSB0aGUgbWVhbiBhYnNvbHV0ZSBlcnJvcgpNQUUgPC0gZnVuY3Rpb24oYWN0dWFsLCBwcmVkaWN0ZWQpIHsKICBtZWFuKGFicyhhY3R1YWwgLSBwcmVkaWN0ZWQpKSAgCn0KIyBtZWFuIGFic29sdXRlIGVycm9yIGJldHdlZW4gcHJlZGljdGVkIGFuZCBhY3R1YWwgdmFsdWVzCk1BRShwLnJwYXJ0LCB3aW5lX3Rlc3QkcXVhbGl0eSkKIyBtZWFuIGFic29sdXRlIGVycm9yIGJldHdlZW4gYWN0dWFsIHZhbHVlcyBhbmQgbWVhbiB2YWx1ZQptZWFuKHdpbmVfdHJhaW4kcXVhbGl0eSkgIyByZXN1bHQgPSA1Ljg3Ck1BRSg1Ljg3LCB3aW5lX3Rlc3QkcXVhbGl0eSkKI1N0ZXAgNTogSW1wcm92aW5nIG1vZGVsIHBlcmZvcm1hbmNlCmluc3RhbGwucGFja2FnZXMoInBseXIiKQppbnN0YWxsLnBhY2thZ2VzKCJDdWJpc3QiKQojIHRyYWluIGEgQ3ViaXN0IE1vZGVsIFRyZWUKbGlicmFyeShDdWJpc3QpCm0uY3ViaXN0IDwtIGN1YmlzdCh4ID0gd2luZV90cmFpblstMTJdLCB5ID0gd2luZV90cmFpbiRxdWFsaXR5KQojIGRpc3BsYXkgYmFzaWMgaW5mb3JtYXRpb24gYWJvdXQgdGhlIG1vZGVsIHRyZWUKbS5jdWJpc3QKIyBkaXNwbGF5IHRoZSB0cmVlIGl0c2VsZgpzdW1tYXJ5KG0uY3ViaXN0KQojIGdlbmVyYXRlIHByZWRpY3Rpb25zIGZvciB0aGUgbW9kZWwKcC5jdWJpc3QgPC0gcHJlZGljdChtLmN1YmlzdCwgd2luZV90ZXN0KQojIHN1bW1hcnkgc3RhdGlzdGljcyBhYm91dCB0aGUgcHJlZGljdGlvbnMKc3VtbWFyeShwLmN1YmlzdCkKIyBjb3JyZWxhdGlvbiBiZXR3ZWVuIHRoZSBwcmVkaWN0ZWQgYW5kIHRydWUgdmFsdWVzCmNvcihwLmN1YmlzdCwgd2luZV90ZXN0JHF1YWxpdHkpCiMgbWVhbiBhYnNvbHV0ZSBlcnJvciBvZiBwcmVkaWN0ZWQgYW5kIHRydWUgdmFsdWVzCiMgKHVzZXMgYSBjdXN0b20gZnVuY3Rpb24gZGVmaW5lZCBhYm92ZSkKTUFFKHdpbmVfdGVzdCRxdWFsaXR5LCBwLmN1YmlzdCkgCgojQ29uY2x1c2lvbjoKI0J5IHJlY3JlYXRpbmcgdGhlIGxpbmVhciByZWdyZXNzaW9uIHNvbHV0aW9uIHVzaW5nIHRoZSBDaGFsbGVuZ2VyMiBhbmQgaW5zdXJhbmNlIGRhdGFzZXRzLCBJIGRpc2NvdmVyZWQgYSBuZWdhdGl2ZSBjb3JyZWxhdGlvbiBiZXR3ZWVuIHRlbXBlcmF0dXJlIGFuZCBkaXN0cmVzcyBjb3VudCwgaGlnaGxpZ2h0aW5nIHRoZSBpbXBvcnRhbmNlIG9mIHZhbGlkYXRpbmcgcmVzdWx0cyB3aXRoIGRpZmZlcmVudCBtZXRob2RzLiBJbiBQYXJ0IDIsIHdlIGRlbHZlZCBpbnRvIHJlZ3Jlc3Npb24gdHJlZXMgYW5kIG1vZGVsIHRyZWVzIHVzaW5nIHRoZSB3aGl0ZXdpbmVzIGRhdGFzZXQsIGxlYXJuaW5nIGhvdyB0aGVzZSBtb2RlbHMgY2FwdHVyZSBub24tbGluZWFyIHJlbGF0aW9uc2hpcHMgYW5kIGVuaGFuY2UgcHJlZGljdGlvbiBhY2N1cmFjeS4gVGhpcyBhY3Rpdml0eSBub3Qgb25seSBpbXByb3ZlZCBteSBkYXRhIGFuYWx5c2lzIHNraWxscyBidXQgYWxzbyBicm9hZGVuZWQgbXkgdW5kZXJzdGFuZGluZyBvZiB0cmVlLWJhc2VkIG1ldGhvZHMgaW4gcHJlZGljdGl2ZSBtb2RlbGluZy4gT3ZlcmFsbCwgSSBlbmpveWVkIHRoaXMgY2xhc3MgYWN0aXZpdHkgYmVjYXVzZSBJIG5vdyBiZXR0ZXIgdW5kZXJzdGFuZCBob3cgdHJlZS1iYXNlZCBtZXRob2RzIGNhbiBlZmZlY3RpdmVseSBjYXB0dXJlIG5vbi1saW5lYXIgcmVsYXRpb25zaGlwcyBhbmQgaW1wcm92ZSBwcmVkaWN0aW9uIGFjY3VyYWN5LgpgYGA=