1 Question 1

# Fit y on all four predictors plus every two-way interaction among them.
fit <- lm(formula = y ~ (x1 + x2 + x3 + x4)^2, data = dat)
# Names of the coefficients that lm() reports as NA (not estimable).
names(which(is.na(coef(fit))))
## [1] "x1:x3" "x2:x3"

2 Question 2

# Multiple R-squared of the full interaction model; `[[` avoids the partial
# name matching that `$` performs on lists.
summary(fit)[["r.squared"]]
## [1] 0.7496145

3 Question 3-4

# Overall F statistic of the full interaction model, together with its
# numerator and denominator degrees of freedom.
summary(fit)[["fstatistic"]]
##   value   numdf   dendf 
## 19.8342  8.0000 53.0000

4 Question 5-7

# Partial F-test: do the two-way interactions add anything beyond the
# main effects?
# Full model: main effects plus all two-way interactions.
fit_full <- lm(formula = y ~ (x1 + x2 + x3 + x4)^2, data = dat)
# Reduced model: main effects only.
fit_red <- lm(formula = y ~ x1 + x2 + x3 + x4, data = dat)
# The reduced model goes first so the table reports the improvement gained
# by adding the interaction terms.
# NOTE(review): the test has 4 df rather than 6, presumably because two of
# the six interaction coefficients are NA in the full fit -- confirm.
anova(fit_red, fit_full)
## Analysis of Variance Table
## 
## Model 1: y ~ x1 + x2 + x3 + x4
## Model 2: y ~ (x1 + x2 + x3 + x4)^2
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     57 1432.8                              
## 2     53 1162.4  4    270.37 3.0819 0.02352 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5 Question 8

# Remove the two interaction terms x1:x3 and x2:x3 from the full model
# (presumably because their coefficients are not estimable -- confirm), so
# that backward elimination starts from a model without those terms.
fit_full_ok <- update(fit_full, . ~ . - x1:x3 - x2:x3)
# Backward stepwise selection by AIC. stepAIC() lives in the MASS package;
# it is namespace-qualified so this works even when MASS is not attached.
fit_step <- MASS::stepAIC(fit_full_ok, direction = "backward", trace = FALSE)
# Terms retained in the selected model.
attr(terms(fit_step), "term.labels")
## [1] "x2"    "x3"    "x4"    "x2:x4"

6 Question 9

# Refit the model with terms x2, x3, x4 and the x2:x4 interaction, then
# report its adjusted R-squared.
fit_best <- lm(formula = y ~ x2 + x3 + x4 + x2:x4, data = dat)
summary(fit_best)[["adj.r.squared"]]
## [1] 0.714905

7 Question 10-11

# Row of the coefficient table for the x2:x4 interaction: estimate,
# standard error, t value and two-sided p-value.
summary(fit_best)[["coefficients"]]["x2:x4", ]
##     Estimate   Std. Error      t value     Pr(>|t|) 
## -0.304718256  0.090556493 -3.364952043  0.001374861

8 Question 12

# Point prediction from fit_best at x2 = 10, x3 = 0.5, x4 = 0.75.
new_point <- data.frame(x2 = 10, x3 = 0.5, x4 = 0.75)
predict(fit_best, newdata = new_point)
##        1 
## 27.44168

9 Question 13-14

# 95% confidence interval for the mean response at x2 = 10, x3 = 0.5,
# x4 = 0.75 (columns: fitted value, lower bound, upper bound).
pt1 <- data.frame(x2 = 10, x3 = 0.5, x4 = 0.75)
ci_mean <- predict(
  fit_best,
  newdata = pt1,
  interval = "confidence",
  level = 0.95
)
print(ci_mean)
##        fit      lwr      upr
## 1 27.44168 24.00618 30.87717

10 Question 15

# Point prediction from fit_best at x2 = 3, x3 = 0.25, x4 = 0.85.
pt2 <- data.frame(x2 = 3, x3 = 0.25, x4 = 0.85)
predict(fit_best, newdata = pt2)
##        1 
## 18.61092

11 Question 16

# 95% prediction interval for a single new observation at pt2
# (columns: fitted value, lower bound, upper bound).
pi_pt2 <- predict(
  fit_best,
  newdata = pt2,
  interval = "prediction",
  level = 0.95
)
print(pi_pt2)
##        fit      lwr      upr
## 1 18.61092 8.860183 28.36165