Question 1
# Fit the four main effects together with every pairwise interaction.
fit <- lm(y ~ (x1 + x2 + x3 + x4)^2, data = dat)
# List the terms whose coefficient came back as NA in this fit.
names(which(is.na(coef(fit))))
## [1] "x1:x3" "x2:x3"
Question 2
# Multiple R-squared of the pairwise-interaction model `fit`.
# `[[` is used for exact (non-partial) element matching.
summary(fit)[["r.squared"]]
## [1] 0.7496145
Question 3-4
# Overall F statistic of `fit`, returned together with its numerator and
# denominator degrees of freedom (value, numdf, dendf).
summary(fit)[["fstatistic"]]
## value numdf dendf
## 19.8342 8.0000 53.0000
Question 5-7
# Nested-model comparison: does adding the pairwise interactions improve on
# the main-effects-only model?
# Larger model: main effects plus all pairwise interactions.
fit_full <- lm(y ~ (x1 + x2 + x3 + x4)^2, data = dat)
# Smaller model: main effects only.
fit_red <- lm(y ~ x1 + x2 + x3 + x4, data = dat)
# F-test of the reduced model against the full model.
anova(fit_red, fit_full)
## Analysis of Variance Table
##
## Model 1: y ~ x1 + x2 + x3 + x4
## Model 2: y ~ (x1 + x2 + x3 + x4)^2
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 57 1432.8
## 2 53 1162.4 4 270.37 3.0819 0.02352 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Question 8
# Remove the two interaction terms (x1:x3, x2:x3) whose coefficients were
# reported as NA for the full model, so the selection starts from a model in
# which every term is estimated.
fit_full_ok <- update(fit_full, . ~ . - x1:x3 - x2:x3)
# Backward elimination by AIC. The call is namespace-qualified so it works
# whether or not MASS has been attached with library() earlier in the session.
fit_step <- MASS::stepAIC(fit_full_ok, direction = "backward", trace = FALSE)
# Terms retained by the selected model.
attr(terms(fit_step), "term.labels")
## [1] "x2" "x3" "x4" "x2:x4"
Question 9
# Refit the model with terms x2, x3, x4 and the x2:x4 interaction, then
# report its adjusted R-squared.
fit_best <- lm(y ~ x2 + x3 + x4 + x2:x4, data = dat)
summary(fit_best)[["adj.r.squared"]]
## [1] 0.714905
Question 10-11
# Coefficient-table row for the x2:x4 interaction in `fit_best`:
# estimate, standard error, t value and p-value.
summary(fit_best)[["coefficients"]]["x2:x4", ]
## Estimate Std. Error t value Pr(>|t|)
## -0.304718256 0.090556493 -3.364952043 0.001374861
Question 12
# Point prediction from `fit_best` at x2 = 10, x3 = 0.5, x4 = 0.75.
new_point <- data.frame(x2 = 10, x3 = 0.5, x4 = 0.75)
predict(fit_best, newdata = new_point)
## 1
## 27.44168
Question 13-14
# 95% confidence interval for the mean response of `fit_best` at
# x2 = 10, x3 = 0.5, x4 = 0.75.
pt1 <- data.frame(x2 = 10, x3 = 0.5, x4 = 0.75)
ci_mean <- predict(
  fit_best,
  newdata = pt1,
  interval = "confidence",
  level = 0.95
)
# Columns: fit (point estimate), lwr and upr (interval bounds).
print(ci_mean)
## fit lwr upr
## 1 27.44168 24.00618 30.87717
Question 15
# Point prediction from `fit_best` at x2 = 3, x3 = 0.25, x4 = 0.85.
pt2 <- data.frame(x2 = 3, x3 = 0.25, x4 = 0.85)
predict(fit_best, newdata = pt2)
## 1
## 18.61092
Question 16
# 95% prediction interval from `fit_best` for a new observation at `pt2`.
pi_pt2 <- predict(
  fit_best,
  newdata = pt2,
  interval = "prediction",
  level = 0.95
)
# Columns: fit (point estimate), lwr and upr (interval bounds).
print(pi_pt2)
## fit lwr upr
## 1 18.61092 8.860183 28.36165