library(alr4); library(ggplot2)
## Warning: package 'car' was built under R version 4.3.3
## Warning: package 'carData' was built under R version 4.3.3
## Warning in check_dep_version(): ABI version mismatch: 
## lme4 was built with Matrix ABI version 1
## Current Matrix ABI version is 0
## Please re-install lme4 from source or restore original 'Matrix' package

a

# Part (a): regress Y on X using the snake data from alr4, report the two
# fitted coefficients, and draw the scatter plot with the fitted line.
dd <- alr4::snake

lm_fit <- lm(Y ~ X, data = dd)

# Select the coefficients by name instead of by position.
beta0 <- coef(lm_fit)["(Intercept)"]  # intercept
beta1 <- coef(lm_fit)["X"]            # slope

beta0
## (Intercept) 
##   0.7253804
beta1
##         X 
## 0.4980812
# Blue points for the observations, red least-squares line without a band.
ggplot(data = dd, mapping = aes(x = X, y = Y)) +
  geom_point(colour = "blue") +
  geom_smooth(method = "lm", se = FALSE, colour = "red") +
  ggtitle("Scatter Plot with Regression Line") +
  xlab("Water Content of Snow") +
  ylab("Water Yield") +
  theme_minimal()

b

# Part (b): total of the residuals from the fitted model.
residuals_sum <- sum(resid(lm_fit))

residuals_sum
## [1] -6.661338e-16

The sum of the residuals is zero up to floating-point rounding error, which confirms the expected result for a least-squares fit with an intercept.

c

# Part (c): ANOVA table for the fitted model; the p-value of the F-test is
# the first entry of the "Pr(>F)" column.
anova_result <- anova(lm_fit)
p_value_f <- anova_result[["Pr(>F)"]][1]
p_value_f
## [1] 4.63157e-08

The p-value from the F-test is < 0.05, meaning we reject the null hypothesis that the slope is zero.

d

# Part (d): p-value of the t-test for the slope, read from the coefficient
# table of the model summary (row "X" is the slope term).
summary_lm_fit <- summary(lm_fit)
p_value_t <- coef(summary_lm_fit)["X", "Pr(>|t|)"]
p_value_t
## [1] 4.63157e-08

The p-value from the t-test on the slope is also < 0.05, so we can again reject the null hypothesis that the slope is zero.

e

# Part (e): compare the F statistic from the ANOVA table with the square of
# the slope's t statistic from the model summary.
f_statistic <- anova_result[["F value"]][1]
t_statistic <- coef(summary_lm_fit)["X", "t value"]

f_statistic
## [1] 101.16
t_statistic
## [1] 10.05783
t_statistic^2
## [1] 101.16
# Tolerance-based comparison, since both values are floating point.
all.equal(f_statistic, t_statistic^2)
## [1] TRUE

f

# Part (f): interval estimates at X = 42 from the fitted model, once for the
# mean response ("confidence") and once for a new observation ("prediction").
X0 <- data.frame(X = 42)

confidence_interval <- predict(lm_fit, newdata = X0, interval = "confidence")
prediction_interval <- predict(lm_fit, newdata = X0, interval = "prediction")

confidence_interval
##        fit    lwr      upr
## 1 21.64479 20.098 23.19158
prediction_interval
##        fit      lwr      upr
## 1 21.64479 17.62093 25.66865

The confidence interval is narrower than the prediction interval because the prediction interval accounts for the random error of a single new observation in addition to the uncertainty in the estimated mean response. The intervals here are relatively narrow, meaning there is a fair level of certainty in this fitted relationship. The further X0 is from the mean of X, the wider both intervals become, because the fitted line is estimated less precisely away from the center of the data.