# Load necessary libraries
library(ISLR)
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.1.1 ──
## ✔ broom 1.0.5 ✔ recipes 1.0.10
## ✔ dials 1.2.1 ✔ rsample 1.2.0
## ✔ dplyr 1.1.4 ✔ tibble 3.2.1
## ✔ ggplot2 3.5.0 ✔ tidyr 1.3.1
## ✔ infer 1.0.6 ✔ tune 1.1.2
## ✔ modeldata 1.3.0 ✔ workflows 1.1.4
## ✔ parsnip 1.2.0 ✔ workflowsets 1.0.1
## ✔ purrr 1.0.2 ✔ yardstick 1.3.0
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ recipes::step() masks stats::step()
## • Learn how to get started at https://www.tidymodels.org/start/
library(ggplot2)
library(datasets)
# Bring the Auto data frame into the workspace, naming the package it is
# read from (ISLR is attached at the top of the script)
data("Auto", package = "ISLR")
# Print a compact overview: dimensions, column types and leading values
str(object = Auto)
## 'data.frame': 392 obs. of 9 variables:
## $ mpg : num 18 15 18 16 17 15 14 14 14 15 ...
## $ cylinders : num 8 8 8 8 8 8 8 8 8 8 ...
## $ displacement: num 307 350 318 304 302 429 454 440 455 390 ...
## $ horsepower : num 130 165 150 150 140 198 220 215 225 190 ...
## $ weight : num 3504 3693 3436 3433 3449 ...
## $ acceleration: num 12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
## $ year : num 70 70 70 70 70 70 70 70 70 70 ...
## $ origin : num 1 1 1 1 1 1 1 1 1 1 ...
## $ name : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
# (a) Simple linear regression: mpg is the response, horsepower the single
# predictor. The fitted object is kept as `model` for later steps.
model <- lm(formula = mpg ~ horsepower, data = Auto)
# Coefficient table, residual summary, R-squared and overall F-test
summary(object = model)
##
## Call:
## lm(formula = mpg ~ horsepower, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5710 -3.2592 -0.3435 2.7630 16.9240
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.935861 0.717499 55.66 <2e-16 ***
## horsepower -0.157845 0.006446 -24.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.906 on 390 degrees of freedom
## Multiple R-squared: 0.6059, Adjusted R-squared: 0.6049
## F-statistic: 599.7 on 1 and 390 DF, p-value: < 2.2e-16
# (i) Is there a relationship between the predictor and the response?
# Yes. The summary above reports a p-value below 2e-16 for the horsepower
# coefficient (and for the overall F-statistic), far under the usual 0.05
# significance level.
# (ii) How strong is the relationship between the predictor and the response?
# Multiple R-squared is 0.6059, so horsepower explains about 61% of the
# variance in mpg; the residual standard error is 4.906 mpg.
# (iii) Is the relationship between the predictor and the response positive or negative?
# Negative: the horsepower coefficient is -0.157845, i.e. each additional unit
# of horsepower lowers the fitted mpg by about 0.16.
# (iv) What is the predicted mpg associated with a horsepower of 98? What are the associated 95% confidence and prediction intervals?
# The fitted line gives roughly 39.94 - 0.1578 * 98 = 24.47 mpg. predict()
# returns that fit together with the lower and upper bound of each interval.
new_hp <- data.frame(horsepower = 98)
# Confidence interval: uncertainty in the mean mpg at horsepower = 98
predict(model, newdata = new_hp, interval = "confidence", level = 0.95)
# Prediction interval: uncertainty for a single new car at horsepower = 98
predict(model, newdata = new_hp, interval = "prediction", level = 0.95)
# (b) Scatterplot of mpg against horsepower, overlaid with the least-squares
# line (drawn in red, without a confidence band)
ggplot(data = Auto, mapping = aes(x = horsepower, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  labs(
    title = "Scatterplot of MPG vs Horsepower",
    x = "Horsepower",
    y = "MPG"
  )
## `geom_smooth()` using formula = 'y ~ x'

# (c) Diagnostic plots of the regression fit
# plot() on an lm object draws four diagnostic panels one after another;
# a 2 x 2 layout shows them together on one page instead of on separate pages.
old_par <- par(mfrow = c(2, 2))
plot(model)
# Restore the previous layout so later plots are not forced into the grid
par(old_par)



