# Load necessary libraries
library(ISLR)
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.1.1 ──
## ✔ broom        1.0.5      ✔ recipes      1.0.10
## ✔ dials        1.2.1      ✔ rsample      1.2.0 
## ✔ dplyr        1.1.4      ✔ tibble       3.2.1 
## ✔ ggplot2      3.5.0      ✔ tidyr        1.3.1 
## ✔ infer        1.0.6      ✔ tune         1.1.2 
## ✔ modeldata    1.3.0      ✔ workflows    1.1.4 
## ✔ parsnip      1.2.0      ✔ workflowsets 1.0.1 
## ✔ purrr        1.0.2      ✔ yardstick    1.3.0
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ recipes::step()  masks stats::step()
## • Learn how to get started at https://www.tidymodels.org/start/
library(ggplot2)
library(datasets)

# Load the Auto dataset
data(Auto)

# Check the structure of the Auto dataset
str(Auto)
## 'data.frame':    392 obs. of  9 variables:
##  $ mpg         : num  18 15 18 16 17 15 14 14 14 15 ...
##  $ cylinders   : num  8 8 8 8 8 8 8 8 8 8 ...
##  $ displacement: num  307 350 318 304 302 429 454 440 455 390 ...
##  $ horsepower  : num  130 165 150 150 140 198 220 215 225 190 ...
##  $ weight      : num  3504 3693 3436 3433 3449 ...
##  $ acceleration: num  12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
##  $ year        : num  70 70 70 70 70 70 70 70 70 70 ...
##  $ origin      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ name        : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
# (a) Perform simple linear regression and summarize results
model <- lm(mpg ~ horsepower, data = Auto)
summary(model)
## 
## Call:
## lm(formula = mpg ~ horsepower, data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.5710  -3.2592  -0.3435   2.7630  16.9240 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 39.935861   0.717499   55.66   <2e-16 ***
## horsepower  -0.157845   0.006446  -24.49   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.906 on 390 degrees of freedom
## Multiple R-squared:  0.6059, Adjusted R-squared:  0.6049 
## F-statistic: 599.7 on 1 and 390 DF,  p-value: < 2.2e-16
# (i) Is there a relationship between the predictor and the response?
# In the summary output, look at the p-value associated with the horsepower coefficient. 
# If the p-value is less than the chosen significance level (e.g., 0.05), there is a significant relationship.

# (ii) How strong is the relationship between the predictor and the response?
# Look at the R-squared value. R-squared close to 1 indicates a strong relationship.

# (iii) Is the relationship between the predictor and the response positive or negative?
# Check the sign of the coefficient associated with the horsepower predictor.

# (iv) What is the predicted mpg associated with a horsepower of 98? What are the associated 95% confidence and prediction intervals?
# Use the predict() function to obtain the predicted mpg, confidence interval, and prediction interval.

# (b) Plot the response and the predictor with regression line
ggplot(Auto, aes(x = horsepower, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(x = "Horsepower", y = "MPG", title = "Scatterplot of MPG vs Horsepower")
## `geom_smooth()` using formula = 'y ~ x'

# (c) Diagnostic plots of the regression fit
plot(model)