Import Data and Libraries, then Fit a Linear Regression

library(ISLR)
library(ggplot2)

# Copy the Auto data set into a working data frame.
df <- as.data.frame(Auto)

# Fit a simple linear regression of weight on horsepower.
model1 <- lm(weight ~ horsepower, data = df)

# Print residual quantiles, coefficient estimates and overall fit statistics.
summary(model1)
## 
## Call:
## lm(formula = weight ~ horsepower, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2191.1  -297.7   -80.1   330.8  1150.8 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 984.5003    62.5143   15.75   <2e-16 ***
## horsepower   19.0782     0.5616   33.97   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 427.4 on 390 degrees of freedom
## Multiple R-squared:  0.7474, Adjusted R-squared:  0.7468 
## F-statistic:  1154 on 1 and 390 DF,  p-value: < 2.2e-16

Check Correlation

# Pearson correlation test between weight and horsepower; the result is kept
# in `corr` and then printed.
corr <- cor.test(x = df$weight, y = df$horsepower)
print(corr)
## 
##  Pearson's product-moment correlation
## 
## data:  df$weight and df$horsepower
## t = 33.972, df = 390, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8371778 0.8875815
## sample estimates:
##       cor 
## 0.8645377

Check Relationship (Fitted Intercept and Slope)

# Show the fitted intercept and horsepower slope.
coef(model1)
## (Intercept)  horsepower 
##   984.50033    19.07816

Make a Prediction

# Single new observation to predict for: a car with 150 horsepower.
new_data <- data.frame(horsepower = 150)

# Point prediction of weight plus the lower/upper bounds of a 97% prediction
# interval for that observation.
predict(
  model1,
  newdata = new_data,
  interval = "prediction",
  level = 0.97
)
##        fit      lwr      upr
## 1 3846.225 2912.409 4780.041

Plot the Regression

Here is a plot of the fitted regression line with its 97% confidence band, together with the lower and upper prediction interval bounds (dashed lines).

# Plot the observations, the fitted regression line with its confidence band,
# and the lower/upper prediction interval bounds as dashed lines.

# One shared level for both intervals, so the prediction band drawn here is
# computed at the same 97% level as the confidence band from geom_smooth().
interval_level <- 0.97

df_copy <- df

# Per-row fit / lwr / upr for the observed horsepower values. Passing the data
# explicitly as `newdata` gives the same values as omitting it, without the
# "predictions on current data refer to _future_ responses" warning.
pred <- predict(
  model1,
  newdata = df_copy,
  interval = "prediction",
  level = interval_level
)
pred_df <- cbind(df_copy, pred)

sp2 <- ggplot(pred_df, aes(x = horsepower, y = weight)) +
  theme_light() +
  geom_point(size = 1, alpha = 0.5) +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    level = interval_level,
    aes(color = "Line of Best Fit", fill = "Confidence Interval")
  ) +
  # The colours of the bounds come from scale_color_manual() below. A fixed
  # colour parameter here would override the mapping and drop the "lwr"/"upr"
  # entries from the legend.
  geom_line(aes(y = lwr, color = "lwr"), linetype = "dashed") +
  geom_line(aes(y = upr, color = "upr"), linetype = "dashed") +
  scale_fill_manual(
    NULL,
    values = c("Confidence Interval" = "grey")
  ) +
  scale_color_manual(
    NULL,
    values = c("Line of Best Fit" = "#6495ED", "upr" = "green", "lwr" = "red")
  ) +
  theme(
    legend.position = "bottom",
    axis.title.x = element_text(vjust = -2),
    axis.title.y = element_text(vjust = 3),
    plot.title = element_text(size = 12)
  ) +
  # The model is weight ~ horsepower, i.e. weight regressed on horsepower.
  ggtitle("Linear Regression of Weight on Horsepower with Confidence & Prediction Intervals") +
  xlab("Horsepower") +
  ylab("Weight") +
  # Zoom to the observed data range without discarding rows: scale limits
  # (xlim()/ylim()) turn out-of-range lwr/upr values into NA and truncate the
  # dashed lines, whereas coord_cartesian() only clips the view.
  coord_cartesian(
    xlim = range(pred_df$horsepower),
    ylim = range(pred_df$weight)
  )

sp2