Using the “cars” dataset in R, build a linear model for stopping distance as a function of speed and replicate the analysis of your textbook chapter 3 (visualization, quality evaluation of the model, and residual analysis.)
library(tidyverse)
# Peek at the first six rows of the `cars` data set.
head(x = cars, n = 6L)
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
# Per-column descriptive statistics (quartiles, mean, extremes).
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
*** Visualization
Scatter plot
# Base-graphics scatter plot: speed on the x-axis, dist on the y-axis.
plot(
  x = cars$speed,
  y = cars$dist,
  main = "CARS",
  xlab = "speed",
  ylab = "dist"
)
*** Scatter plot including regression line
# Scatter plot of stopping distance against speed with the least-squares
# regression line overlaid (no confidence band).
ggplot(cars, aes(x = speed, y = dist)) +
  geom_point() +
  # The smoothing formula is spelled out so geom_smooth() does not have to
  # guess it, and `FALSE` replaces the reassignable shorthand `F`.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(title = "Cars", x = "Speed", y = "Distance") +
  theme_minimal()
# Fit the simple linear regression required by the task: stopping distance
# (`dist`, the response) modelled as a function of `speed` (the predictor).
# The response goes on the left-hand side of the formula.
cars_lm <- lm(dist ~ speed, data = cars)
cars_lm
##
## Call:
## lm(formula = dist ~ speed, data = cars)
##
## Coefficients:
## (Intercept) speed
## -17.579 3.932
*** Quality Evaluation
** Residual Analysis
# Residuals-versus-fitted plot for `cars_lm`. The plotting data is assembled
# explicitly as a data frame instead of piping the model object itself into
# ggplot(), so the aesthetics refer to real columns of the plotted data.
ggplot(
  data.frame(fitted = fitted(cars_lm), residual = resid(cars_lm)),
  aes(x = fitted, y = residual)
) +
  geom_point() +
  # For a least-squares fit with an intercept this trend line is flat at
  # zero, so it acts as the reference line for judging the residual scatter.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(title = "Residual Analysis", x = "Fitted Line", y = "Residuals") +
  theme_minimal()
*** Quantile-Quantile (Q-Q) plot
# Refit the model here so this chunk is self-contained. The response is
# stopping distance (`dist`) and the predictor is `speed`.
cars_lm <- lm(dist ~ speed, data = cars)
# Normal Q-Q plot of the residuals: points lying close to the reference line
# indicate approximately normally distributed residuals.
cars_resid <- resid(cars_lm)
qqnorm(cars_resid)
qqline(cars_resid)