library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Five-number summary (plus mean) of the speed column of `cars`
with(cars, summary(speed))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.0 12.0 15.0 15.4 19.0 25.0
# Five-number summary (plus mean) of the dist column of `cars`
with(cars, summary(dist))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 26.00 36.00 42.98 56.00 120.00
# Scatter plot of stopping distance against speed (base graphics)
plot(
  x = cars[["speed"]],
  y = cars[["dist"]],
  main = "Stopping Distance vs. Speed",
  xlab = "Speed (mph)",
  ylab = "Stopping Distance (ft)"
)
# Fit a simple linear regression of stopping distance on speed ----
x <- cars[["speed"]] # predictor: car speed
y <- cars[["dist"]] # response: stopping distance
cars_lm <- lm(formula = y ~ x) # least-squares fit of y on x
# Scatter plot of the observations with the fitted regression line overlaid.
# Built with ggplot() because qplot() is deprecated as of ggplot2 3.4.0.
# expand_limits(y = -10) keeps the y axis extended down to -10, which is what
# the `ymin = -10` argument asked for in the original qplot() call.
ggplot(data.frame(x = x, y = y), aes(x = x, y = y)) +
  geom_point() +
  expand_limits(y = -10) +
  geom_abline(
    intercept = coef(cars_lm)[1],
    slope = coef(cars_lm)[2]
  ) +
  labs(
    title = "Cars Speed vs. Stopping Distance",
    x = "Speed (mph)",
    y = "Stopping Distance (ft)"
  )
# Coefficient table, residual quantiles and fit statistics for the model
summary(object = cars_lm)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.069 -9.525 -2.272 9.215 43.201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.5791 6.7584 -2.601 0.0123 *
## x 3.9324 0.4155 9.464 1.49e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared: 0.6511, Adjusted R-squared: 0.6438
## F-statistic: 89.57 on 1 and 48 DF, p-value: 1.49e-12
# Residuals plotted against fitted values for cars_lm, drawn as red points
ggplot(cars_lm, aes(x = .fitted, y = .resid)) +
  geom_point(color = "red", size = 2) +
  labs(
    title = "Fitted Values vs Residuals",
    x = "Fitted Values",
    y = "Residuals"
  )
# Normal Q-Q plot of the model residuals, with a reference line added
qqnorm(residuals(cars_lm))
qqline(residuals(cars_lm))