library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
# Column-wise preview of the built-in `cars` data: name, type, leading values
cars %>%
  glimpse()
## Observations: 50
## Variables: 2
## $ speed <dbl> 4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13, 1…
## $ dist <dbl> 2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24, 28, 26, 3…
# Per-column summary statistics (min, quartiles, mean, max) for `cars`
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
# Size of the data set as (rows, columns)
dim(cars)
## [1] 50 2
# Pairwise scatter plots, densities and correlations for every column
ggpairs(cars, title = "Cars Data")
# Correlation matrix plot with the coefficient printed in each cell
ggcorr(cars, label = TRUE)
\(H_0\): speed and dist are not linearly related (the slope of speed is zero). \(H_A\): speed and dist are linearly related (the slope of speed is non-zero).
# Simple linear regression: dist is the response, speed the single predictor
m_dist_spd <- lm(formula = dist ~ speed, data = cars)

# Coefficient table, residual quantiles, R-squared and overall F-test
summary(m_dist_spd)
##
## Call:
## lm(formula = dist ~ speed, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.069 -9.525 -2.272 9.215 43.201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.5791 6.7584 -2.601 0.0123 *
## speed 3.9324 0.4155 9.464 1.49e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared: 0.6511, Adjusted R-squared: 0.6438
## F-statistic: 89.57 on 1 and 48 DF, p-value: 1.49e-12
# draw model
# Scatter plot of the raw observations with a least-squares line overlaid.
# The data frame is plotted directly so the chart does not rely on ggplot2
# implicitly converting a fitted model object into a data frame.
# geom_smooth(method = "lm") refits dist ~ speed on the plotted data.
ggplot(cars, aes(speed, dist)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
# residuals
# Residuals-vs-fitted diagnostic for m_dist_spd.
# A straight-line (lm) smoother is uninformative here: least-squares
# residuals are uncorrelated with the fitted values, so that line is always
# flat at zero. A loess smoother can bend, which exposes curvature, and the
# dashed line at zero is the reference it should follow if the fit is adequate.
ggplot(
  data.frame(
    fitted_value = fitted(m_dist_spd),
    residual = resid(m_dist_spd)
  ),
  aes(fitted_value, residual)
) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE) +
  labs(
    title = "Residual Analysis",
    x = "Fitted Line",
    y = "Residuals"
  ) +
  theme_minimal()
## `geom_smooth()` using formula 'y ~ x'
# residuals histogram
# Distribution of the model residuals. The residuals are put in their own
# data frame so aes() refers to a column rather than using `$` on an outside
# object. With 50 residuals spanning roughly -29 to 43 (see the model
# summary), 10-unit bins give a readable shape; 1-unit bins would leave
# almost every bar at height 0 or 1.
ggplot(data.frame(residual = resid(m_dist_spd)), aes(residual)) +
  geom_histogram(binwidth = 10) +
  labs(x = "Residuals")
# qq plot
# Normal Q-Q plot of the model residuals: points close to the reference line
# are consistent with normally distributed errors. The residuals are supplied
# as an explicit data-frame column instead of passing the model object as
# plot data and reading an outside vector inside aes().
ggplot(data.frame(residual = resid(m_dist_spd)), aes(sample = residual)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Q-Q Plot") +
  theme_minimal()