library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(modelr)
library(broom)
##
## Attaching package: 'broom'
## The following object is masked from 'package:modelr':
##
## bootstrap
# Import wages here
# Fall back in case you cannot load wages
wages <- heights %>%
filter(income > 0) %>%
mutate(marital = as.character(marital),
sex = as.character(sex))
04-Models
.wages.xlsx
as wages and copy the code to your setup chunk.Fit the model on the slide and then examine the output. What does it look like?
mod_e <- lm(log(income) ~ education, data = wages)
#We have our regression w/ coefficients, etc.
Use a pipe to model log(income)
against height
. Then use broom and dplyr functions to extract:
mod_h <- wages %>% lm(log(income) ~ height, data = .)
mod_h %>%
tidy()
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 6.98 0.237 29.4 4.13e-176
## 2 height 0.0520 0.00352 14.8 2.44e- 48
mod_h %>%
glance() %>%
select(adj.r.squared, p.value)
## # A tibble: 1 × 2
## adj.r.squared p.value
## <dbl> <dbl>
## 1 0.0396 2.44e-48
mod_h %>%
tidy() %>% filter(p.value < 0.05)
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 6.98 0.237 29.4 4.13e-176
## 2 height 0.0520 0.00352 14.8 2.44e- 48
mod_e %>%
tidy() %>% filter(p.value < 0.05)
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 8.56 0.0733 117. 0
## 2 education 0.142 0.00530 26.7 8.41e-148
Model log(income)
against education
and height
and sex
. Can you interpret the coefficients?
mod_ehs <- wages %>%
lm(log(income) ~ education + height + sex, data = .)
mod_ehs %>%
tidy()
## # A tibble: 4 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 7.79 0.307 25.3 9.41e-134
## 2 education 0.148 0.00520 28.5 5.16e-166
## 3 height 0.00673 0.00479 1.40 1.61e- 1
## 4 sexmale 0.462 0.0389 11.9 5.02e- 32
#income is positively correlated with education, height, and sex. Coefficients as shown.
Add + geom_smooth(method = lm)
to the code below. What happens?
wages %>%
ggplot(aes(x = height, y = log(income))) +
geom_point(alpha = 0.1) +
geom_smooth(method = lm)
## `geom_smooth()` using formula 'y ~ x'
#Adds a model line
Use add_predictions
to make the plot below.
# Make plot here
wages %>%
add_predictions(mod_ehs) %>%
ggplot(mapping = aes(x = height, y = pred, color = sex)) +
geom_line() +
facet_wrap(~ education)
Use glance()
, tidy()
, and augment()
from the broom package to return model values in a data frame.
Use add_predictions()
or gather_predictions()
or spread_predictions()
from the modelr package to visualize predictions.
Use add_residuals()
or gather_residuals()
or spread_residuals()
from the modelr package to visualize residuals.