##Loading packages:
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(broom)
library(skimr)
library(haven)
nlsw <- read_dta("nlsw88.dta")
view(nlsw)
glimpse(nlsw)
## Rows: 2,246
## Columns: 17
## $ idcode <dbl> 1, 2, 3, 4, 6, 7, 9, 12, 13, 14, 15, 16, 18, 19, 20, 22,…
## $ age <dbl> 37, 37, 42, 43, 42, 39, 37, 40, 40, 40, 39, 40, 40, 40, …
## $ race <dbl+lbl> 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ married <dbl+lbl> 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1…
## $ never_married <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ grade <dbl> 12, 12, 12, 17, 12, 12, 12, 18, 14, 15, 16, 15, 15, 15, …
## $ collgrad <dbl+lbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1…
## $ south <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ smsa <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1…
## $ c_city <dbl> 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ industry <dbl+lbl> 5, 4, 4, 11, 4, 11, 5, 11, 11, 11, 11, 11, 6, …
## $ occupation <dbl+lbl> 6, 5, 3, 13, 6, 3, 2, 2, 3, 1, 1, 1, 5, …
## $ union <dbl+lbl> 1, 1, NA, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, …
## $ wage <dbl> 11.739125, 6.400963, 5.016723, 9.033813, 8.083731, 4.629…
## $ hours <dbl> 48, 40, 40, 42, 48, 30, 40, 45, 8, 50, 16, 40, 40, 40, 4…
## $ ttl_exp <dbl> 10.333334, 13.621795, 17.730770, 13.211537, 17.820513, 7…
## $ tenure <dbl> 5.3333335, 5.2500000, 1.2500000, 1.7500000, 17.7500000, …
#Rename variables
nlsw.clean <- nlsw %>%
rename(.,
college_grad = collgrad,
total_work_exp = ttl_exp,
age_years = age) %>%
mutate(.,
college_grad_fac = as_factor(college_grad))
glimpse(nlsw.clean)
## Rows: 2,246
## Columns: 18
## $ idcode <dbl> 1, 2, 3, 4, 6, 7, 9, 12, 13, 14, 15, 16, 18, 19, 20, …
## $ age_years <dbl> 37, 37, 42, 43, 42, 39, 37, 40, 40, 40, 39, 40, 40, 4…
## $ race <dbl+lbl> 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ married <dbl+lbl> 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1…
## $ never_married <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ grade <dbl> 12, 12, 12, 17, 12, 12, 12, 18, 14, 15, 16, 15, 15, 1…
## $ college_grad <dbl+lbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1…
## $ south <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ smsa <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0…
## $ c_city <dbl> 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ industry <dbl+lbl> 5, 4, 4, 11, 4, 11, 5, 11, 11, 11, 11, 11, …
## $ occupation <dbl+lbl> 6, 5, 3, 13, 6, 3, 2, 2, 3, 1, 1, 1, …
## $ union <dbl+lbl> 1, 1, NA, 1, 0, 0, 1, 0, 0, 0, 1, 0, …
## $ wage <dbl> 11.739125, 6.400963, 5.016723, 9.033813, 8.083731, 4.…
## $ hours <dbl> 48, 40, 40, 42, 48, 30, 40, 45, 8, 50, 16, 40, 40, 40…
## $ total_work_exp <dbl> 10.333334, 13.621795, 17.730770, 13.211537, 17.820513…
## $ tenure <dbl> 5.3333335, 5.2500000, 1.2500000, 1.7500000, 17.750000…
## $ college_grad_fac <fct> not college grad, not college grad, not college grad,…
#confirm renaming
skim(nlsw.clean)
| Name | nlsw.clean |
| Number of rows | 2246 |
| Number of columns | 18 |
| _______________________ | |
| Column type frequency: | |
| factor | 1 |
| numeric | 17 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| college_grad_fac | 0 | 1 | FALSE | 2 | not: 1714, col: 532 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| idcode | 0 | 1.00 | 2612.65 | 1480.86 | 1.00 | 1366.25 | 2614.00 | 3902.25 | 5159.00 | ▇▇▇▇▇ |
| age_years | 0 | 1.00 | 39.15 | 3.06 | 34.00 | 36.00 | 39.00 | 42.00 | 46.00 | ▇▆▇▃▃ |
| race | 0 | 1.00 | 1.28 | 0.48 | 1.00 | 1.00 | 1.00 | 2.00 | 3.00 | ▇▁▃▁▁ |
| married | 0 | 1.00 | 0.64 | 0.48 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
| never_married | 0 | 1.00 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| grade | 2 | 1.00 | 13.10 | 2.52 | 0.00 | 12.00 | 12.00 | 15.00 | 18.00 | ▁▁▁▇▃ |
| college_grad | 0 | 1.00 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
| south | 0 | 1.00 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
| smsa | 0 | 1.00 | 0.70 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
| c_city | 0 | 1.00 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
| industry | 14 | 0.99 | 8.19 | 3.01 | 1.00 | 6.00 | 8.00 | 11.00 | 12.00 | ▁▃▅▂▇ |
| occupation | 9 | 1.00 | 4.64 | 3.41 | 1.00 | 2.00 | 3.00 | 6.00 | 13.00 | ▇▁▃▁▁ |
| union | 368 | 0.84 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
| wage | 0 | 1.00 | 7.77 | 5.76 | 1.00 | 4.26 | 6.27 | 9.59 | 40.75 | ▇▂▁▁▁ |
| hours | 4 | 1.00 | 37.22 | 10.51 | 1.00 | 35.00 | 40.00 | 40.00 | 80.00 | ▁▂▇▁▁ |
| total_work_exp | 0 | 1.00 | 12.53 | 4.61 | 0.12 | 9.21 | 13.13 | 15.98 | 28.88 | ▂▅▇▂▁ |
| tenure | 15 | 0.99 | 5.98 | 5.51 | 0.00 | 1.58 | 3.83 | 9.33 | 25.92 | ▇▃▂▁▁ |
#Simple Regression: total_work_exp –> wage
wagereg1 <- lm(wage ~ total_work_exp, data = nlsw.clean)
options(scipen=999)
summary(wagereg1)
##
## Call:
## lm(formula = wage ~ total_work_exp, data = nlsw.clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.502 -2.911 -1.295 1.308 35.370
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.61249 0.33935 10.64 <0.0000000000000002 ***
## total_work_exp 0.33143 0.02541 13.04 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.55 on 2244 degrees of freedom
## Multiple R-squared: 0.07048, Adjusted R-squared: 0.07006
## F-statistic: 170.1 on 1 and 2244 DF, p-value: < 0.00000000000000022
#Multiple Regression: total_work_exp + age_years –> wage
wagereg2 <- lm(wage ~ total_work_exp + age_years, data = nlsw.clean)
summary(wagereg2)
##
## Call:
## lm(formula = wage ~ total_work_exp + age_years, data = nlsw.clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.107 -2.926 -1.267 1.337 35.050
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.64932 1.50566 5.745 0.0000000105 ***
## total_work_exp 0.34233 0.02555 13.401 < 0.0000000000000002 ***
## age_years -0.13213 0.03849 -3.433 0.000607 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.537 on 2243 degrees of freedom
## Multiple R-squared: 0.07534, Adjusted R-squared: 0.07451
## F-statistic: 91.37 on 2 and 2243 DF, p-value: < 0.00000000000000022
#Multiple Regression: total_work_exp + age_years + college_grad_fac –> wage
wagereg3 <- lm(wage ~ total_work_exp + age_years + college_grad_fac, data = nlsw.clean)
summary(wagereg3)
##
## Call:
## lm(formula = wage ~ total_work_exp + age_years + college_grad_fac,
## data = nlsw.clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.917 -2.689 -1.021 1.086 35.550
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.92690 1.46037 5.428 0.0000000631
## total_work_exp 0.30869 0.02491 12.391 < 0.0000000000000002
## age_years -0.12252 0.03731 -3.284 0.00104
## college_grad_faccollege grad 3.24134 0.26799 12.095 < 0.0000000000000002
##
## (Intercept) ***
## total_work_exp ***
## age_years **
## college_grad_faccollege grad ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.366 on 2242 degrees of freedom
## Multiple R-squared: 0.132, Adjusted R-squared: 0.1308
## F-statistic: 113.6 on 3 and 2242 DF, p-value: < 0.00000000000000022
#Mult. Reg.: total_work_exp + age_years + college_grad_fac + age_years*total_work_exp –> wage
wagereg4 <- lm(wage ~ total_work_exp + age_years + college_grad_fac + total_work_exp:age_years, data = nlsw.clean)
summary(wagereg4)
##
## Call:
## lm(formula = wage ~ total_work_exp + age_years + college_grad_fac +
## total_work_exp:age_years, data = nlsw.clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.535 -2.646 -1.027 1.064 35.940
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.701472 4.151041 0.410 0.6819
## total_work_exp 0.816992 0.318256 2.567 0.0103
## age_years 0.034692 0.104979 0.330 0.7411
## college_grad_faccollege grad 3.229193 0.268005 12.049 <0.0000000000000002
## total_work_exp:age_years -0.012788 0.007982 -1.602 0.1093
##
## (Intercept)
## total_work_exp *
## age_years
## college_grad_faccollege grad ***
## total_work_exp:age_years
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.364 on 2241 degrees of freedom
## Multiple R-squared: 0.133, Adjusted R-squared: 0.1314
## F-statistic: 85.92 on 4 and 2241 DF, p-value: < 0.00000000000000022
#Summary Table
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
summary.table <- stargazer(wagereg1,
wagereg2,
wagereg3,
wagereg4,
type = "text",
font.size = "normalsize",
digits = 2,
keep.stat = c("n", "rsq", "adj.rsq"),
out = "Module2_table.html",
star.cutoffs = c(.05, .01, .001))
##
## =============================================================
## Dependent variable:
## --------------------------------
## wage
## (1) (2) (3) (4)
## -------------------------------------------------------------
## total_work_exp 0.33*** 0.34*** 0.31*** 0.82*
## (0.03) (0.03) (0.02) (0.32)
##
## age_years -0.13*** -0.12** 0.03
## (0.04) (0.04) (0.10)
##
## college_grad_faccollege grad 3.24*** 3.23***
## (0.27) (0.27)
##
## total_work_exp:age_years -0.01
## (0.01)
##
## Constant 3.61*** 8.65*** 7.93*** 1.70
## (0.34) (1.51) (1.46) (4.15)
##
## -------------------------------------------------------------
## Observations 2,246 2,246 2,246 2,246
## R2 0.07 0.08 0.13 0.13
## Adjusted R2 0.07 0.07 0.13 0.13
## =============================================================
## Note: *p<0.05; **p<0.01; ***p<0.001