Input nlsw88.dta

nlsw <- read_dta("nlsw88.dta")

view(nlsw)
glimpse(nlsw)

## Rows: 2,246
## Columns: 17
## $ idcode        <dbl> 1, 2, 3, 4, 6, 7, 9, 12, 13, 14, 15, 16, 18, 19, 20, 22,…
## $ age           <dbl> 37, 37, 42, 43, 42, 39, 37, 40, 40, 40, 39, 40, 40, 40, …
## $ race          <dbl+lbl> 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ married       <dbl+lbl> 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1…
## $ never_married <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ grade         <dbl> 12, 12, 12, 17, 12, 12, 12, 18, 14, 15, 16, 15, 15, 15, …
## $ collgrad      <dbl+lbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1…
## $ south         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ smsa          <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1…
## $ c_city        <dbl> 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ industry      <dbl+lbl>  5,  4,  4, 11,  4, 11,  5, 11, 11, 11, 11, 11,  6, …
## $ occupation    <dbl+lbl>  6,  5,  3, 13,  6,  3,  2,  2,  3,  1,  1,  1,  5, …
## $ union         <dbl+lbl>  1,  1, NA,  1,  0,  0,  1,  0,  0,  0,  1,  0,  0, …
## $ wage          <dbl> 11.739125, 6.400963, 5.016723, 9.033813, 8.083731, 4.629…
## $ hours         <dbl> 48, 40, 40, 42, 48, 30, 40, 45, 8, 50, 16, 40, 40, 40, 4…
## $ ttl_exp       <dbl> 10.333334, 13.621795, 17.730770, 13.211537, 17.820513, 7…
## $ tenure        <dbl> 5.3333335, 5.2500000, 1.2500000, 1.7500000, 17.7500000, …

#Rename variables

nlsw.clean <- nlsw %>%
  rename(.,
         college_grad = collgrad,
         total_work_exp = ttl_exp,
         age_years = age) %>%
  mutate(.,
         college_grad_fac = as_factor(college_grad))


glimpse(nlsw.clean)

## Rows: 2,246
## Columns: 18
## $ idcode           <dbl> 1, 2, 3, 4, 6, 7, 9, 12, 13, 14, 15, 16, 18, 19, 20, …
## $ age_years        <dbl> 37, 37, 42, 43, 42, 39, 37, 40, 40, 40, 39, 40, 40, 4…
## $ race             <dbl+lbl> 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ married          <dbl+lbl> 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1…
## $ never_married    <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ grade            <dbl> 12, 12, 12, 17, 12, 12, 12, 18, 14, 15, 16, 15, 15, 1…
## $ college_grad     <dbl+lbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1…
## $ south            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ smsa             <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0…
## $ c_city           <dbl> 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ industry         <dbl+lbl>  5,  4,  4, 11,  4, 11,  5, 11, 11, 11, 11, 11,  …
## $ occupation       <dbl+lbl>  6,  5,  3, 13,  6,  3,  2,  2,  3,  1,  1,  1,  …
## $ union            <dbl+lbl>  1,  1, NA,  1,  0,  0,  1,  0,  0,  0,  1,  0,  …
## $ wage             <dbl> 11.739125, 6.400963, 5.016723, 9.033813, 8.083731, 4.…
## $ hours            <dbl> 48, 40, 40, 42, 48, 30, 40, 45, 8, 50, 16, 40, 40, 40…
## $ total_work_exp   <dbl> 10.333334, 13.621795, 17.730770, 13.211537, 17.820513…
## $ tenure           <dbl> 5.3333335, 5.2500000, 1.2500000, 1.7500000, 17.750000…
## $ college_grad_fac <fct> not college grad, not college grad, not college grad,…

#confirm renaming

skim(nlsw.clean)

Data summary
Name	nlsw.clean
Number of rows	2246
Number of columns	18
_______________________
Column type frequency:
factor	1
numeric	17
________________________
Group variables	None

Variable type: factor

skim_variable	n_missing	complete_rate	ordered	n_unique	top_counts
college_grad_fac	0	1	FALSE	2	not: 1714, col: 532

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
idcode	0	1.00	2612.65	1480.86	1.00	1366.25	2614.00	3902.25	5159.00	▇▇▇▇▇
age_years	0	1.00	39.15	3.06	34.00	36.00	39.00	42.00	46.00	▇▆▇▃▃
race	0	1.00	1.28	0.48	1.00	1.00	1.00	2.00	3.00	▇▁▃▁▁
married	0	1.00	0.64	0.48	0.00	0.00	1.00	1.00	1.00	▅▁▁▁▇
never_married	0	1.00	0.10	0.31	0.00	0.00	0.00	0.00	1.00	▇▁▁▁▁
grade	2	1.00	13.10	2.52	0.00	12.00	12.00	15.00	18.00	▁▁▁▇▃
college_grad	0	1.00	0.24	0.43	0.00	0.00	0.00	0.00	1.00	▇▁▁▁▂
south	0	1.00	0.42	0.49	0.00	0.00	0.00	1.00	1.00	▇▁▁▁▆
smsa	0	1.00	0.70	0.46	0.00	0.00	1.00	1.00	1.00	▃▁▁▁▇
c_city	0	1.00	0.29	0.45	0.00	0.00	0.00	1.00	1.00	▇▁▁▁▃
industry	14	0.99	8.19	3.01	1.00	6.00	8.00	11.00	12.00	▁▃▅▂▇
occupation	9	1.00	4.64	3.41	1.00	2.00	3.00	6.00	13.00	▇▁▃▁▁
union	368	0.84	0.25	0.43	0.00	0.00	0.00	0.00	1.00	▇▁▁▁▂
wage	0	1.00	7.77	5.76	1.00	4.26	6.27	9.59	40.75	▇▂▁▁▁
hours	4	1.00	37.22	10.51	1.00	35.00	40.00	40.00	80.00	▁▂▇▁▁
total_work_exp	0	1.00	12.53	4.61	0.12	9.21	13.13	15.98	28.88	▂▅▇▂▁
tenure	15	0.99	5.98	5.51	0.00	1.58	3.83	9.33	25.92	▇▃▂▁▁

#Simple Regression: total_work_exp –> wage

wagereg1 <- lm(wage ~ total_work_exp, data = nlsw.clean)

options(scipen=999)
summary(wagereg1)

## 
## Call:
## lm(formula = wage ~ total_work_exp, data = nlsw.clean)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.502 -2.911 -1.295  1.308 35.370 
## 
## Coefficients:
##                Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)     3.61249    0.33935   10.64 <0.0000000000000002 ***
## total_work_exp  0.33143    0.02541   13.04 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.55 on 2244 degrees of freedom
## Multiple R-squared:  0.07048,    Adjusted R-squared:  0.07006 
## F-statistic: 170.1 on 1 and 2244 DF,  p-value: < 0.00000000000000022

#Multiple Regression: total_work_exp + age_years –> wage

wagereg2 <- lm(wage ~ total_work_exp + age_years, data = nlsw.clean)

summary(wagereg2)

## 
## Call:
## lm(formula = wage ~ total_work_exp + age_years, data = nlsw.clean)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.107 -2.926 -1.267  1.337 35.050 
## 
## Coefficients:
##                Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)     8.64932    1.50566   5.745         0.0000000105 ***
## total_work_exp  0.34233    0.02555  13.401 < 0.0000000000000002 ***
## age_years      -0.13213    0.03849  -3.433             0.000607 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.537 on 2243 degrees of freedom
## Multiple R-squared:  0.07534,    Adjusted R-squared:  0.07451 
## F-statistic: 91.37 on 2 and 2243 DF,  p-value: < 0.00000000000000022

#Multiple Regression: total_work_exp + age_years + college_grad_fac –> wage

wagereg3 <- lm(wage ~ total_work_exp + age_years + college_grad_fac, data = nlsw.clean)

summary(wagereg3)

## 
## Call:
## lm(formula = wage ~ total_work_exp + age_years + college_grad_fac, 
##     data = nlsw.clean)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.917 -2.689 -1.021  1.086 35.550 
## 
## Coefficients:
##                              Estimate Std. Error t value             Pr(>|t|)
## (Intercept)                   7.92690    1.46037   5.428         0.0000000631
## total_work_exp                0.30869    0.02491  12.391 < 0.0000000000000002
## age_years                    -0.12252    0.03731  -3.284              0.00104
## college_grad_faccollege grad  3.24134    0.26799  12.095 < 0.0000000000000002
##                                 
## (Intercept)                  ***
## total_work_exp               ***
## age_years                    ** 
## college_grad_faccollege grad ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.366 on 2242 degrees of freedom
## Multiple R-squared:  0.132,  Adjusted R-squared:  0.1308 
## F-statistic: 113.6 on 3 and 2242 DF,  p-value: < 0.00000000000000022

#Mult. Reg.: total_work_exp + age_years + college_grad_fac + age_years*total_work_exp –> wage

wagereg4 <- lm(wage ~ total_work_exp + age_years + college_grad_fac + total_work_exp:age_years, data = nlsw.clean)

summary(wagereg4)

## 
## Call:
## lm(formula = wage ~ total_work_exp + age_years + college_grad_fac + 
##     total_work_exp:age_years, data = nlsw.clean)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.535 -2.646 -1.027  1.064 35.940 
## 
## Coefficients:
##                               Estimate Std. Error t value            Pr(>|t|)
## (Intercept)                   1.701472   4.151041   0.410              0.6819
## total_work_exp                0.816992   0.318256   2.567              0.0103
## age_years                     0.034692   0.104979   0.330              0.7411
## college_grad_faccollege grad  3.229193   0.268005  12.049 <0.0000000000000002
## total_work_exp:age_years     -0.012788   0.007982  -1.602              0.1093
##                                 
## (Intercept)                     
## total_work_exp               *  
## age_years                       
## college_grad_faccollege grad ***
## total_work_exp:age_years        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.364 on 2241 degrees of freedom
## Multiple R-squared:  0.133,  Adjusted R-squared:  0.1314 
## F-statistic: 85.92 on 4 and 2241 DF,  p-value: < 0.00000000000000022

#Summary Table

library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

summary.table <- stargazer(wagereg1,
                           wagereg2,
                           wagereg3,
                           wagereg4,
                           type = "text",
                           font.size = "normalsize",
                           digits = 2,
                           keep.stat = c("n", "rsq", "adj.rsq"),
                           out = "Module2_table.html",
                           star.cutoffs = c(.05, .01, .001))

## 
## =============================================================
##                                    Dependent variable:       
##                              --------------------------------
##                                            wage              
##                                (1)     (2)      (3)     (4)  
## -------------------------------------------------------------
## total_work_exp               0.33*** 0.34***  0.31***  0.82* 
##                              (0.03)   (0.03)  (0.02)  (0.32) 
##                                                              
## age_years                            -0.13*** -0.12**  0.03  
##                                       (0.04)  (0.04)  (0.10) 
##                                                              
## college_grad_faccollege grad                  3.24*** 3.23***
##                                               (0.27)  (0.27) 
##                                                              
## total_work_exp:age_years                               -0.01 
##                                                       (0.01) 
##                                                              
## Constant                     3.61*** 8.65***  7.93***  1.70  
##                              (0.34)   (1.51)  (1.46)  (4.15) 
##                                                              
## -------------------------------------------------------------
## Observations                  2,246   2,246    2,246   2,246 
## R2                            0.07     0.08    0.13    0.13  
## Adjusted R2                   0.07     0.07    0.13    0.13  
## =============================================================
## Note:                           *p<0.05; **p<0.01; ***p<0.001

Module 2_Content Review

Camie Tomlinson

2022-09-17

Input nlsw88.dta