Install Packages

install.packages("tidyverse", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp92yhUQ/downloaded_packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
install.packages("psych", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp92yhUQ/downloaded_packages
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
install.packages("knitr",repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp92yhUQ/downloaded_packages
library("knitr")
install.packages("kableExtra",repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp92yhUQ/downloaded_packages
library("kableExtra")
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
install.packages("robustbase",repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp92yhUQ/downloaded_packages
library(robustbase)
install.packages("stargazer",repos = 
"https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp92yhUQ/downloaded_packages
library(stargazer)
## 
## Please cite as: 
## 
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
install.packages("MASS",repos = 
"https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp92yhUQ/downloaded_packages
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
install.packages("AER",repos = 
"https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp92yhUQ/downloaded_packages
library("AER")
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:psych':
## 
##     logit
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: sandwich
## Loading required package: survival
## 
## Attaching package: 'survival'
## 
## The following object is masked from 'package:robustbase':
## 
##     heart
install.packages("modelsummary", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp92yhUQ/downloaded_packages
library("modelsummary")
## 
## Attaching package: 'modelsummary'
## 
## The following object is masked from 'package:psych':
## 
##     SD
install.packages("broom", repos ="https://cloud.r-project.org" )
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmp92yhUQ/downloaded_packages
library(broom)

Download data

dt <- read_csv("Fertility.csv")
## Rows: 30000 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (9): morekids, boy1st, boy2nd, samesex, agem1, black, hispan, othrace, w...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Create a summary of statistics

summary_table1 <- describe(dt)
summary_table1
##          vars     n  mean    sd median trimmed  mad min max range  skew
## morekids    1 30000  0.38  0.48      0    0.35 0.00   0   1     1  0.50
## boy1st      2 30000  0.52  0.50      1    0.52 0.00   0   1     1 -0.06
## boy2nd      3 30000  0.51  0.50      1    0.51 0.00   0   1     1 -0.02
## samesex     4 30000  0.50  0.50      1    0.50 0.00   0   1     1 -0.01
## agem1       5 30000 30.35  3.38     31   30.60 2.97  21  35    14 -0.58
## black       6 30000  0.05  0.22      0    0.00 0.00   0   1     1  3.98
## hispan      7 30000  0.07  0.26      0    0.00 0.00   0   1     1  3.24
## othrace     8 30000  0.06  0.23      0    0.00 0.00   0   1     1  3.83
## weeksm1     9 30000 19.21 21.94      6   17.51 8.90   0  52    52  0.52
##          kurtosis   se
## morekids    -1.75 0.00
## boy1st      -2.00 0.00
## boy2nd      -2.00 0.00
## samesex     -2.00 0.00
## agem1       -0.46 0.02
## black       13.83 0.00
## hispan       8.52 0.00
## othrace     12.65 0.00
## weeksm1     -1.49 0.13

Create a summary of statistics table for our document

table1 <- knitr::kable(summary_table1, 
             "html",
             caption = "Summary Statistics Fertility Data",
             digits = 2) %>%
footnote(general = "N = 30000 | 1980 U.S. CENSUS") %>% 
  kable_styling(font_size = 12)
table1
Summary Statistics Fertility Data
vars n mean sd median trimmed mad min max range skew kurtosis se
morekids 1 30000 0.38 0.48 0 0.35 0.00 0 1 1 0.50 -1.75 0.00
boy1st 2 30000 0.52 0.50 1 0.52 0.00 0 1 1 -0.06 -2.00 0.00
boy2nd 3 30000 0.51 0.50 1 0.51 0.00 0 1 1 -0.02 -2.00 0.00
samesex 4 30000 0.50 0.50 1 0.50 0.00 0 1 1 -0.01 -2.00 0.00
agem1 5 30000 30.35 3.38 31 30.60 2.97 21 35 14 -0.58 -0.46 0.02
black 6 30000 0.05 0.22 0 0.00 0.00 0 1 1 3.98 13.83 0.00
hispan 7 30000 0.07 0.26 0 0.00 0.00 0 1 1 3.24 8.52 0.00
othrace 8 30000 0.06 0.23 0 0.00 0.00 0 1 1 3.83 12.65 0.00
weeksm1 9 30000 19.21 21.94 6 17.51 8.90 0 52 52 0.52 -1.49 0.13
Note:
N = 30000 | 1980 U.S. CENSUS

Create a histogram for variable Weeksm1

hist(dt$weeksm1,
     xlab = "Moms Weeks Worked on 1979",
     ylab = "Frequency",
     main = "Distribution of Moms Weeks Worked",
     col = "black",
     border = "white"
     )

# Run a OLS regression for Weeksm1 on moreKids

Model 1

model1 <- rlm(dt$weeksm1 ~ dt$morekids)

Creating table for model 1

stargazer(model1,
          title = "Mom's Weeks Worked on If mom had more than 2 children",
          dep.var.caption = "DV: Mom's Weeks Worked in 1979",
          covariate.labels = c("If mom had more than 2 kids"),
          notes.label = "Source: 1980 Census| N = 30000",
          add.lines = TRUE,
          keep.stat = c("n","rsq"),
          digits = 2,
          type = "html",
          out = "AdEconomretics.htm")
## 
## <table style="text-align:center"><caption><strong>Mom's Weeks Worked on If mom had more than 2 children</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td>DV: Mom's Weeks Worked in 1979</td></tr>
## <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td>weeksm1</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">If mom had more than 2 kids</td><td>-6.01<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.26)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td style="text-align:left">Constant</td><td>21.48<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.16)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">TRUE</td><td></td></tr>
## <tr><td style="text-align:left">Observations</td><td>30,000</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Source: 1980 Census| N = 30000</td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>

IV regression

iv1 <- ivreg(dt$weeksm1 ~ dt$morekids | dt$samesex)
summary(iv1)
## 
## Call:
## ivreg(formula = dt$weeksm1 ~ dt$morekids | dt$samesex)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -21.49 -21.49 -13.49  26.51  36.55 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   21.488      1.425  15.083   <2e-16 ***
## dt$morekids   -6.033      3.758  -1.605    0.108    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.75 on 29998 degrees of freedom
## Multiple R-Squared: 0.01762, Adjusted R-squared: 0.01759 
## Wald test: 2.577 on 1 and 29998 DF,  p-value: 0.1084

Creating a side by side table to compare our models

SBST1 <- list("Model 1" = model1,
              "IV Model" = iv1 )
modelsummary(SBST1,
             fmt = 5,
             stars = TRUE,
             output = "table2.1.docx")
summary(iv1, vcov. = sandwich, diagnostics = TRUE)
## 
## Call:
## ivreg(formula = dt$weeksm1 ~ dt$morekids | dt$samesex)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -21.49 -21.49 -13.49  26.51  36.55 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   21.488      1.425  15.076   <2e-16 ***
## dt$morekids   -6.033      3.758  -1.605    0.108    
## 
## Diagnostic tests:
##                    df1   df2 statistic p-value    
## Weak instruments     1 29998     143.2  <2e-16 ***
## Wu-Hausman           1 29997       0.0   0.995    
## Sargan               0    NA        NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.75 on 29998 degrees of freedom
## Multiple R-Squared: 0.01762, Adjusted R-squared: 0.01759 
## Wald test: 2.577 on 1 and 29998 DF,  p-value: 0.1084

IV regression controlling for more variables

iv2 <- ivreg(dt$weeksm1 ~ dt$morekids + dt$agem1 + dt$black + dt$hispan + dt$othrace | dt$agem1 + dt$black + dt$hispan + dt$othrace + dt$samesex )
summary(iv2, vcov. = sandwich, diagnostics = TRUE)
## 
## Call:
## ivreg(formula = dt$weeksm1 ~ dt$morekids + dt$agem1 + dt$black + 
##     dt$hispan + dt$othrace | dt$agem1 + dt$black + dt$hispan + 
##     dt$othrace + dt$samesex)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -35.88 -17.86 -10.69  23.16  44.03 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.37034    1.17793  -3.710 0.000207 ***
## dt$morekids -5.78075    3.64459  -1.586 0.112723    
## dt$agem1     0.82350    0.06926  11.889  < 2e-16 ***
## dt$black    11.42628    0.65820  17.360  < 2e-16 ***
## dt$hispan   -0.41177    0.74925  -0.550 0.582615    
## dt$othrace   3.30779    0.61254   5.400 6.71e-08 ***
## 
## Diagnostic tests:
##                    df1   df2 statistic p-value    
## Weak instruments     1 29994   150.879  <2e-16 ***
## Wu-Hausman           1 29993     0.094   0.759    
## Sargan               0    NA        NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.42 on 29994 degrees of freedom
## Multiple R-Squared: 0.04708, Adjusted R-squared: 0.04692 
## Wald test: 165.6 on 5 and 29994 DF,  p-value: < 2.2e-16

Creating a OLS regression to compare the IV model

olsmodel <- rlm(dt$weeksm1 ~ dt$morekids + dt$agem1 + dt$black + dt$hispan + dt$othrace )
summary(olsmodel)
## 
## Call: rlm(formula = dt$weeksm1 ~ dt$morekids + dt$agem1 + dt$black + 
##     dt$hispan + dt$othrace)
## Residuals:
##    Min     1Q Median     3Q    Max 
## -36.58 -18.01 -10.32  23.15  45.10 
## 
## Coefficients:
##             Value    Std. Error t value 
## (Intercept)  -4.9115   1.1366    -4.3212
## dt$morekids  -6.9872   0.2609   -26.7799
## dt$agem1      0.8545   0.0373    22.9036
## dt$black     11.5820   0.5594    20.7032
## dt$hispan    -0.2709   0.5280    -0.5130
## dt$othrace    3.3840   0.5966     5.6724
## 
## Residual standard error: 27.93 on 29994 degrees of freedom

Creating a side by side table for IV1, IV2 and olsmodel

SBST2 <- list("IV Model 1" = iv1,
              "IV Model 2" = iv2,
              "OLS Model" = olsmodel)
modelsummary(SBST2,
             fmt = 5,
             stars = TRUE,
             output = "table2.2.docx")