library(pacman)
## Warning: package 'pacman' was built under R version 4.4.2
p_load(causalweight, lmtest, sandwich, AER, ivmodel, haven, estimatr, tidyverse,
lubridate, usmap, gridExtra, stringr, readxl,
reshape2, scales, broom, data.table, ggplot2, stargazer,
foreign, ggthemes, ggforce, ggridges, latex2exp, viridis, extrafont,
kableExtra, snakecase, janitor)
Paper: Autor, David, David Dorn, Gordon Hanson, and Kaveh Majlesi. 2020. “Importing Political Polarization? The Electoral Consequences of Rising Trade Exposure.” American Economic Review 110 (10): 3139–83.
Research question: Has the exposure of local labor markets to increased foreign competition from China contributed to rising political polarization in the United States since 2000?
Outcome variable: Political polarization, measured by voting patterns and election outcomes.
Treatment variable: The average change in Chinese import penetration across the CZ’s industries, weighted by each industry k’s share of the CZ’s initial employment.
Endogeneity argument: Realized U.S. imports from China may be correlated with U.S. industry import-demand shocks.
IV: The contemporaneous composition and growth of Chinese imports in eight other high-income markets.
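For intuition, both the treatment and the instrument take a shift-share form (a stylized sketch; the notation here is assumed rather than copied from the paper):

$$\Delta IP_{i} = \sum_{k} \frac{L_{ik}}{L_{i}}\,\Delta IP^{US}_{k}, \qquad \Delta IP^{IV}_{i} = \sum_{k} \frac{L_{ik}}{L_{i}}\,\Delta IP^{Other}_{k},$$

where $L_{ik}/L_{i}$ is industry $k$’s share of initial employment in commuting zone $i$, $\Delta IP^{US}_{k}$ is the change in Chinese import penetration of industry $k$ in the U.S., and $\Delta IP^{Other}_{k}$ is the corresponding change based on Chinese imports into the eight other high-income markets.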
DAG
#install.packages("ggdag")
library(ggdag)
## Warning: package 'ggdag' was built under R version 4.4.2
##
## Attaching package: 'ggdag'
## The following object is masked from 'package:stats':
##
## filter
dag <- dagify(
Polarization ~ TradeExposure,
TradeExposure ~ ChineseImportsOtherCountries,
labels = c(
"Polarization" = "Political Polarization",
"TradeExposure" = "U.S. Local Labor Market Exposure",
"ChineseImportsOtherCountries" = "Chinese Import Exposure in Other Countries"
)
)
ggdag(dag, text = FALSE, use_labels = "label") +
theme_dag()
Relevance: Chinese import growth over the sample period is driven primarily by the rising competitiveness of Chinese manufacturing, which acts as a supply shock to China’s global exports. Chinese imports into other developed countries, across industries, are highly correlated with U.S. import patterns.
Validity: The instrument is plausibly valid because Chinese import exposure in other countries is unlikely to be directly influenced by U.S. local political conditions, satisfying the exclusion restriction.
Main finding: Rising trade exposure to Chinese imports increases political polarization, shifting affected regions toward more extreme voting patterns.
Paper: Oh & Vukina, 2022
The authors justify the IV choice using Bertrand pricing, under which prices in different markets are correlated through common cost factors rather than through common demand shocks across regions. For the second part, the authors acknowledge that it is difficult to rule out demand shocks that are common across regions, but they control for holiday seasons and retail chains, which should absorb differentiated demand patterns.
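To make the identification logic concrete, here is a minimal simulation (hypothetical data, not the authors’ dataset) of a Hausman-type price instrument: prices in two markets share a common cost shock, demand shocks are market-specific, so the other market’s price instruments for own price in the demand equation.
# Hypothetical simulation of the Bertrand/Hausman price-instrument logic
set.seed(123)
n <- 5000
cost    <- rnorm(n)                                            # common cost shock across markets
dem_A   <- rnorm(n)                                            # market-A demand shock
price_A <- 1 + 0.8 * cost + 0.5 * dem_A + rnorm(n, sd = 0.3)   # own price (endogenous)
price_B <- 1 + 0.8 * cost + rnorm(n, sd = 0.3)                 # other-market price (instrument)
q_A     <- 10 - 2 * price_A + 2 * dem_A + rnorm(n)             # true price effect = -2
coef(lm(q_A ~ price_A))["price_A"]                             # OLS: biased toward zero
coef(ivreg(q_A ~ price_A | price_B))["price_A"]                # IV: close to -2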
install.packages("causalweight")
## Warning: package 'causalweight' is in use and will not be installed
library(causalweight)
data(JC)
table(JC$assignment)
##
## 0 1
## 3663 5577
table(JC$trainy1)
##
## 0 1
## 2666 6574
table(JC$assignment, JC$trainy1)
##
## 0 1
## 0 1809 1854
## 1 857 4720
prop.table(table(JC$assignment, JC$trainy1))
##
## 0 1
## 0 0.19577922 0.20064935
## 1 0.09274892 0.51082251
as.data.frame(table(JC$assignment, JC$trainy1))
## Var1 Var2 Freq
## 1 0 0 1809
## 2 1 0 857
## 3 0 1 1854
## 4 1 1 4720
ITT <- mean(JC$earny4[JC$assignment == 1]) - mean(JC$earny4[JC$assignment == 0])
print(ITT)
## [1] 16.05513
On average, individuals assigned to Job Corps earned $16.05 more per week in the fourth year than those in the control group.
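A quick cross-check of the same mean difference, using a Welch two-sample t-test instead of a regression:
# Welch t-test: same ITT difference in means, unequal-variance standard error
# (the difference is reported as group 0 minus group 1, hence the opposite sign)
t.test(earny4 ~ assignment, data = JC)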
ITT_model <- lm(earny4 ~ assignment, data = JC)
summary(ITT_model)
##
## Call:
## lm(formula = earny4 ~ assignment, data = JC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -213.98 -164.65 -24.02 99.25 2211.98
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 197.926 3.212 61.620 < 2e-16 ***
## assignment 16.055 4.134 3.883 0.000104 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 194.4 on 9238 degrees of freedom
## Multiple R-squared: 0.00163, Adjusted R-squared: 0.001522
## F-statistic: 15.08 on 1 and 9238 DF, p-value: 0.0001038
ITT_estimate <- coef(ITT_model)["assignment"]
SE <- summary(ITT_model)$coefficients["assignment", "Std. Error"]
cat("ITT Estimate:", ITT_estimate, "\n")
## ITT Estimate: 16.05513
cat("Standard Error:", SE, "\n")
## Standard Error: 4.134466
comp_treatment <- mean(JC$trainy1[JC$assignment == 1])
comp_control <- mean(JC$trainy1[JC$assignment == 0])
comp_diff <- comp_treatment - comp_control
cat("Take-up Rate (Treatment Group):", comp_treatment, "\n")
## Take-up Rate (Treatment Group): 0.8463332
cat("Take-up Rate (Control Group):", comp_control, "\n")
## Take-up Rate (Control Group): 0.5061425
cat("Difference in Take-up Rates:", comp_diff, "\n")
## Difference in Take-up Rates: 0.3401906
takeup_model <- lm(trainy1 ~ assignment, data = JC)
summary(takeup_model)
##
## Call:
## lm(formula = trainy1 ~ assignment, data = JC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8463 -0.5061 0.1537 0.1537 0.4939
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.506143 0.006964 72.68 <2e-16 ***
## assignment 0.340191 0.008963 37.95 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4215 on 9238 degrees of freedom
## Multiple R-squared: 0.1349, Adjusted R-squared: 0.1348
## F-statistic: 1440 on 1 and 9238 DF, p-value: < 2.2e-16
ITT_earnings <- mean(JC$earny4[JC$assignment == 1]) - mean(JC$earny4[JC$assignment == 0])
comp_treatment <- mean(JC$trainy1[JC$assignment == 1])
comp_control <- mean(JC$trainy1[JC$assignment == 0])
comp_difference <- comp_treatment - comp_control
LATE <- ITT_earnings / comp_difference
cat("ITT on Earnings:", ITT_earnings, "\n")
## ITT on Earnings: 16.05513
cat("Difference in Take-up Rates:", comp_difference, "\n")
## Difference in Take-up Rates: 0.3401906
cat("Estimated LATE:", LATE, "\n")
## Estimated LATE: 47.1945
LATE_model <- ivreg(earny4 ~ trainy1 | assignment, data = JC)
# Display regression results
summary(LATE_model)
##
## Call:
## ivreg(formula = earny4 ~ trainy1 | assignment, data = JC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -221.23 -165.43 -22.55 100.01 2235.87
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 174.039 8.909 19.536 < 2e-16 ***
## trainy1 47.194 12.192 3.871 0.000109 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 195 on 9238 degrees of freedom
## Multiple R-Squared: -0.004797, Adjusted R-squared: -0.004905
## Wald test: 14.98 on 1 and 9238 DF, p-value: 0.0001092
# Using "ivmodel"
install.packages("ivmodel")
## Warning: package 'ivmodel' is in use and will not be installed
library(ivmodel)
# Estimate IV model
iv_result <- ivmodel(Y = JC$earny4, D = JC$trainy1, Z = JC$assignment)
# Display results
summary(iv_result)
##
## Call:
## ivmodel(Y = JC$earny4, D = JC$trainy1, Z = JC$assignment)
## sample size: 9240
## _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
##
## First Stage Regression Result:
##
## F=1440.46, df1=1, df2=9238, p-value is < 2.22e-16
## R-squared=0.1348939, Adjusted R-squared=0.1348003
## Residual standard error: 0.4214583 on 9239 degrees of freedom
## _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
##
## Coefficients of k-Class Estimators:
##
## k Estimate Std. Error t value Pr(>|t|)
## OLS 0.0000 14.2283 4.4649 3.187 0.001444 **
## Fuller 0.9999 47.1681 12.1881 3.870 0.000110 ***
## TSLS 1.0000 47.1945 12.1924 3.871 0.000109 ***
## LIML 1.0000 47.1945 12.1924 3.871 0.000109 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
##
## Alternative tests for the treatment effect under H_0: beta=0.
##
## Anderson-Rubin test (under F distribution):
## F=15.07956, df1=1, df2=9238, p-value is 0.00010379
## 95 percent confidence interval:
## [23.3644072308337, 71.2284358822774]
##
## Conditional Likelihood Ratio test (under Normal approximation):
## Test Stat=15.07956, p-value is 0.00010379
## 95 percent confidence interval:
## [23.3644368880534, 71.2284057155105]
The ITT measures the effect of assignment to treatment for all individuals assigned, regardless of compliance. The $16.05 estimate therefore averages over both compliers and noncompliers. Because noncompliers do not actually receive the treatment, the ITT understates the effect on those who participate.
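Concretely, the ITT equals the LATE scaled by the difference in take-up rates: 47.19 × 0.340 ≈ 16.06, matching the estimates above.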
The manual calculation and the IV regression yield the same LATE estimate. The LATE is the effect of treatment on compliers: for them, participating in Job Corps increased average weekly earnings in the fourth year by $47.19.
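As a robustness check (a sketch using estimatr, which is already loaded above via pacman), the same LATE can be re-estimated with heteroskedasticity-robust standard errors:
# 2SLS LATE with heteroskedasticity-robust standard errors (estimatr defaults)
late_robust <- iv_robust(earny4 ~ trainy1 | assignment, data = JC)
summary(late_robust)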
# Install and load the haven package
#install.packages("haven")
library(haven)
# Read the .dta file
Card1995 <- read_dta("Card1995.dta")
# View the data
head(Card1995)
## # A tibble: 6 × 52
## id nearc2 nearc4 nearc4a nearc4b ed76 ed66 age76 daded nodaded momed
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2 0 0 0 0 7 5 29 9.94 1 10.2
## 2 3 0 0 0 0 12 11 27 8 0 8
## 3 4 0 0 0 0 12 12 34 14 0 12
## 4 5 1 1 1 0 11 11 27 11 0 12
## 5 6 1 1 1 0 12 12 34 8 0 7
## 6 7 1 1 1 0 12 11 26 9 0 12
## # ℹ 41 more variables: nomomed <dbl>, weight <dbl>, momdad14 <dbl>,
## # sinmom14 <dbl>, step14 <dbl>, reg661 <dbl>, reg662 <dbl>, reg663 <dbl>,
## # reg664 <dbl>, reg665 <dbl>, reg666 <dbl>, reg667 <dbl>, reg668 <dbl>,
## # reg669 <dbl>, south66 <dbl>, work76 <dbl>, work78 <dbl>, lwage76 <dbl>,
## # lwage78 <dbl>, famed <dbl>, black <dbl>, smsa76r <dbl>, smsa78r <dbl>,
## # reg76r <dbl>, reg78r <dbl>, reg80r <dbl>, smsa66r <dbl>, wage76 <dbl>,
## # wage78 <dbl>, wage80 <dbl>, noint78 <dbl>, noint80 <dbl>, enroll76 <dbl>, …
Card1995_reg <- Card1995 %>%
mutate(exp = age76 - ed76 - 6,
exp_sq_100 = exp^2/100)
head(Card1995_reg)
## # A tibble: 6 × 54
## id nearc2 nearc4 nearc4a nearc4b ed76 ed66 age76 daded nodaded momed
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2 0 0 0 0 7 5 29 9.94 1 10.2
## 2 3 0 0 0 0 12 11 27 8 0 8
## 3 4 0 0 0 0 12 12 34 14 0 12
## 4 5 1 1 1 0 11 11 27 11 0 12
## 5 6 1 1 1 0 12 12 34 8 0 7
## 6 7 1 1 1 0 12 11 26 9 0 12
## # ℹ 43 more variables: nomomed <dbl>, weight <dbl>, momdad14 <dbl>,
## # sinmom14 <dbl>, step14 <dbl>, reg661 <dbl>, reg662 <dbl>, reg663 <dbl>,
## # reg664 <dbl>, reg665 <dbl>, reg666 <dbl>, reg667 <dbl>, reg668 <dbl>,
## # reg669 <dbl>, south66 <dbl>, work76 <dbl>, work78 <dbl>, lwage76 <dbl>,
## # lwage78 <dbl>, famed <dbl>, black <dbl>, smsa76r <dbl>, smsa78r <dbl>,
## # reg76r <dbl>, reg78r <dbl>, reg80r <dbl>, smsa66r <dbl>, wage76 <dbl>,
## # wage78 <dbl>, wage80 <dbl>, noint78 <dbl>, noint80 <dbl>, enroll76 <dbl>, …
# OLS regression
ols_model <- lm(lwage76 ~ ed76 + exp + exp_sq_100 + black + reg76r + smsa76r, data = Card1995_reg)
summary(ols_model)
##
## Call:
## lm(formula = lwage76 ~ ed76 + exp + exp_sq_100 + black + reg76r +
## smsa76r, data = Card1995_reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.59297 -0.22315 0.01893 0.24223 1.33190
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.733664 0.067603 70.022 < 2e-16 ***
## ed76 0.074009 0.003505 21.113 < 2e-16 ***
## exp 0.083596 0.006648 12.575 < 2e-16 ***
## exp_sq_100 -0.224088 0.031784 -7.050 2.21e-12 ***
## black -0.189632 0.017627 -10.758 < 2e-16 ***
## reg76r -0.124862 0.015118 -8.259 < 2e-16 ***
## smsa76r 0.161423 0.015573 10.365 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3742 on 3003 degrees of freedom
## (603 observations deleted due to missingness)
## Multiple R-squared: 0.2905, Adjusted R-squared: 0.2891
## F-statistic: 204.9 on 6 and 3003 DF, p-value: < 2.2e-16
# IV regression
iv_model_1 <- ivreg(lwage76 ~ ed76 + exp + exp_sq_100 + black + reg76r + smsa76r |
nearc4 + exp + exp_sq_100 + black + reg76r + smsa76r ,
data = Card1995_reg)
summary(iv_model_1)
##
## Call:
## ivreg(formula = lwage76 ~ ed76 + exp + exp_sq_100 + black + reg76r +
## smsa76r | nearc4 + exp + exp_sq_100 + black + reg76r + smsa76r,
## data = Card1995_reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.82125 -0.24065 0.02368 0.25469 1.43205
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.75278 0.82934 4.525 6.27e-06 ***
## ed76 0.13229 0.04923 2.687 0.00725 **
## exp 0.10750 0.02130 5.047 4.76e-07 ***
## exp_sq_100 -0.22841 0.03341 -6.836 9.84e-12 ***
## black -0.13080 0.05287 -2.474 0.01342 *
## reg76r -0.10490 0.02307 -4.546 5.67e-06 ***
## smsa76r 0.13132 0.03013 4.359 1.35e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.391 on 3003 degrees of freedom
## Multiple R-Squared: 0.2252, Adjusted R-squared: 0.2237
## Wald test: 120.8 on 6 and 3003 DF, p-value: < 2.2e-16
first_stage_1 <- lm(ed76 ~ nearc4 + exp + exp_sq_100 + black + reg76r + smsa76r, data = Card1995_reg)
summary(first_stage_1)
##
## Call:
## lm(formula = ed76 ~ nearc4 + exp + exp_sq_100 + black + reg76r +
## smsa76r, data = Card1995_reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.6389 -1.4325 -0.1028 1.3268 6.2332
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.53964 0.16286 101.559 < 2e-16 ***
## nearc4 0.30628 0.07666 3.995 6.59e-05 ***
## exp -0.35881 0.03040 -11.805 < 2e-16 ***
## exp_sq_100 -0.21620 0.14590 -1.482 0.138
## black -1.03873 0.08358 -12.428 < 2e-16 ***
## reg76r -0.32964 0.07385 -4.464 8.29e-06 ***
## smsa76r 0.39091 0.07788 5.019 5.44e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.982 on 3606 degrees of freedom
## Multiple R-squared: 0.4813, Adjusted R-squared: 0.4805
## F-statistic: 557.7 on 6 and 3606 DF, p-value: < 2.2e-16
reduced_form_1 <- lm(lwage76 ~ nearc4 + exp + exp_sq_100 + black + reg76r + smsa76r, data = Card1995_reg)
summary(reduced_form_1)
##
## Call:
## lm(formula = lwage76 ~ nearc4 + exp + exp_sq_100 + black + reg76r +
## smsa76r, data = Card1995_reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.56525 -0.24771 0.01465 0.27091 1.38743
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.956604 0.036371 163.775 < 2e-16 ***
## nearc4 0.044624 0.017011 2.623 0.00876 **
## exp 0.053258 0.006948 7.666 2.38e-14 ***
## exp_sq_100 -0.218720 0.034021 -6.429 1.49e-10 ***
## black -0.263903 0.018485 -14.277 < 2e-16 ***
## reg76r -0.143458 0.016336 -8.782 < 2e-16 ***
## smsa76r 0.184752 0.017503 10.555 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4005 on 3003 degrees of freedom
## (603 observations deleted due to missingness)
## Multiple R-squared: 0.1871, Adjusted R-squared: 0.1854
## F-statistic: 115.2 on 6 and 3003 DF, p-value: < 2.2e-16
#install.packages("modelsummary")
library(modelsummary)
## Warning: package 'modelsummary' was built under R version 4.4.3
models <- list(
"OLS" = ols_model,
"IV" = iv_model_1,
"First-Stage" = first_stage_1,
"Reduced Form" = reduced_form_1
)
coef_map <- c(
"ed76" = "Years of Schooling (ed76)",
"nearc4" = "Near 4-Year College (nearc4)",
"exp" = "Experience (exp)",
"exp_sq_100" = "Experience2/100 (exp_sq_100)",
"black" = "Black (black)",
"reg76r" = "South (reg76r)",
"smsa76r" = "SMSA (smsa76r)"
)
# Generate summary table
modelsummary(
models,
stars = TRUE,
title = "Regression Results: OLS, IV, First-Stage, and Reduced Form",
fmt = 5,
coef_map = coef_map
)
Regression Results: OLS, IV, First-Stage, and Reduced Form

| | OLS | IV | First-Stage | Reduced Form |
|---|---|---|---|---|
| Years of Schooling (ed76) | 0.07401*** | 0.13229** | | |
| | (0.00351) | (0.04923) | | |
| Near 4-Year College (nearc4) | | | 0.30628*** | 0.04462** |
| | | | (0.07666) | (0.01701) |
| Experience (exp) | 0.08360*** | 0.10750*** | -0.35881*** | 0.05326*** |
| | (0.00665) | (0.02130) | (0.03040) | (0.00695) |
| Experience2/100 (exp_sq_100) | -0.22409*** | -0.22841*** | -0.21620 | -0.21872*** |
| | (0.03178) | (0.03341) | (0.14590) | (0.03402) |
| Black (black) | -0.18963*** | -0.13080* | -1.03873*** | -0.26390*** |
| | (0.01763) | (0.05287) | (0.08358) | (0.01848) |
| South (reg76r) | -0.12486*** | -0.10490*** | -0.32964*** | -0.14346*** |
| | (0.01512) | (0.02307) | (0.07385) | (0.01634) |
| SMSA (smsa76r) | 0.16142*** | 0.13132*** | 0.39091*** | 0.18475*** |
| | (0.01557) | (0.03013) | (0.07788) | (0.01750) |
| Num.Obs. | 3010 | 3010 | 3613 | 3010 |
| R2 | 0.291 | 0.225 | 0.481 | 0.187 |
| R2 Adj. | 0.289 | 0.224 | 0.480 | 0.185 |
| AIC | 2633.4 | 2898.4 | 15205.5 | 3043.1 |
| BIC | 2681.5 | 2946.5 | 15255.0 | 3091.2 |
| Log.Lik. | -1308.702 | | -7594.738 | -1513.546 |
| F | 204.932 | | 557.743 | 115.164 |
| RMSE | 0.37 | 0.39 | 1.98 | 0.40 |

+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
OLS does not address the endogeneity of schooling, so its estimate of the return to education is not reliable. Instrumenting education raises the estimate to about 0.13, implying that one additional year of education is associated with roughly a 13% increase in wages. However, the negative coefficient on experience in the first stage is odd and suggests a remaining endogeneity issue, since experience is constructed from (endogenous) schooling.
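A useful check here (a sketch relying on the diagnostics option of AER’s summary method for ivreg) is to request the built-in tests, which report the first-stage weak-instrument F statistic and the Wu-Hausman test of endogeneity:
# Weak-instrument and Wu-Hausman diagnostics for the single-instrument model
summary(iv_model_1, diagnostics = TRUE)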
# IV regression
iv_model <- ivreg(lwage76 ~ ed76 + exp + exp_sq_100 + black + reg76r + smsa76r |
nearc4a +nearc4b + exp + exp_sq_100 + black + reg76r + smsa76r,
data = Card1995_reg)
summary(iv_model)
##
## Call:
## ivreg(formula = lwage76 ~ ed76 + exp + exp_sq_100 + black + reg76r +
## smsa76r | nearc4a + nearc4b + exp + exp_sq_100 + black +
## reg76r + smsa76r, data = Card1995_reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.93985 -0.25152 0.01722 0.27365 1.48154
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.26801 0.68718 4.756 2.07e-06 ***
## ed76 0.16109 0.04077 3.951 7.96e-05 ***
## exp 0.11931 0.01818 6.564 6.16e-11 ***
## exp_sq_100 -0.23054 0.03503 -6.582 5.46e-11 ***
## black -0.10173 0.04531 -2.245 0.0248 *
## reg76r -0.09504 0.02165 -4.389 1.18e-05 ***
## smsa76r 0.11645 0.02705 4.305 1.73e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4108 on 3003 degrees of freedom
## Multiple R-Squared: 0.1447, Adjusted R-squared: 0.143
## Wald test: 111 on 6 and 3003 DF, p-value: < 2.2e-16
first_stage <- lm(ed76 ~ nearc4a + nearc4b + exp + exp_sq_100 + black + reg76r + smsa76r, data = Card1995_reg)
reduced_form <- lm(lwage76 ~ nearc4a + nearc4b + exp + exp_sq_100 + black + reg76r + smsa76r, data = Card1995_reg)
#install.packages("modelsummary")
library(modelsummary)
models <- list(
"OLS" = ols_model,
"IV" = iv_model,
"First-Stage" = first_stage,
"Reduced Form" = reduced_form
)
coef_map <- c(
"ed76" = "Years of Schooling (ed76)",
"nearc4a" = "Public College (nearc4a)",
"nearc4b" = "Private College (nearc4b)",
"exp" = "Experience (exp)",
"exp_sq_100" = "Experience2/100 (exp_sq_100)",
"black" = "Black (black)",
"reg76r" = "South (reg76r)",
"smsa76r" = "SMSA (smsa76r)"
)
modelsummary(models,
stars = TRUE, # Add significance stars
title = "Regression Results: OLS, IV, First-Stage, and Reduced Form",
fmt = 5,
coef_map = coef_map
)
Regression Results: OLS, IV, First-Stage, and Reduced Form

| | OLS | IV | First-Stage | Reduced Form |
|---|---|---|---|---|
| Years of Schooling (ed76) | 0.07401*** | 0.16109*** | | |
| | (0.00351) | (0.04077) | | |
| Public College (nearc4a) | | | 0.39599*** | 0.06401*** |
| | | | (0.08119) | (0.01801) |
| Private College (nearc4b) | | | 0.09648 | -0.00011 |
| | | | (0.09929) | (0.02191) |
| Experience (exp) | 0.08360*** | 0.11931*** | -0.36039*** | 0.05258*** |
| | (0.00665) | (0.01818) | (0.03036) | (0.00694) |
| Experience2/100 (exp_sq_100) | -0.22409*** | -0.23054*** | -0.20488 | -0.21464*** |
| | (0.03178) | (0.03503) | (0.14573) | (0.03399) |
| Black (black) | -0.18963*** | -0.10173* | -1.03976*** | -0.26394*** |
| | (0.01763) | (0.04531) | (0.08347) | (0.01846) |
| South (reg76r) | -0.12486*** | -0.09504*** | -0.30427*** | -0.13838*** |
| | (0.01512) | (0.02165) | (0.07414) | (0.01639) |
| SMSA (smsa76r) | 0.16142*** | 0.11645*** | 0.38804*** | 0.18389*** |
| | (0.01557) | (0.02705) | (0.07778) | (0.01748) |
| Num.Obs. | 3010 | 3010 | 3613 | 3010 |
| R2 | 0.291 | 0.145 | 0.483 | 0.190 |
| R2 Adj. | 0.289 | 0.143 | 0.482 | 0.188 |
| AIC | 2633.4 | 3196.0 | 15196.5 | 3034.6 |
| BIC | 2681.5 | 3244.0 | 15252.2 | 3088.7 |
| Log.Lik. | -1308.702 | | -7589.231 | -1508.323 |
| F | 204.932 | | 480.965 | 100.513 |
| RMSE | 0.37 | 0.41 | 1.98 | 0.40 |

+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
Residing near a public four-year college is more relevant for education, as shown in the first-stage results. The IV estimate of the return to schooling is slightly higher than with the single instrument, but the coefficient on experience in the first stage is still negative.
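To gauge instrument strength jointly (a sketch; linearHypothesis comes from the car package, which is attached with AER), one can test both excluded instruments in the first stage. With two instruments and one endogenous regressor, the Sargan overidentification test is also available via summary(iv_model, diagnostics = TRUE).
# Joint significance of the two excluded instruments in the first stage
linearHypothesis(first_stage, c("nearc4a = 0", "nearc4b = 0"))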
Yes, it is, and so can experience be: exp = age76 - ed76 - 6 is constructed from schooling, and both can be affected by unobserved factors such as ability.
# Create interaction terms
Card1995_reg$nearc4a_age76 <- Card1995_reg$nearc4a * Card1995_reg$age76
Card1995_reg$nearc4a_age76_sq <- Card1995_reg$nearc4a * (Card1995_reg$age76^2)
Card1995_reg$nearc4b_age76 <- Card1995_reg$nearc4b * Card1995_reg$age76
Card1995_reg$nearc4b_age76_sq <- Card1995_reg$nearc4b * (Card1995_reg$age76^2)
# Estimate the 2SLS model with interactions
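# Note: exp and exp_sq_100 are omitted from the instrument list below, so 2SLS treats them
# (along with ed76) as endogenous, instrumenting all three with the college-proximity dummies
# and their age interactions.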
iv_model_interactions <- ivreg(lwage76 ~ ed76 + exp + exp_sq_100 + black + reg76r + smsa76r |
nearc4a + nearc4b + nearc4a_age76 + nearc4a_age76_sq + nearc4b_age76 + nearc4b_age76_sq + black + reg76r + smsa76r,
data = Card1995_reg)
# Summarize the results
summary(iv_model_interactions)
##
## Call:
## ivreg(formula = lwage76 ~ ed76 + exp + exp_sq_100 + black + reg76r +
## smsa76r | nearc4a + nearc4b + nearc4a_age76 + nearc4a_age76_sq +
## nearc4b_age76 + nearc4b_age76_sq + black + reg76r + smsa76r,
## data = Card1995_reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.75022 -0.23674 0.02375 0.24883 1.32243
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.34516 0.29800 14.581 < 2e-16 ***
## ed76 0.11077 0.02548 4.347 1.43e-05 ***
## exp 0.05498 0.02308 2.383 0.017243 *
## exp_sq_100 -0.05962 0.11830 -0.504 0.614337
## black -0.13894 0.04143 -3.353 0.000808 ***
## reg76r -0.11094 0.01938 -5.723 1.15e-08 ***
## smsa76r 0.13284 0.02810 4.727 2.39e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3851 on 3003 degrees of freedom
## Multiple R-Squared: 0.2484, Adjusted R-squared: 0.2469
## Wald test: 156.8 on 6 and 3003 DF, p-value: < 2.2e-16
All coefficients now have the expected sign, and the return to schooling remains significant (the squared-experience term, however, is no longer significant).
# Coefficient on ed76 from IV model
beta_iv <- coef(iv_model)["ed76"]
se_iv <- sqrt(vcov(iv_model)["ed76", "ed76"])
# Coefficient on ed76 from OLS model
beta_ols <- coef(ols_model)["ed76"]
se_ols <- sqrt(vcov(ols_model)["ed76", "ed76"])
diff_beta <- beta_iv-beta_ols
# "sigmaless" variant: simplified statistic using only the OLS variance in the denominator
hausman_statistic_sigmaless <- diff_beta^2 / se_ols^2
# Compute the p-value
p_value_sigmaless <- pchisq(hausman_statistic_sigmaless, df = 1, lower.tail = FALSE)
# Display the results
cat("Hausman Test Statistic (sigmaless):", hausman_statistic_sigmaless, "\n")
## Hausman Test Statistic (sigmaless): 617.1341
cat("p-value (sigmaless):", p_value_sigmaless, "\n")
## p-value (sigmaless): 3.141124e-136
The p-value indicates that we can reject the null hypothesis that the OLS and IV estimates are equal, suggesting that ed76 is endogenous.
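As a cross-check (a sketch of the regression-based Durbin-Wu-Hausman test in its control-function form), one can add the first-stage residuals to the wage equation and test their significance; a significant residual coefficient again points to endogeneity of ed76.
# Regression-based Durbin-Wu-Hausman test (control-function form)
cf_data <- Card1995_reg %>% filter(!is.na(lwage76))            # same sample as the wage regressions
cf_first <- lm(ed76 ~ nearc4a + nearc4b + exp + exp_sq_100 + black + reg76r + smsa76r,
               data = cf_data)
cf_data$v_hat <- residuals(cf_first)                           # first-stage residuals
cf_second <- lm(lwage76 ~ ed76 + v_hat + exp + exp_sq_100 + black + reg76r + smsa76r,
                data = cf_data)
coeftest(cf_second, vcov = vcovHC(cf_second, type = "HC1"))["v_hat", ]  # test H0: coef on v_hat = 0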