First Stages
first_stage_1 <- ggplot(avg) +
geom_point(aes(x = Age, y = avg_ninsurance)) +
geom_vline(xintercept = 65, linetype = "dashed", color = "red") +
labs(
title = "Number of Insurance by Age",
x = "Age",
y = "Average Number of Insurance Policy"
)+
theme_minimal()
print(first_stage_1)

# export
ggsave("first_stage_1.png", plot = first_stage_1, width = 6, height = 4, units = "in", dpi = 300)
first_stage2 <- ggplot(avg) +
geom_point(aes(x = Age, y = avg_one_or_more_policies, color = "One or More Policies")) +
geom_point(aes(x = Age, y = avg_two_or_more_policies, color = "Two or More Policies")) +
geom_vline(xintercept = 65, linetype = "dashed", color = "red") +
labs(
title = "Insurance Policies by Age",
x = "Age",
y = "Proportion",
color = ""
) +
theme_minimal()
print(first_stage2)

# export
ggsave("first_stage2.png", plot = first_stage2, width = 6, height = 4, units = "in", dpi = 300)
2SLS
data_2sls <- data %>%
mutate(
Z = ifelse(Age >= 65, 1, 0), # Indicator for age >= 65
AgeZ = Age * Z # Interaction term
)
# Triangular Kernel Function
triangular_kernel <- function(distance, bandwidth) {
weight <- 1 - abs(distance / bandwidth)
weight[weight < 0] <- 0
return(weight)
}
# Compute weights
threshold <- 65
bandwidth <- 5
data_2sls <- data_2sls %>%
mutate(
distance = Age - threshold,
kernel_weight = triangular_kernel(distance, bandwidth)
)
data_2sls <- data_2sls %>%
filter(Age >= 60 & Age <= 70)
model <- ivreg(DelayCare ~ Ninsurance + Age + AgeZ | Z + Age + AgeZ,
data = data_2sls,
weights = kernel_weight)
summary(model, robust = TRUE)
##
## Call:
## ivreg(formula = DelayCare ~ Ninsurance + Age + AgeZ | Z + Age +
## AgeZ, data = data_2sls, weights = kernel_weight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.14529 -0.06163 -0.03621 -0.01874 0.96012
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3673161 0.0945004 3.887 0.000102 ***
## Ninsurance -0.0527078 0.0795669 -0.662 0.507699
## Age -0.0037655 0.0023980 -1.570 0.116369
## AgeZ 0.0003498 0.0006307 0.555 0.579196
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.177 on 29660 degrees of freedom
## Multiple R-Squared: 0.02028, Adjusted R-squared: -0.03198
## Wald test: 24.47 on 3 and 29660 DF, p-value: 8.305e-16
# export to Latex
stargazer(model, type = "latex", title = "2SLS Regression Results",
align = TRUE, digits =3, out = "2SLS_results.tex")
##
## % Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
## % Date and time: Fri, Dec 06, 2024 - 11:55:29 AM
## % Requires LaTeX packages: dcolumn
## \begin{table}[!htbp] \centering
## \caption{2SLS Regression Results}
## \label{}
## \begin{tabular}{@{\extracolsep{5pt}}lD{.}{.}{-3} }
## \\[-1.8ex]\hline
## \hline \\[-1.8ex]
## & \multicolumn{1}{c}{\textit{Dependent variable:}} \\
## \cline{2-2}
## \\[-1.8ex] & \multicolumn{1}{c}{DelayCare} \\
## \hline \\[-1.8ex]
## Ninsurance & -0.053 \\
## & (0.080) \\
## & \\
## Age & -0.004 \\
## & (0.002) \\
## & \\
## AgeZ & 0.0003 \\
## & (0.001) \\
## & \\
## Constant & 0.367^{***} \\
## & (0.095) \\
## & \\
## \hline \\[-1.8ex]
## Observations & \multicolumn{1}{c}{29,664} \\
## R$^{2}$ & \multicolumn{1}{c}{0.020} \\
## Adjusted R$^{2}$ & \multicolumn{1}{c}{-0.032} \\
## Residual Std. Error & \multicolumn{1}{c}{0.177 (df = 29660)} \\
## \hline
## \hline \\[-1.8ex]
## \textit{Note:} & \multicolumn{1}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\
## \end{tabular}
## \end{table}
Changing h
# Triangular kernel function
triangular_kernel <- function(distance, bandwidth) {
weight <- 1 - abs(distance / bandwidth)
weight[weight < 0] <- 0
return(weight)
}
# Update kernel weights for h = 3
data_h3 <- data_2sls %>%
mutate(
kernel_weight_h3 = triangular_kernel(distance, 3)
) %>%
filter(abs(distance) <= 3) # Filter for the bandwidth
# Update kernel weights for h = 7
data_h7 <- data_2sls %>%
mutate(
kernel_weight_h7 = triangular_kernel(distance, 7)
) %>%
filter(abs(distance) <= 7) # Filter for the bandwidth
# 2SLS regression for h = 3
model_h3 <- ivreg(DelayCare ~ Ninsurance + Age + AgeZ | Z + Age + AgeZ,
data = data_h3,
weights = kernel_weight_h3)
# 2SLS regression for h = 7
model_h7 <- ivreg(DelayCare ~ Ninsurance + Age + AgeZ | Z + Age + AgeZ,
data = data_h7,
weights = kernel_weight_h7)
stargazer(model_h3, model_h7,
type = "latex",
title = "2SLS Regression Results for Different Bandwidths",
align = TRUE,
digits = 3,
column.labels = c("H = 3", "H = 7"),
out = "2SLS_combined_results.tex")
##
## % Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
## % Date and time: Fri, Dec 06, 2024 - 11:55:30 AM
## % Requires LaTeX packages: dcolumn
## \begin{table}[!htbp] \centering
## \caption{2SLS Regression Results for Different Bandwidths}
## \label{}
## \begin{tabular}{@{\extracolsep{5pt}}lD{.}{.}{-3} D{.}{.}{-3} }
## \\[-1.8ex]\hline
## \hline \\[-1.8ex]
## & \multicolumn{2}{c}{\textit{Dependent variable:}} \\
## \cline{2-3}
## \\[-1.8ex] & \multicolumn{2}{c}{DelayCare} \\
## & \multicolumn{1}{c}{H = 3} & \multicolumn{1}{c}{H = 7} \\
## \\[-1.8ex] & \multicolumn{1}{c}{(1)} & \multicolumn{1}{c}{(2)}\\
## \hline \\[-1.8ex]
## Ninsurance & -0.105 & -0.106 \\
## & (0.081) & (0.091) \\
## & & \\
## Age & -0.002 & -0.002 \\
## & (0.005) & (0.002) \\
## & & \\
## AgeZ & 0.001 & 0.001 \\
## & (0.001) & (0.001) \\
## & & \\
## Constant & 0.313 & 0.323^{***} \\
## & (0.238) & (0.069) \\
## & & \\
## \hline \\[-1.8ex]
## Observations & \multicolumn{1}{c}{17,246} & \multicolumn{1}{c}{31,243} \\
## R$^{2}$ & \multicolumn{1}{c}{0.007} & \multicolumn{1}{c}{0.018} \\
## Adjusted R$^{2}$ & \multicolumn{1}{c}{-0.082} & \multicolumn{1}{c}{0.018} \\
## Residual Std. Error & \multicolumn{1}{c}{0.181 (df = 17242)} & \multicolumn{1}{c}{0.197 (df = 31239)} \\
## \hline
## \hline \\[-1.8ex]
## \textit{Note:} & \multicolumn{2}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\
## \end{tabular}
## \end{table}
Replicate Figure 2 in Caetano, Caetano, Escanciano (2023)
avg_white<- data %>%
filter(Minority==0) %>%
group_by(Age) %>%
summarize(
avg_outcome = mean(DelayCare, na.rm = TRUE),
avg_ninsurance = mean(Ninsurance, na.rm = TRUE),
avg_one_or_more_policies = mean(one_or_more_policies, na.rm = TRUE),
avg_two_or_more_policies = mean(two_or_more_policies, na.rm = TRUE)
)
cce_fig2a <- ggplot(avg_white) +
geom_point(aes(x = Age, y = avg_one_or_more_policies, color = "One or More Policies")) +
geom_point(aes(x = Age, y = avg_two_or_more_policies, color = "Two or More Policies")) +
geom_vline(xintercept = 65, linetype = "dashed", color = "red") +
labs(
title = "White",
x = "Age",
y = "Proportion",
color = ""
) +
theme_minimal()
print(cce_fig2a)

# export
ggsave("cce_fig2a.png", plot = cce_fig2a, width = 6, height = 4, units = "in", dpi = 300)
avg_minor<- data %>%
filter(Minority==1) %>%
group_by(Age) %>%
summarize(
avg_outcome = mean(DelayCare, na.rm = TRUE),
avg_ninsurance = mean(Ninsurance, na.rm = TRUE),
avg_one_or_more_policies = mean(one_or_more_policies, na.rm = TRUE),
avg_two_or_more_policies = mean(two_or_more_policies, na.rm = TRUE)
)
cce_fig2b <- ggplot(avg_minor) +
geom_point(aes(x = Age, y = avg_one_or_more_policies, color = "One or More Policies")) +
geom_point(aes(x = Age, y = avg_two_or_more_policies, color = "Two or More Policies")) +
geom_vline(xintercept = 65, linetype = "dashed", color = "red") +
labs(
title = "Minority",
x = "Age",
y = "Proportion",
color = ""
) +
theme_minimal()
print(cce_fig2b)

# export
ggsave("cce_fig2b.png", plot = cce_fig2b, width = 6, height = 4, units = "in", dpi = 300)
outcome_minor <- ggplot(avg_minor) +
geom_point(aes(x = Age, y = avg_outcome)) +
geom_vline(xintercept = 65, linetype = "dashed", color = "red") +
labs(
title = "Minority",
x = "Age",
y = "Proportion of Delay Care",
color = ""
) +
theme_minimal()
print(outcome_minor)

# export
ggsave("outcome_minor.png", plot = outcome_minor, width = 6, height = 4, units = "in", dpi = 300)
outcome_white <- ggplot(avg_white) +
geom_point(aes(x = Age, y = avg_outcome)) +
geom_vline(xintercept = 65, linetype = "dashed", color = "red") +
labs(
title = "White",
x = "Age",
y = "Proportion of Delay Care",
color = ""
) +
theme_minimal()
print(outcome_white)

# export
ggsave("outcome_white.png", plot = outcome_white, width = 6, height = 4, units = "in", dpi = 300)
data <- data %>%
mutate(Z = ifelse(Age >= 65, 1, 0),
AgeMin = Age * Minority, # Interaction between age and minority status
AgeZ = Age * Z, # Interaction between age and Z
MinZ = Minority * Z, # Interaction between minority and Z
AgeMinZ = Age * Minority * Z, # Interaction between age, minority, and Z
Oneins = one_or_more_policies,
Twoins = two_or_more_policies
)
# Define the triangular kernel function
triangular_kernel <- function(distance, bandwidth) {
weight <- 1 - abs(distance / bandwidth)
weight[weight < 0] <- 0
return(weight)
}
# Calculate distance from the threshold and restrict observations
bandwidth <- 5
data <- data %>%
mutate(
distance = Age - 65, # Distance from the threshold (age 65)
kernel_weight = triangular_kernel(distance, bandwidth)
) %>%
filter(abs(distance) <= bandwidth) # Restrict to the bandwidth
# 2SLS regression
model_2sls_update <- ivreg(
DelayCare ~ Oneins + Twoins + Age + AgeZ + Minority + MinZ + AgeMin + AgeMinZ |
Z + MinZ + Age + AgeZ + Minority + AgeMin + AgeMinZ,
data = data,
weights = kernel_weight
)
# Summary with robust standard errors
summary(model_2sls_update, robust = TRUE)
##
## Call:
## ivreg(formula = DelayCare ~ Oneins + Twoins + Age + AgeZ + Minority +
## MinZ + AgeMin + AgeMinZ | Z + MinZ + Age + AgeZ + Minority +
## AgeMin + AgeMinZ, data = data, weights = kernel_weight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.230606 -0.111919 -0.034683 0.002359 1.351753
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.197892 0.388638 -0.509 0.6106
## Oneins 0.516990 0.338698 1.526 0.1269
## Twoins -0.195697 0.140815 -1.390 0.1646
## Age -0.002919 0.003308 -0.882 0.3776
## AgeZ 0.001049 0.001021 1.027 0.3044
## Minority 0.008962 0.247767 0.036 0.9711
## MinZ -0.121250 0.055348 -2.191 0.0285 *
## AgeMin 0.001506 0.003574 0.421 0.6735
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2201 on 29656 degrees of freedom
## Multiple R-Squared: -0.5145, Adjusted R-squared: -0.5955
## Wald test: 10.74 on 7 and 29656 DF, p-value: 1.36e-13
#
stargazer(model_2sls_update, type = "latex", title = "2SLS Regression Results",
align = TRUE, digits =3, out = "2SLS_results_update.tex")
##
## % Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
## % Date and time: Fri, Dec 06, 2024 - 11:55:33 AM
## % Requires LaTeX packages: dcolumn
## \begin{table}[!htbp] \centering
## \caption{2SLS Regression Results}
## \label{}
## \begin{tabular}{@{\extracolsep{5pt}}lD{.}{.}{-3} }
## \\[-1.8ex]\hline
## \hline \\[-1.8ex]
## & \multicolumn{1}{c}{\textit{Dependent variable:}} \\
## \cline{2-2}
## \\[-1.8ex] & \multicolumn{1}{c}{DelayCare} \\
## \hline \\[-1.8ex]
## Oneins & 0.517 \\
## & (0.339) \\
## & \\
## Twoins & -0.196 \\
## & (0.141) \\
## & \\
## Age & -0.003 \\
## & (0.003) \\
## & \\
## AgeZ & 0.001 \\
## & (0.001) \\
## & \\
## Minority & 0.009 \\
## & (0.248) \\
## & \\
## MinZ & -0.121^{**} \\
## & (0.055) \\
## & \\
## AgeMin & 0.002 \\
## & (0.004) \\
## & \\
## AgeMinZ & \\
## & \\
## & \\
## Constant & -0.198 \\
## & (0.389) \\
## & \\
## \hline \\[-1.8ex]
## Observations & \multicolumn{1}{c}{29,664} \\
## R$^{2}$ & \multicolumn{1}{c}{-0.515} \\
## Adjusted R$^{2}$ & \multicolumn{1}{c}{-0.596} \\
## Residual Std. Error & \multicolumn{1}{c}{0.220 (df = 29656)} \\
## \hline
## \hline \\[-1.8ex]
## \textit{Note:} & \multicolumn{1}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\
## \end{tabular}
## \end{table}
data <- data %>%
mutate(
Q1 = ifelse(Minority == 0 & Education == 2, 1, 0), # White High School
Q2 = ifelse(Minority == 0 & Education == 3, 1, 0), # White College
Q3 = ifelse(Minority == 1 & Education == 3, 1, 0), # Minority High School
Q4 = ifelse(Minority == 1 & Education == 2, 1, 0) # Minority College
)
# Create interaction variables
data <- data %>%
mutate(
AgeQ1 = Age * Q1,
AgeQ2 = Age * Q2,
AgeQ3 = Age * Q3,
AgeQ4 = Age * Q4,
AgeZ = Age * Z,
Q1Z = Q1 * Z,
Q2Z = Q2 * Z,
Q3Z = Q3 * Z,
Q4Z = Q4 * Z,
AgeQ1Z = Age * Q1 * Z,
AgeQ2Z = Age * Q2 * Z,
AgeQ3Z = Age * Q3 * Z,
AgeQ4Z = Age * Q4 * Z
)
# Define triangular kernel and restrict observations
bandwidth <- 5
data <- data %>%
mutate(
distance = Age - 65, # Distance from the threshold
kernel_weight = triangular_kernel(distance, bandwidth)
) %>%
filter(abs(distance) <= bandwidth) # Filter within bandwidth
# Run 2SLS regression
model_2sls_last <- ivreg(
DelayCare ~ Oneins + Twoins + Age + AgeZ + Q1 + Q2 + Q3 + Q4 + AgeQ1 + AgeQ2 + AgeQ3 + AgeQ4 + AgeQ1Z + AgeQ2Z + AgeQ3Z + AgeQ4Z |
Z + Q1Z + Q2Z + Q3Z + Q4Z + AgeZ + AgeQ1Z + AgeQ2Z + AgeQ3Z + AgeQ4Z,
data = data,
weights = kernel_weight
)
# Summary with robust standard errors
summary(model_2sls_last, robust = TRUE)
##
## Call:
## ivreg(formula = DelayCare ~ Oneins + Twoins + Age + AgeZ + Q1 +
## Q2 + Q3 + Q4 + AgeQ1 + AgeQ2 + AgeQ3 + AgeQ4 + AgeQ1Z + AgeQ2Z +
## AgeQ3Z + AgeQ4Z | Z + Q1Z + Q2Z + Q3Z + Q4Z + AgeZ + AgeQ1Z +
## AgeQ2Z + AgeQ3Z + AgeQ4Z, data = data, weights = kernel_weight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.43420 -0.11499 0.04829 0.15160 1.23941
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.275908 1.545646 0.825 0.409
## Oneins -1.655815 3.179767 -0.521 0.603
## Twoins 0.426839 0.682997 0.625 0.532
## Age 0.002727 0.022959 0.119 0.905
## AgeZ -0.000523 0.002200 -0.238 0.812
## Q1 1.819503 2.869902 0.634 0.526
## Q2 1.544483 2.554145 0.605 0.545
## Q3 -0.002750 0.039953 -0.069 0.945
## Q4 0.001130 0.067207 0.017 0.987
## AgeQ1 -0.027807 0.042616 -0.653 0.514
## AgeQ2 -0.023741 0.037828 -0.628 0.530
##
## Residual standard error: 0.336 on 29653 degrees of freedom
## Multiple R-Squared: -2.529, Adjusted R-squared: -2.718
## Wald test: 3.351 on 10 and 29653 DF, p-value: 0.0002249
# Export to Latex
stargazer(model_2sls_last, type = "latex", title = "2SLS Regression Results",
align = TRUE, digits =3, out = "2SLS_results_last.tex")
##
## % Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
## % Date and time: Fri, Dec 06, 2024 - 11:55:34 AM
## % Requires LaTeX packages: dcolumn
## \begin{table}[!htbp] \centering
## \caption{2SLS Regression Results}
## \label{}
## \begin{tabular}{@{\extracolsep{5pt}}lD{.}{.}{-3} }
## \\[-1.8ex]\hline
## \hline \\[-1.8ex]
## & \multicolumn{1}{c}{\textit{Dependent variable:}} \\
## \cline{2-2}
## \\[-1.8ex] & \multicolumn{1}{c}{DelayCare} \\
## \hline \\[-1.8ex]
## Oneins & -1.656 \\
## & (3.180) \\
## & \\
## Twoins & 0.427 \\
## & (0.683) \\
## & \\
## Age & 0.003 \\
## & (0.023) \\
## & \\
## AgeZ & -0.001 \\
## & (0.002) \\
## & \\
## Q1 & 1.820 \\
## & (2.870) \\
## & \\
## Q2 & 1.544 \\
## & (2.554) \\
## & \\
## Q3 & -0.003 \\
## & (0.040) \\
## & \\
## Q4 & 0.001 \\
## & (0.067) \\
## & \\
## AgeQ1 & -0.028 \\
## & (0.043) \\
## & \\
## AgeQ2 & -0.024 \\
## & (0.038) \\
## & \\
## AgeQ3 & \\
## & \\
## & \\
## AgeQ4 & \\
## & \\
## & \\
## AgeQ1Z & \\
## & \\
## & \\
## AgeQ2Z & \\
## & \\
## & \\
## AgeQ3Z & \\
## & \\
## & \\
## AgeQ4Z & \\
## & \\
## & \\
## Constant & 1.276 \\
## & (1.546) \\
## & \\
## \hline \\[-1.8ex]
## Observations & \multicolumn{1}{c}{29,664} \\
## R$^{2}$ & \multicolumn{1}{c}{-2.529} \\
## Adjusted R$^{2}$ & \multicolumn{1}{c}{-2.718} \\
## Residual Std. Error & \multicolumn{1}{c}{0.336 (df = 29653)} \\
## \hline
## \hline \\[-1.8ex]
## \textit{Note:} & \multicolumn{1}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\
## \end{tabular}
## \end{table}