# (a) Set random seed for reproducibility
set.seed(1)
# (b) Number of observations
N <- 10000
# (c) Draw epsilon_1 and epsilon_2 from N(0, 1)
epsilon_D <- rnorm(N, mean = 0, sd = 1)
epsilon_Y <- rnorm(N, mean = 0, sd = 1)
# (d) Draw U from N(0, 0.5)
U <- rnorm(N, mean = 0, sd = 0.5)
# (e) Create Z = 1(z > 0.5), where z is randomly drawn from a uniform distribution
z <- runif(N, min = 0, max = 1)
Z <- as.numeric(z > 0.5)
# (f) Create D_i
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
# (g) Create Y_i
beta_0 <- 3
beta_D <- 2
beta_Z <- 0
beta_U <- 6
Y <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
#Create a data frame for convenience
data <- data.frame(Y = Y, D = D, Z = Z, U = U)
##
## Call:
## lm(formula = Y ~ D, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2473 -2.0084 -0.1544 1.9659 9.2807
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.02336 0.03642 55.56 <2e-16 ***
## D 4.72039 0.06021 78.40 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.9 on 9998 degrees of freedom
## Multiple R-squared: 0.3807, Adjusted R-squared: 0.3807
## F-statistic: 6146 on 1 and 9998 DF, p-value: < 2.2e-16
##
## Call:
## ivreg(formula = Y ~ D | Z, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.14794 -2.10898 0.01567 2.11343 10.84223
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.92404 0.04738 61.71 <2e-16 ***
## D 2.25817 0.09716 23.24 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.133 on 9998 degrees of freedom
## Multiple R-Squared: 0.2771, Adjusted R-squared: 0.2771
## Wald test: 540.1 on 1 and 9998 DF, p-value: < 2.2e-16
# Run OLS regression of D on Z (with a constant)
ols_D_on_Z <- lm(D ~ Z, data = data)
# Predicted value of D (Dhat)
Dhat <- fitted(ols_D_on_Z)
# Residual of D (Dres)
Dres <- resid(ols_D_on_Z)
# Run OLS regression of Y on Dhat
ols_Y_on_Dhat <- lm(Y ~ Dhat, data = data)
summary(ols_Y_on_Dhat)
##
## Call:
## lm(formula = Y ~ Dhat, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.614 -2.516 0.199 2.388 12.973
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.92404 0.05463 53.52 <2e-16 ***
## Dhat 2.25817 0.11203 20.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.612 on 9998 degrees of freedom
## Multiple R-squared: 0.03905, Adjusted R-squared: 0.03896
## F-statistic: 406.3 on 1 and 9998 DF, p-value: < 2.2e-16
# Run OLS regression of Y on D and Dres
ols_Y_on_D_Dres <- lm(Y ~ D + Dres, data = data)
summary(ols_Y_on_D_Dres)
##
## Call:
## lm(formula = Y ~ D + Dres, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.9638 -1.7894 0.0297 1.8385 9.4420
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.92404 0.04077 71.72 <2e-16 ***
## D 2.25817 0.08360 27.01 <2e-16 ***
## Dres 4.46237 0.11255 39.65 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.696 on 9997 degrees of freedom
## Multiple R-squared: 0.4649, Adjusted R-squared: 0.4648
## F-statistic: 4342 on 2 and 9997 DF, p-value: < 2.2e-16
# Change the DGP to beta_Z = 1
beta_Z <- 1
Y_new <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
# Re-run IV regression with Z as the IV for D
iv_model_new <- ivreg(Y_new ~ D | Z)
summary(iv_model_new)
##
## Call:
## ivreg(formula = Y_new ~ D | Z)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.08396 -1.92037 -0.00676 1.95614 10.35569
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.86006 0.04371 65.44 <2e-16 ***
## D 3.80869 0.08963 42.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.89 on 9998 degrees of freedom
## Multiple R-Squared: 0.4263, Adjusted R-squared: 0.4262
## Wald test: 1806 on 1 and 9998 DF, p-value: < 2.2e-16
# Change the DGP to beta_Z = -1
beta_Z <- -1
Y_new_neg <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
# Re-run IV regression with Z as the IV for D
iv_model_neg <- ivreg(Y_new_neg ~ D | Z)
summary(iv_model_neg)
##
## Call:
## ivreg(formula = Y_new_neg ~ D | Z)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.2119 -2.3955 0.1247 2.2961 12.2946
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.98802 0.05216 57.287 < 2e-16 ***
## D 0.70764 0.10696 6.616 3.88e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.449 on 9998 degrees of freedom
## Multiple R-Squared: 0.09203, Adjusted R-squared: 0.09194
## Wald test: 43.77 on 1 and 9998 DF, p-value: 3.881e-11
# Baseline case (alpha_Z = 5 and beta_Z = 1)
alpha_Z <- 5
beta_Z <- 1
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_baseline <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
# IV regression for baseline case
iv_model_baseline <- ivreg(Y_baseline ~ D | Z)
summary(iv_model_baseline)
##
## Call:
## ivreg(formula = Y_baseline ~ D | Z)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.08396 -1.92037 -0.00676 1.95614 10.35569
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.86006 0.04371 65.44 <2e-16 ***
## D 3.80869 0.08963 42.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.89 on 9998 degrees of freedom
## Multiple R-Squared: 0.4263, Adjusted R-squared: 0.4262
## Wald test: 1806 on 1 and 9998 DF, p-value: < 2.2e-16
# New case (alpha_Z = 10 and beta_Z = 1)
alpha_Z <- 10
D_new <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_new <- beta_0 + beta_D * D_new + beta_Z * Z + beta_U * U + epsilon_Y
# IV regression for new case
iv_model_new <- ivreg(Y_new ~ D_new | Z)
summary(iv_model_new)
##
## Call:
## ivreg(formula = Y_new ~ D_new | Z)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.10825 -2.14575 0.05882 2.16869 10.92009
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.88435 0.04642 62.13 <2e-16 ***
## D_new 3.22000 0.06556 49.12 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.134 on 9998 degrees of freedom
## Multiple R-Squared: 0.2509, Adjusted R-squared: 0.2508
## Wald test: 2412 on 1 and 9998 DF, p-value: < 2.2e-16
# Parameters
N <- 10000
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4
# Simulate variables
Z <- rbinom(N, 1, 0.5) # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5) # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1) # Error term
# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)
# Calculate D when Z = 1
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
# Compliers: D changes from 0 to 1 when Z changes from 0 to 1
compliers <- sum(D_Z0 == 0 & D_Z1 == 1) / N
# Always-takers: D = 1 regardless of Z
always_takers <- sum(D_Z0 == 1 & D_Z1 == 1) / N
# Never-takers: D = 0 regardless of Z
never_takers <- sum(D_Z0 == 0 & D_Z1 == 0) / N
# Output the proportions
list(
compliers = compliers,
always_takers = always_takers,
never_takers = never_takers
)
## $compliers
## [1] 0.3172
##
## $always_takers
## [1] 0.0369
##
## $never_takers
## [1] 0.6459
# Parameters for the new case (alpha_Z = 10)
alpha_Z <- 10
alpha_0 <- -4
alpha_U <- 4
N <- 10000
# Simulate variables
Z <- rbinom(N, 1, 0.5) # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5) # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1) # Error term
# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)
# Calculate D when Z = 1 (with alpha_Z = 10)
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
# Proportions
compliers_new <- sum(D_Z0 == 0 & D_Z1 == 1) / N
always_takers_new <- sum(D_Z0 == 1 & D_Z1 == 1) / N
never_takers_new <- sum(D_Z0 == 0 & D_Z1 == 0) / N
# Output the proportions
list(
compliers_new = compliers_new,
always_takers_new = always_takers_new,
never_takers_new = never_takers_new
)
## $compliers_new
## [1] 0.4801
##
## $always_takers_new
## [1] 0.0379
##
## $never_takers_new
## [1] 0.482
# Set the random seed to 2 for reproducibility
set.seed(2)
# (b) Number of observations
N <- 10000
# (c) Draw epsilon_1 and epsilon_2 from N(0, 1)
epsilon_D <- rnorm(N, mean = 0, sd = 1)
epsilon_Y <- rnorm(N, mean = 0, sd = 1)
# (d) Draw U from N(0, 0.5)
U <- rnorm(N, mean = 0, sd = 0.5)
# (e) Create Z = 1(z > 0.5), where z is randomly drawn from a uniform distribution
z <- runif(N, min = 0, max = 1)
Z <- as.numeric(z > 0.5)
# (f) Create D_i
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
# (g) Create Y_i
beta_0 <- 3
beta_D <- 2
beta_Z <- 0
beta_U <- 6
Y <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
#Create a data frame for convenience
data <- data.frame(Y = Y, D = D, Z = Z, U = U)
# Run OLS regression
ols_model <- lm(Y ~ D, data = data)
summary(ols_model)
# export latex
xtable::xtable(ols_model)
# Run IV regression
iv_model <- ivreg(Y ~ D | Z, data = data)
summary(iv_model)
iv_summary <- summary(iv_model)$coefficients
# export latex
xtable::xtable(iv_summary)
# Run OLS regression of D on Z (with a constant)
ols_D_on_Z <- lm(D ~ Z, data = data)
# Predicted value of D (Dhat)
Dhat <- fitted(ols_D_on_Z)
# Residual of D (Dres)
Dres <- resid(ols_D_on_Z)
# Run OLS regression of Y on Dhat
ols_Y_on_Dhat <- lm(Y ~ Dhat, data = data)
summary(ols_Y_on_Dhat)
# Run OLS regression of Y on D and Dres
ols_Y_on_D_Dres <- lm(Y ~ D + Dres, data = data)
summary(ols_Y_on_D_Dres)
# export latex
xtable::xtable(ols_Y_on_Dhat)
# export latex
xtable::xtable(ols_Y_on_D_Dres)
# Change the DGP to beta_Z = 1
beta_Z <- 1
Y_new <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
# Re-run IV regression with Z as the IV for D
iv_model_new <- ivreg(Y_new ~ D | Z)
summary(iv_model_new)
iv_reg1<- summary(iv_model_new)$coefficients
# Change the DGP to beta_Z = -1
beta_Z <- -1
Y_new_neg <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
# Re-run IV regression with Z as the IV for D
iv_model_neg <- ivreg(Y_new_neg ~ D | Z)
iv_reg2<- summary(iv_model_neg)$coefficients
# export latex
xtable::xtable(iv_reg1)
# export latex
xtable::xtable(iv_reg2)
# Baseline case (alpha_Z = 5 and beta_Z = 1)
alpha_Z <- 5
beta_Z <- 1
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_baseline <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
# IV regression for baseline case
iv_model_baseline <- ivreg(Y_baseline ~ D | Z)
summary(iv_model_baseline)
iv_baseline <- summary(iv_model_baseline)$coefficients
# New case (alpha_Z = 10 and beta_Z = 1)
alpha_Z <- 10
D_new <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_new <- beta_0 + beta_D * D_new + beta_Z * Z + beta_U * U + epsilon_Y
# IV regression for new case
iv_model_new <- ivreg(Y_new ~ D_new | Z)
summary(iv_model_new)
iv_new <- summary(iv_model_new)$coefficients
# export latex
xtable::xtable(iv_baseline)
# export latex
xtable::xtable(iv_new)
# Parameters
N <- 10000
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4
# Simulate variables
Z <- rbinom(N, 1, 0.5) # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5) # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1) # Error term
# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)
# Calculate D when Z = 1
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
# Compliers: D changes from 0 to 1 when Z changes from 0 to 1
compliers <- sum(D_Z0 == 0 & D_Z1 == 1) / N
# Always-takers: D = 1 regardless of Z
always_takers <- sum(D_Z0 == 1 & D_Z1 == 1) / N
# Never-takers: D = 0 regardless of Z
never_takers <- sum(D_Z0 == 0 & D_Z1 == 0) / N
# Output the proportions
list(
compliers = compliers,
always_takers = always_takers,
never_takers = never_takers
)
# Parameters for the new case (alpha_Z = 10)
alpha_Z <- 10
alpha_0 <- -4
alpha_U <- 4
N <- 10000
# Simulate variables
Z <- rbinom(N, 1, 0.5) # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5) # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1) # Error term
# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)
# Calculate D when Z = 1 (with alpha_Z = 10)
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
# Proportions
compliers_new <- sum(D_Z0 == 0 & D_Z1 == 1) / N
always_takers_new <- sum(D_Z0 == 1 & D_Z1 == 1) / N
never_takers_new <- sum(D_Z0 == 0 & D_Z1 == 0) / N
# Output the proportions
list(
compliers_new = compliers_new,
always_takers_new = always_takers_new,
never_takers_new = never_takers_new
)