# (a) Set random seed for reproducibility
set.seed(2)
# (b) Number of observations
N <- 10000
# (c) Draw epsilon_1 and epsilon_2 from N(0, 1)
epsilon_D <- rnorm(N, mean = 0, sd = 1)
epsilon_Y <- rnorm(N, mean = 0, sd = 1)
# (d) Draw U from N(0, 0.5)
U <- rnorm(N, mean = 0, sd = 0.5)
# (e) Create Z = 1(z > 0.5), where z is randomly drawn from a uniform distribution
z <- runif(N, min = 0, max = 1)
Z <- as.numeric(z > 0.5)
# (f) Create D_i
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
# (g) Create Y_i
beta_0 <- 3
beta_D <- 2
beta_Z <- 0
beta_U <- 6
Y <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
#Create a data frame for convenience
data <- data.frame(Y = Y, D = D, Z = Z, U = U)
##
## Call:
## lm(formula = Y ~ D, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.1670 -2.0674 -0.1108 1.9913 10.4347
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.05792 0.03628 56.72 <2e-16 ***
## D 4.64392 0.06083 76.34 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.912 on 9998 degrees of freedom
## Multiple R-squared: 0.3683, Adjusted R-squared: 0.3682
## F-statistic: 5828 on 1 and 9998 DF, p-value: < 2.2e-16
##
## Call:
## ivreg(formula = Y ~ D | Z, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.07161 -2.13429 -0.02852 2.17754 12.07252
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.96252 0.04739 62.52 <2e-16 ***
## D 2.10146 0.09933 21.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.156 on 9998 degrees of freedom
## Multiple R-Squared: 0.2579, Adjusted R-squared: 0.2578
## Wald test: 447.6 on 1 and 9998 DF, p-value: < 2.2e-16
# Run OLS regression of D on Z (with a constant)
ols_D_on_Z <- lm(D ~ Z, data = data)
# Predicted value of D (Dhat)
Dhat <- fitted(ols_D_on_Z)
# Residual of D (Dres)
Dres <- resid(ols_D_on_Z)
# Run OLS regression of Y on Dhat
ols_Y_on_Dhat <- lm(Y ~ Dhat, data = data)
summary(ols_Y_on_Dhat)
##
## Call:
## lm(formula = Y ~ Dhat, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.4818 -2.4946 0.1184 2.4921 13.0604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.96252 0.05408 54.77 <2e-16 ***
## Dhat 2.10146 0.11338 18.54 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.603 on 9998 degrees of freedom
## Multiple R-squared: 0.03322, Adjusted R-squared: 0.03312
## F-statistic: 343.6 on 1 and 9998 DF, p-value: < 2.2e-16
# Run OLS regression of Y on D and Dres
ols_Y_on_D_Dres <- lm(Y ~ D + Dres, data = data)
summary(ols_Y_on_D_Dres)
##
## Call:
## lm(formula = Y ~ D + Dres, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2498 -1.8127 0.0082 1.8597 10.5775
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.96252 0.04060 72.96 <2e-16 ***
## D 2.10146 0.08511 24.69 <2e-16 ***
## Dres 4.54446 0.11379 39.94 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.705 on 9997 degrees of freedom
## Multiple R-squared: 0.4552, Adjusted R-squared: 0.4551
## F-statistic: 4176 on 2 and 9997 DF, p-value: < 2.2e-16
# Change the DGP to beta_Z = 1
beta_Z <- 1
Y_new <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
# Re-run IV regression with Z as the IV for D
iv_model_new <- ivreg(Y_new ~ D | Z)
summary(iv_model_new)
##
## Call:
## ivreg(formula = Y_new ~ D | Z)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.35525 -1.95761 -0.02863 1.97623 11.55490
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.90669 0.04363 66.62 <2e-16 ***
## D 3.67491 0.09146 40.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.906 on 9998 degrees of freedom
## Multiple R-Squared: 0.4111, Adjusted R-squared: 0.4111
## Wald test: 1614 on 1 and 9998 DF, p-value: < 2.2e-16
# Change the DGP to beta_Z = -1
beta_Z <- -1
Y_new_neg <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
# Re-run IV regression with Z as the IV for D
iv_model_neg <- ivreg(Y_new_neg ~ D | Z)
summary(iv_model_neg)
##
## Call:
## ivreg(formula = Y_new_neg ~ D | Z)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.12745 -2.41072 0.07622 2.40888 12.59013
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.01835 0.05225 57.764 < 2e-16 ***
## D 0.52802 0.10954 4.821 1.45e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.481 on 9998 degrees of freedom
## Multiple R-Squared: 0.06862, Adjusted R-squared: 0.06853
## Wald test: 23.24 on 1 and 9998 DF, p-value: 1.453e-06
# Baseline case (alpha_Z = 5 and beta_Z = 1)
alpha_Z <- 5
beta_Z <- 1
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_baseline <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
# IV regression for baseline case
iv_model_baseline <- ivreg(Y_baseline ~ D | Z)
summary(iv_model_baseline)
##
## Call:
## ivreg(formula = Y_baseline ~ D | Z)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.35525 -1.95761 -0.02863 1.97623 11.55490
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.90669 0.04363 66.62 <2e-16 ***
## D 3.67491 0.09146 40.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.906 on 9998 degrees of freedom
## Multiple R-Squared: 0.4111, Adjusted R-squared: 0.4111
## Wald test: 1614 on 1 and 9998 DF, p-value: < 2.2e-16
# New case (alpha_Z = 10 and beta_Z = 1)
alpha_Z <- 10
D_new <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_new <- beta_0 + beta_D * D_new + beta_Z * Z + beta_U * U + epsilon_Y
# IV regression for new case
iv_model_new <- ivreg(Y_new ~ D_new | Z)
summary(iv_model_new)
##
## Call:
## ivreg(formula = Y_new ~ D_new | Z)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.142994 -2.137132 -0.006763 2.200596 12.102601
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.92684 0.04624 63.29 <2e-16 ***
## D_new 3.10706 0.06529 47.59 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.139 on 9998 degrees of freedom
## Multiple R-Squared: 0.233, Adjusted R-squared: 0.2329
## Wald test: 2265 on 1 and 9998 DF, p-value: < 2.2e-16
# Parameters
N <- 10000
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4
# Simulate variables
Z <- rbinom(N, 1, 0.5) # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5) # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1) # Error term
# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)
# Calculate D when Z = 1
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
# Compliers: D changes from 0 to 1 when Z changes from 0 to 1
compliers <- sum(D_Z0 == 0 & D_Z1 == 1) / N
# Always-takers: D = 1 regardless of Z
always_takers <- sum(D_Z0 == 1 & D_Z1 == 1) / N
# Never-takers: D = 0 regardless of Z
never_takers <- sum(D_Z0 == 0 & D_Z1 == 0) / N
# Output the proportions
list(
compliers = compliers,
always_takers = always_takers,
never_takers = never_takers
)
## $compliers
## [1] 0.3252
##
## $always_takers
## [1] 0.0401
##
## $never_takers
## [1] 0.6347
# Parameters for the new case (alpha_Z = 10)
alpha_Z <- 10
alpha_0 <- -4
alpha_U <- 4
N <- 10000
# Simulate variables
Z <- rbinom(N, 1, 0.5) # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5) # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1) # Error term
# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)
# Calculate D when Z = 1 (with alpha_Z = 10)
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
# Proportions
compliers_new <- sum(D_Z0 == 0 & D_Z1 == 1) / N
always_takers_new <- sum(D_Z0 == 1 & D_Z1 == 1) / N
never_takers_new <- sum(D_Z0 == 0 & D_Z1 == 0) / N
# Output the proportions
list(
compliers_new = compliers_new,
always_takers_new = always_takers_new,
never_takers_new = never_takers_new
)
## $compliers_new
## [1] 0.49
##
## $always_takers_new
## [1] 0.0351
##
## $never_takers_new
## [1] 0.4749
# Set the random seed to 2 for reproducibility
set.seed(2)
# (b) Number of observations
N <- 10000
# (c) Draw epsilon_1 and epsilon_2 from N(0, 1)
epsilon_D <- rnorm(N, mean = 0, sd = 1)
epsilon_Y <- rnorm(N, mean = 0, sd = 1)
# (d) Draw U from N(0, 0.5)
U <- rnorm(N, mean = 0, sd = 0.5)
# (e) Create Z = 1(z > 0.5), where z is randomly drawn from a uniform distribution
z <- runif(N, min = 0, max = 1)
Z <- as.numeric(z > 0.5)
# (f) Create D_i
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
# (g) Create Y_i
beta_0 <- 3
beta_D <- 2
beta_Z <- 0
beta_U <- 6
Y <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
#Create a data frame for convenience
data <- data.frame(Y = Y, D = D, Z = Z, U = U)
# Run OLS regression
ols_model <- lm(Y ~ D, data = data)
summary(ols_model)
# export latex
xtable::xtable(ols_model)
# Run IV regression
iv_model <- ivreg(Y ~ D | Z, data = data)
summary(iv_model)
iv_summary <- summary(iv_model)$coefficients
# export latex
xtable::xtable(iv_summary)
# Run OLS regression of D on Z (with a constant)
ols_D_on_Z <- lm(D ~ Z, data = data)
# Predicted value of D (Dhat)
Dhat <- fitted(ols_D_on_Z)
# Residual of D (Dres)
Dres <- resid(ols_D_on_Z)
# Run OLS regression of Y on Dhat
ols_Y_on_Dhat <- lm(Y ~ Dhat, data = data)
summary(ols_Y_on_Dhat)
# Run OLS regression of Y on D and Dres
ols_Y_on_D_Dres <- lm(Y ~ D + Dres, data = data)
summary(ols_Y_on_D_Dres)
# export latex
xtable::xtable(ols_Y_on_Dhat)
# export latex
xtable::xtable(ols_Y_on_D_Dres)
# Change the DGP to beta_Z = 1
beta_Z <- 1
Y_new <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
# Re-run IV regression with Z as the IV for D
iv_model_new <- ivreg(Y_new ~ D | Z)
summary(iv_model_new)
iv_reg1<- summary(iv_model_new)$coefficients
# Change the DGP to beta_Z = -1
beta_Z <- -1
Y_new_neg <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
# Re-run IV regression with Z as the IV for D
iv_model_neg <- ivreg(Y_new_neg ~ D | Z)
iv_reg2<- summary(iv_model_neg)$coefficients
# export latex
xtable::xtable(iv_reg1)
# export latex
xtable::xtable(iv_reg2)
# Baseline case (alpha_Z = 5 and beta_Z = 1)
alpha_Z <- 5
beta_Z <- 1
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_baseline <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y
# IV regression for baseline case
iv_model_baseline <- ivreg(Y_baseline ~ D | Z)
summary(iv_model_baseline)
iv_baseline <- summary(iv_model_baseline)$coefficients
# New case (alpha_Z = 10 and beta_Z = 1)
alpha_Z <- 10
D_new <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_new <- beta_0 + beta_D * D_new + beta_Z * Z + beta_U * U + epsilon_Y
# IV regression for new case
iv_model_new <- ivreg(Y_new ~ D_new | Z)
summary(iv_model_new)
iv_new <- summary(iv_model_new)$coefficients
# export latex
xtable::xtable(iv_baseline)
# export latex
xtable::xtable(iv_new)
# Parameters
N <- 10000
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4
# Simulate variables
Z <- rbinom(N, 1, 0.5) # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5) # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1) # Error term
# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)
# Calculate D when Z = 1
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
# Compliers: D changes from 0 to 1 when Z changes from 0 to 1
compliers <- sum(D_Z0 == 0 & D_Z1 == 1) / N
# Always-takers: D = 1 regardless of Z
always_takers <- sum(D_Z0 == 1 & D_Z1 == 1) / N
# Never-takers: D = 0 regardless of Z
never_takers <- sum(D_Z0 == 0 & D_Z1 == 0) / N
# Output the proportions
list(
compliers = compliers,
always_takers = always_takers,
never_takers = never_takers
)
# Parameters for the new case (alpha_Z = 10)
alpha_Z <- 10
alpha_0 <- -4
alpha_U <- 4
N <- 10000
# Simulate variables
Z <- rbinom(N, 1, 0.5) # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5) # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1) # Error term
# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)
# Calculate D when Z = 1 (with alpha_Z = 10)
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
# Proportions
compliers_new <- sum(D_Z0 == 0 & D_Z1 == 1) / N
always_takers_new <- sum(D_Z0 == 1 & D_Z1 == 1) / N
never_takers_new <- sum(D_Z0 == 0 & D_Z1 == 0) / N
# Output the proportions
list(
compliers_new = compliers_new,
always_takers_new = always_takers_new,
never_takers_new = never_takers_new
)