Purpose

  • The purpose of this script is to show the steps and decisions taken to complete PS1 for ECON 8100.
library(dplyr)
library(tidyverse)
library(data.table)
library(ggplot2)
library(kableExtra)
library(xtable)
library(AER)

Coding

Part 1.1

# (a) Set random seed for reproducibility
set.seed(2)

# (b) Number of observations
N <- 10000

# (c) Draw epsilon_1 and epsilon_2 from N(0, 1)
epsilon_D <- rnorm(N, mean = 0, sd = 1)
epsilon_Y <- rnorm(N, mean = 0, sd = 1)

# (d) Draw U from N(0, 0.5)
U <- rnorm(N, mean = 0, sd = 0.5)

# (e) Create Z = 1(z > 0.5), where z is randomly drawn from a uniform distribution
z <- runif(N, min = 0, max = 1)
Z <- as.numeric(z > 0.5)

# (f) Create D_i
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)

# (g) Create Y_i
beta_0 <- 3
beta_D <- 2
beta_Z <- 0
beta_U <- 6
Y <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y


#Create a data frame for convenience
data <- data.frame(Y = Y, D = D, Z = Z, U = U)

Part 1.2

  • OLS regress of Y on D with a constant
# Run OLS regression
ols_model <- lm(Y ~ D, data = data)
summary(ols_model)
## 
## Call:
## lm(formula = Y ~ D, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.1670  -2.0674  -0.1108   1.9913  10.4347 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.05792    0.03628   56.72   <2e-16 ***
## D            4.64392    0.06083   76.34   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.912 on 9998 degrees of freedom
## Multiple R-squared:  0.3683, Adjusted R-squared:  0.3682 
## F-statistic:  5828 on 1 and 9998 DF,  p-value: < 2.2e-16

Part 1.3

  • IV regression of Y on D with Z as the IV for D
# Run IV regression
iv_model <- ivreg(Y ~ D | Z, data = data)
summary(iv_model)
## 
## Call:
## ivreg(formula = Y ~ D | Z, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -11.07161  -2.13429  -0.02852   2.17754  12.07252 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.96252    0.04739   62.52   <2e-16 ***
## D            2.10146    0.09933   21.16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.156 on 9998 degrees of freedom
## Multiple R-Squared: 0.2579,  Adjusted R-squared: 0.2578 
## Wald test: 447.6 on 1 and 9998 DF,  p-value: < 2.2e-16

Part 1.4

  1. Regress Y on Dhat with a constant
# Run OLS regression of D on Z (with a constant)
ols_D_on_Z <- lm(D ~ Z, data = data)

# Predicted value of D (Dhat)
Dhat <- fitted(ols_D_on_Z)

# Residual of D (Dres)
Dres <- resid(ols_D_on_Z)

# Run OLS regression of Y on Dhat
ols_Y_on_Dhat <- lm(Y ~ Dhat, data = data)
summary(ols_Y_on_Dhat)
## 
## Call:
## lm(formula = Y ~ Dhat, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.4818  -2.4946   0.1184   2.4921  13.0604 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.96252    0.05408   54.77   <2e-16 ***
## Dhat         2.10146    0.11338   18.54   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.603 on 9998 degrees of freedom
## Multiple R-squared:  0.03322,    Adjusted R-squared:  0.03312 
## F-statistic: 343.6 on 1 and 9998 DF,  p-value: < 2.2e-16
  1. Regress Y on D and Dres with a constant
# Run OLS regression of Y on D and Dres
ols_Y_on_D_Dres <- lm(Y ~ D + Dres, data = data)
summary(ols_Y_on_D_Dres)
## 
## Call:
## lm(formula = Y ~ D + Dres, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2498  -1.8127   0.0082   1.8597  10.5775 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.96252    0.04060   72.96   <2e-16 ***
## D            2.10146    0.08511   24.69   <2e-16 ***
## Dres         4.54446    0.11379   39.94   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.705 on 9997 degrees of freedom
## Multiple R-squared:  0.4552, Adjusted R-squared:  0.4551 
## F-statistic:  4176 on 2 and 9997 DF,  p-value: < 2.2e-16

Part 1.5

  • Change the DGP to beta_Z = 1
# Change the DGP to beta_Z = 1
beta_Z <- 1
Y_new <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y

# Re-run IV regression with Z as the IV for D
iv_model_new <- ivreg(Y_new ~ D | Z)
summary(iv_model_new)
## 
## Call:
## ivreg(formula = Y_new ~ D | Z)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -10.35525  -1.95761  -0.02863   1.97623  11.55490 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.90669    0.04363   66.62   <2e-16 ***
## D            3.67491    0.09146   40.18   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.906 on 9998 degrees of freedom
## Multiple R-Squared: 0.4111,  Adjusted R-squared: 0.4111 
## Wald test:  1614 on 1 and 9998 DF,  p-value: < 2.2e-16
iv_reg1<- summary(iv_model_new)$coefficients
  • Change the DGP to beta_Z = -1
# Change the DGP to beta_Z = -1
beta_Z <- -1
Y_new_neg <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y

# Re-run IV regression with Z as the IV for D
iv_model_neg <- ivreg(Y_new_neg ~ D | Z)
summary(iv_model_neg)
## 
## Call:
## ivreg(formula = Y_new_neg ~ D | Z)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -12.12745  -2.41072   0.07622   2.40888  12.59013 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.01835    0.05225  57.764  < 2e-16 ***
## D            0.52802    0.10954   4.821 1.45e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.481 on 9998 degrees of freedom
## Multiple R-Squared: 0.06862, Adjusted R-squared: 0.06853 
## Wald test: 23.24 on 1 and 9998 DF,  p-value: 1.453e-06
iv_reg2<- summary(iv_model_neg)$coefficients

Part 1.6

  • Baseline case (alpha_Z = 5)
# Baseline case (alpha_Z = 5 and beta_Z = 1)
alpha_Z <- 5
beta_Z <- 1
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_baseline <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y

# IV regression for baseline case
iv_model_baseline <- ivreg(Y_baseline ~ D | Z)
summary(iv_model_baseline)
## 
## Call:
## ivreg(formula = Y_baseline ~ D | Z)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -10.35525  -1.95761  -0.02863   1.97623  11.55490 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.90669    0.04363   66.62   <2e-16 ***
## D            3.67491    0.09146   40.18   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.906 on 9998 degrees of freedom
## Multiple R-Squared: 0.4111,  Adjusted R-squared: 0.4111 
## Wald test:  1614 on 1 and 9998 DF,  p-value: < 2.2e-16
iv_baseline <- summary(iv_model_baseline)$coefficients
  • New case (alpha_Z = 10)
# New case (alpha_Z = 10 and beta_Z = 1)
alpha_Z <- 10
D_new <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_new <- beta_0 + beta_D * D_new + beta_Z * Z + beta_U * U + epsilon_Y

# IV regression for new case
iv_model_new <- ivreg(Y_new ~ D_new | Z)
summary(iv_model_new)
## 
## Call:
## ivreg(formula = Y_new ~ D_new | Z)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -11.142994  -2.137132  -0.006763   2.200596  12.102601 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.92684    0.04624   63.29   <2e-16 ***
## D_new        3.10706    0.06529   47.59   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.139 on 9998 degrees of freedom
## Multiple R-Squared: 0.233,   Adjusted R-squared: 0.2329 
## Wald test:  2265 on 1 and 9998 DF,  p-value: < 2.2e-16
iv_new <- summary(iv_model_new)$coefficients

Part 1.8

# Parameters
N <- 10000
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4

# Simulate variables
Z <- rbinom(N, 1, 0.5)  # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5)  # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1)  # Error term

# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)

# Calculate D when Z = 1
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)

# Compliers: D changes from 0 to 1 when Z changes from 0 to 1
compliers <- sum(D_Z0 == 0 & D_Z1 == 1) / N

# Always-takers: D = 1 regardless of Z
always_takers <- sum(D_Z0 == 1 & D_Z1 == 1) / N

# Never-takers: D = 0 regardless of Z
never_takers <- sum(D_Z0 == 0 & D_Z1 == 0) / N

# Output the proportions
list(
  compliers = compliers,
  always_takers = always_takers,
  never_takers = never_takers
)
## $compliers
## [1] 0.3252
## 
## $always_takers
## [1] 0.0401
## 
## $never_takers
## [1] 0.6347

Part 1.9

# Parameters for the new case (alpha_Z = 10)
alpha_Z <- 10
alpha_0 <- -4
alpha_U <- 4
N <- 10000

# Simulate variables
Z <- rbinom(N, 1, 0.5)  # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5)  # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1)  # Error term

# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)

# Calculate D when Z = 1 (with alpha_Z = 10)
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)

# Proportions
compliers_new <- sum(D_Z0 == 0 & D_Z1 == 1) / N
always_takers_new <- sum(D_Z0 == 1 & D_Z1 == 1) / N
never_takers_new <- sum(D_Z0 == 0 & D_Z1 == 0) / N

# Output the proportions
list(
  compliers_new = compliers_new,
  always_takers_new = always_takers_new,
  never_takers_new = never_takers_new
)
## $compliers_new
## [1] 0.49
## 
## $always_takers_new
## [1] 0.0351
## 
## $never_takers_new
## [1] 0.4749
# Set the random seed to 2 for reproducibility
set.seed(2)


# (b) Number of observations
N <- 10000

# (c) Draw epsilon_1 and epsilon_2 from N(0, 1)
epsilon_D <- rnorm(N, mean = 0, sd = 1)
epsilon_Y <- rnorm(N, mean = 0, sd = 1)

# (d) Draw U from N(0, 0.5)
U <- rnorm(N, mean = 0, sd = 0.5)

# (e) Create Z = 1(z > 0.5), where z is randomly drawn from a uniform distribution
z <- runif(N, min = 0, max = 1)
Z <- as.numeric(z > 0.5)

# (f) Create D_i
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)

# (g) Create Y_i
beta_0 <- 3
beta_D <- 2
beta_Z <- 0
beta_U <- 6
Y <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y


#Create a data frame for convenience
data <- data.frame(Y = Y, D = D, Z = Z, U = U)

# Run OLS regression
ols_model <- lm(Y ~ D, data = data)
summary(ols_model)

# export latex 
xtable::xtable(ols_model)

# Run IV regression
iv_model <- ivreg(Y ~ D | Z, data = data)
summary(iv_model)

iv_summary <- summary(iv_model)$coefficients
# export latex
xtable::xtable(iv_summary)


# Run OLS regression of D on Z (with a constant)
ols_D_on_Z <- lm(D ~ Z, data = data)

# Predicted value of D (Dhat)
Dhat <- fitted(ols_D_on_Z)

# Residual of D (Dres)
Dres <- resid(ols_D_on_Z)

# Run OLS regression of Y on Dhat
ols_Y_on_Dhat <- lm(Y ~ Dhat, data = data)
summary(ols_Y_on_Dhat)


# Run OLS regression of Y on D and Dres
ols_Y_on_D_Dres <- lm(Y ~ D + Dres, data = data)
summary(ols_Y_on_D_Dres)

# export latex
xtable::xtable(ols_Y_on_Dhat)
# export latex
xtable::xtable(ols_Y_on_D_Dres)


# Change the DGP to beta_Z = 1
beta_Z <- 1
Y_new <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y

# Re-run IV regression with Z as the IV for D
iv_model_new <- ivreg(Y_new ~ D | Z)
summary(iv_model_new)
iv_reg1<- summary(iv_model_new)$coefficients

# Change the DGP to beta_Z = -1
beta_Z <- -1
Y_new_neg <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y

# Re-run IV regression with Z as the IV for D
iv_model_neg <- ivreg(Y_new_neg ~ D | Z)
iv_reg2<- summary(iv_model_neg)$coefficients

# export latex
xtable::xtable(iv_reg1)

# export latex
xtable::xtable(iv_reg2)

# Baseline case (alpha_Z = 5 and beta_Z = 1)
alpha_Z <- 5
beta_Z <- 1
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_baseline <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y

# IV regression for baseline case
iv_model_baseline <- ivreg(Y_baseline ~ D | Z)
summary(iv_model_baseline)
iv_baseline <- summary(iv_model_baseline)$coefficients

# New case (alpha_Z = 10 and beta_Z = 1)
alpha_Z <- 10
D_new <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_new <- beta_0 + beta_D * D_new + beta_Z * Z + beta_U * U + epsilon_Y

# IV regression for new case
iv_model_new <- ivreg(Y_new ~ D_new | Z)
summary(iv_model_new)
iv_new <- summary(iv_model_new)$coefficients

# export latex
xtable::xtable(iv_baseline)

# export latex
xtable::xtable(iv_new)

# Parameters
N <- 10000
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4

# Simulate variables
Z <- rbinom(N, 1, 0.5)  # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5)  # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1)  # Error term

# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)

# Calculate D when Z = 1
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)

# Compliers: D changes from 0 to 1 when Z changes from 0 to 1
compliers <- sum(D_Z0 == 0 & D_Z1 == 1) / N

# Always-takers: D = 1 regardless of Z
always_takers <- sum(D_Z0 == 1 & D_Z1 == 1) / N

# Never-takers: D = 0 regardless of Z
never_takers <- sum(D_Z0 == 0 & D_Z1 == 0) / N

# Output the proportions
list(
  compliers = compliers,
  always_takers = always_takers,
  never_takers = never_takers
)

# Parameters for the new case (alpha_Z = 10)
alpha_Z <- 10
alpha_0 <- -4
alpha_U <- 4
N <- 10000

# Simulate variables
Z <- rbinom(N, 1, 0.5)  # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5)  # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1)  # Error term

# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)

# Calculate D when Z = 1 (with alpha_Z = 10)
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)

# Proportions
compliers_new <- sum(D_Z0 == 0 & D_Z1 == 1) / N
always_takers_new <- sum(D_Z0 == 1 & D_Z1 == 1) / N
never_takers_new <- sum(D_Z0 == 0 & D_Z1 == 0) / N

# Output the proportions
list(
  compliers_new = compliers_new,
  always_takers_new = always_takers_new,
  never_takers_new = never_takers_new
)