Purpose

  • The purpose of this script is to show the steps and decisions taken to complete PS1 for ECON 8100.
library(dplyr)
library(tidyverse)
library(data.table)
library(ggplot2)
library(kableExtra)
library(xtable)
library(AER)

Coding

Part 1.1

# (a) Set random seed for reproducibility
set.seed(1)

# (b) Number of observations
N <- 10000

# (c) Draw epsilon_1 and epsilon_2 from N(0, 1)
epsilon_D <- rnorm(N, mean = 0, sd = 1)
epsilon_Y <- rnorm(N, mean = 0, sd = 1)

# (d) Draw U from N(0, 0.5)
U <- rnorm(N, mean = 0, sd = 0.5)

# (e) Create Z = 1(z > 0.5), where z is randomly drawn from a uniform distribution
z <- runif(N, min = 0, max = 1)
Z <- as.numeric(z > 0.5)

# (f) Create D_i
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)

# (g) Create Y_i
beta_0 <- 3
beta_D <- 2
beta_Z <- 0
beta_U <- 6
Y <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y


#Create a data frame for convenience
data <- data.frame(Y = Y, D = D, Z = Z, U = U)

Part 1.2

  • OLS regress of Y on D with a constant
# Run OLS regression
ols_model <- lm(Y ~ D, data = data)
summary(ols_model)
## 
## Call:
## lm(formula = Y ~ D, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2473  -2.0084  -0.1544   1.9659   9.2807 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.02336    0.03642   55.56   <2e-16 ***
## D            4.72039    0.06021   78.40   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.9 on 9998 degrees of freedom
## Multiple R-squared:  0.3807, Adjusted R-squared:  0.3807 
## F-statistic:  6146 on 1 and 9998 DF,  p-value: < 2.2e-16

Part 1.3

  • IV regression of Y on D with Z as the IV for D
# Run IV regression
iv_model <- ivreg(Y ~ D | Z, data = data)
summary(iv_model)
## 
## Call:
## ivreg(formula = Y ~ D | Z, data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -11.14794  -2.10898   0.01567   2.11343  10.84223 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.92404    0.04738   61.71   <2e-16 ***
## D            2.25817    0.09716   23.24   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.133 on 9998 degrees of freedom
## Multiple R-Squared: 0.2771,  Adjusted R-squared: 0.2771 
## Wald test: 540.1 on 1 and 9998 DF,  p-value: < 2.2e-16

Part 1.4

  1. Regress Y on Dhat with a constant
# Run OLS regression of D on Z (with a constant)
ols_D_on_Z <- lm(D ~ Z, data = data)

# Predicted value of D (Dhat)
Dhat <- fitted(ols_D_on_Z)

# Residual of D (Dres)
Dres <- resid(ols_D_on_Z)

# Run OLS regression of Y on Dhat
ols_Y_on_Dhat <- lm(Y ~ Dhat, data = data)
summary(ols_Y_on_Dhat)
## 
## Call:
## lm(formula = Y ~ Dhat, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.614  -2.516   0.199   2.388  12.973 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.92404    0.05463   53.52   <2e-16 ***
## Dhat         2.25817    0.11203   20.16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.612 on 9998 degrees of freedom
## Multiple R-squared:  0.03905,    Adjusted R-squared:  0.03896 
## F-statistic: 406.3 on 1 and 9998 DF,  p-value: < 2.2e-16
  1. Regress Y on D and Dres with a constant
# Run OLS regression of Y on D and Dres
ols_Y_on_D_Dres <- lm(Y ~ D + Dres, data = data)
summary(ols_Y_on_D_Dres)
## 
## Call:
## lm(formula = Y ~ D + Dres, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.9638  -1.7894   0.0297   1.8385   9.4420 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.92404    0.04077   71.72   <2e-16 ***
## D            2.25817    0.08360   27.01   <2e-16 ***
## Dres         4.46237    0.11255   39.65   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.696 on 9997 degrees of freedom
## Multiple R-squared:  0.4649, Adjusted R-squared:  0.4648 
## F-statistic:  4342 on 2 and 9997 DF,  p-value: < 2.2e-16

Part 1.5

  • Change the DGP to beta_Z = 1
# Change the DGP to beta_Z = 1
beta_Z <- 1
Y_new <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y

# Re-run IV regression with Z as the IV for D
iv_model_new <- ivreg(Y_new ~ D | Z)
summary(iv_model_new)
## 
## Call:
## ivreg(formula = Y_new ~ D | Z)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -11.08396  -1.92037  -0.00676   1.95614  10.35569 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.86006    0.04371   65.44   <2e-16 ***
## D            3.80869    0.08963   42.49   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.89 on 9998 degrees of freedom
## Multiple R-Squared: 0.4263,  Adjusted R-squared: 0.4262 
## Wald test:  1806 on 1 and 9998 DF,  p-value: < 2.2e-16
iv_reg1<- summary(iv_model_new)$coefficients
  • Change the DGP to beta_Z = -1
# Change the DGP to beta_Z = -1
beta_Z <- -1
Y_new_neg <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y

# Re-run IV regression with Z as the IV for D
iv_model_neg <- ivreg(Y_new_neg ~ D | Z)
summary(iv_model_neg)
## 
## Call:
## ivreg(formula = Y_new_neg ~ D | Z)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.2119  -2.3955   0.1247   2.2961  12.2946 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.98802    0.05216  57.287  < 2e-16 ***
## D            0.70764    0.10696   6.616 3.88e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.449 on 9998 degrees of freedom
## Multiple R-Squared: 0.09203, Adjusted R-squared: 0.09194 
## Wald test: 43.77 on 1 and 9998 DF,  p-value: 3.881e-11
iv_reg2<- summary(iv_model_neg)$coefficients

Part 1.6

  • Baseline case (alpha_Z = 5)
# Baseline case (alpha_Z = 5 and beta_Z = 1)
alpha_Z <- 5
beta_Z <- 1
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_baseline <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y

# IV regression for baseline case
iv_model_baseline <- ivreg(Y_baseline ~ D | Z)
summary(iv_model_baseline)
## 
## Call:
## ivreg(formula = Y_baseline ~ D | Z)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -11.08396  -1.92037  -0.00676   1.95614  10.35569 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.86006    0.04371   65.44   <2e-16 ***
## D            3.80869    0.08963   42.49   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.89 on 9998 degrees of freedom
## Multiple R-Squared: 0.4263,  Adjusted R-squared: 0.4262 
## Wald test:  1806 on 1 and 9998 DF,  p-value: < 2.2e-16
iv_baseline <- summary(iv_model_baseline)$coefficients
  • New case (alpha_Z = 10)
# New case (alpha_Z = 10 and beta_Z = 1)
alpha_Z <- 10
D_new <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_new <- beta_0 + beta_D * D_new + beta_Z * Z + beta_U * U + epsilon_Y

# IV regression for new case
iv_model_new <- ivreg(Y_new ~ D_new | Z)
summary(iv_model_new)
## 
## Call:
## ivreg(formula = Y_new ~ D_new | Z)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -11.10825  -2.14575   0.05882   2.16869  10.92009 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.88435    0.04642   62.13   <2e-16 ***
## D_new        3.22000    0.06556   49.12   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.134 on 9998 degrees of freedom
## Multiple R-Squared: 0.2509,  Adjusted R-squared: 0.2508 
## Wald test:  2412 on 1 and 9998 DF,  p-value: < 2.2e-16
iv_new <- summary(iv_model_new)$coefficients

Part 1.8

# Parameters
N <- 10000
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4

# Simulate variables
Z <- rbinom(N, 1, 0.5)  # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5)  # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1)  # Error term

# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)

# Calculate D when Z = 1
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)

# Compliers: D changes from 0 to 1 when Z changes from 0 to 1
compliers <- sum(D_Z0 == 0 & D_Z1 == 1) / N

# Always-takers: D = 1 regardless of Z
always_takers <- sum(D_Z0 == 1 & D_Z1 == 1) / N

# Never-takers: D = 0 regardless of Z
never_takers <- sum(D_Z0 == 0 & D_Z1 == 0) / N

# Output the proportions
list(
  compliers = compliers,
  always_takers = always_takers,
  never_takers = never_takers
)
## $compliers
## [1] 0.3172
## 
## $always_takers
## [1] 0.0369
## 
## $never_takers
## [1] 0.6459

Part 1.9

# Parameters for the new case (alpha_Z = 10)
alpha_Z <- 10
alpha_0 <- -4
alpha_U <- 4
N <- 10000

# Simulate variables
Z <- rbinom(N, 1, 0.5)  # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5)  # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1)  # Error term

# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)

# Calculate D when Z = 1 (with alpha_Z = 10)
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)

# Proportions
compliers_new <- sum(D_Z0 == 0 & D_Z1 == 1) / N
always_takers_new <- sum(D_Z0 == 1 & D_Z1 == 1) / N
never_takers_new <- sum(D_Z0 == 0 & D_Z1 == 0) / N

# Output the proportions
list(
  compliers_new = compliers_new,
  always_takers_new = always_takers_new,
  never_takers_new = never_takers_new
)
## $compliers_new
## [1] 0.4801
## 
## $always_takers_new
## [1] 0.0379
## 
## $never_takers_new
## [1] 0.482
# Set the random seed to 2 for reproducibility
set.seed(2)


# (b) Number of observations
N <- 10000

# (c) Draw epsilon_1 and epsilon_2 from N(0, 1)
epsilon_D <- rnorm(N, mean = 0, sd = 1)
epsilon_Y <- rnorm(N, mean = 0, sd = 1)

# (d) Draw U from N(0, 0.5)
U <- rnorm(N, mean = 0, sd = 0.5)

# (e) Create Z = 1(z > 0.5), where z is randomly drawn from a uniform distribution
z <- runif(N, min = 0, max = 1)
Z <- as.numeric(z > 0.5)

# (f) Create D_i
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)

# (g) Create Y_i
beta_0 <- 3
beta_D <- 2
beta_Z <- 0
beta_U <- 6
Y <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y


#Create a data frame for convenience
data <- data.frame(Y = Y, D = D, Z = Z, U = U)

# Run OLS regression
ols_model <- lm(Y ~ D, data = data)
summary(ols_model)

# export latex 
xtable::xtable(ols_model)

# Run IV regression
iv_model <- ivreg(Y ~ D | Z, data = data)
summary(iv_model)

iv_summary <- summary(iv_model)$coefficients
# export latex
xtable::xtable(iv_summary)


# Run OLS regression of D on Z (with a constant)
ols_D_on_Z <- lm(D ~ Z, data = data)

# Predicted value of D (Dhat)
Dhat <- fitted(ols_D_on_Z)

# Residual of D (Dres)
Dres <- resid(ols_D_on_Z)

# Run OLS regression of Y on Dhat
ols_Y_on_Dhat <- lm(Y ~ Dhat, data = data)
summary(ols_Y_on_Dhat)


# Run OLS regression of Y on D and Dres
ols_Y_on_D_Dres <- lm(Y ~ D + Dres, data = data)
summary(ols_Y_on_D_Dres)

# export latex
xtable::xtable(ols_Y_on_Dhat)
# export latex
xtable::xtable(ols_Y_on_D_Dres)


# Change the DGP to beta_Z = 1
beta_Z <- 1
Y_new <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y

# Re-run IV regression with Z as the IV for D
iv_model_new <- ivreg(Y_new ~ D | Z)
summary(iv_model_new)
iv_reg1<- summary(iv_model_new)$coefficients

# Change the DGP to beta_Z = -1
beta_Z <- -1
Y_new_neg <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y

# Re-run IV regression with Z as the IV for D
iv_model_neg <- ivreg(Y_new_neg ~ D | Z)
iv_reg2<- summary(iv_model_neg)$coefficients

# export latex
xtable::xtable(iv_reg1)

# export latex
xtable::xtable(iv_reg2)

# Baseline case (alpha_Z = 5 and beta_Z = 1)
alpha_Z <- 5
beta_Z <- 1
D <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_baseline <- beta_0 + beta_D * D + beta_Z * Z + beta_U * U + epsilon_Y

# IV regression for baseline case
iv_model_baseline <- ivreg(Y_baseline ~ D | Z)
summary(iv_model_baseline)
iv_baseline <- summary(iv_model_baseline)$coefficients

# New case (alpha_Z = 10 and beta_Z = 1)
alpha_Z <- 10
D_new <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)
Y_new <- beta_0 + beta_D * D_new + beta_Z * Z + beta_U * U + epsilon_Y

# IV regression for new case
iv_model_new <- ivreg(Y_new ~ D_new | Z)
summary(iv_model_new)
iv_new <- summary(iv_model_new)$coefficients

# export latex
xtable::xtable(iv_baseline)

# export latex
xtable::xtable(iv_new)

# Parameters
N <- 10000
alpha_0 <- -4
alpha_Z <- 5
alpha_U <- 4

# Simulate variables
Z <- rbinom(N, 1, 0.5)  # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5)  # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1)  # Error term

# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)

# Calculate D when Z = 1
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)

# Compliers: D changes from 0 to 1 when Z changes from 0 to 1
compliers <- sum(D_Z0 == 0 & D_Z1 == 1) / N

# Always-takers: D = 1 regardless of Z
always_takers <- sum(D_Z0 == 1 & D_Z1 == 1) / N

# Never-takers: D = 0 regardless of Z
never_takers <- sum(D_Z0 == 0 & D_Z1 == 0) / N

# Output the proportions
list(
  compliers = compliers,
  always_takers = always_takers,
  never_takers = never_takers
)

# Parameters for the new case (alpha_Z = 10)
alpha_Z <- 10
alpha_0 <- -4
alpha_U <- 4
N <- 10000

# Simulate variables
Z <- rbinom(N, 1, 0.5)  # Instrument Z (random 0 or 1)
U <- rnorm(N, mean = 0, sd = 0.5)  # Unobserved variable U
epsilon_D <- rnorm(N, mean = 0, sd = 1)  # Error term

# Calculate D when Z = 0
D_Z0 <- as.numeric(alpha_0 + alpha_U * U + epsilon_D > 0)

# Calculate D when Z = 1 (with alpha_Z = 10)
D_Z1 <- as.numeric(alpha_0 + alpha_Z * Z + alpha_U * U + epsilon_D > 0)

# Proportions
compliers_new <- sum(D_Z0 == 0 & D_Z1 == 1) / N
always_takers_new <- sum(D_Z0 == 1 & D_Z1 == 1) / N
never_takers_new <- sum(D_Z0 == 0 & D_Z1 == 0) / N

# Output the proportions
list(
  compliers_new = compliers_new,
  always_takers_new = always_takers_new,
  never_takers_new = never_takers_new
)