#PROBLEM 2-1 Omitted Variable Bias #install libraries

library(dplyr)

## 
## 次のパッケージを付け加えます: 'dplyr'

## 以下のオブジェクトは 'package:stats' からマスクされています:
## 
##     filter, lag

## 以下のオブジェクトは 'package:base' からマスクされています:
## 
##     intersect, setdiff, setequal, union

library(modelsummary)
library(estimatr)
library(AER) # for CASchools data set

## 要求されたパッケージ car をロード中です

## 要求されたパッケージ carData をロード中です

## 
## 次のパッケージを付け加えます: 'car'

## 以下のオブジェクトは 'package:dplyr' からマスクされています:
## 
##     recode

## 要求されたパッケージ lmtest をロード中です

## 要求されたパッケージ zoo をロード中です

## 
## 次のパッケージを付け加えます: 'zoo'

## 以下のオブジェクトは 'package:base' からマスクされています:
## 
##     as.Date, as.Date.numeric

## 要求されたパッケージ sandwich をロード中です

## 要求されたパッケージ survival をロード中です

#data import

data("CASchools")

create the variable

CASchools <- CASchools %>% 
  mutate(STR= students/teachers) %>% 
  mutate(score = (read + math)/2)

formula for the omitted variable regression

ovreg <- english ~ STR

regression model using the formula from ‘ovreg’

model_ovreg <- lm_robust(ovreg, data = CASchools)

tabulate with function ‘modelsummary’

modelsummary(model_ovreg, stars = TRUE)

	(1)
+ p < 0.1, * p < 0.05, p < 0.01, * p < 0.001
(Intercept)	-19.854*
	(8.698)
STR	1.814***
	(0.448)
Num.Obs.	420
R2	0.035
R2 Adj.	0.033
AIC	3623.0
BIC	3635.1
RMSE	17.94

long regression:

long <- score ~ STR + english

formula for the omitted variable regression from (a)

ovreg <- english ~ STR

fill in the below to complete the function

ovb2 <- function(data, treatment, omitted_variable) {
  
# regression model using the formula from 'long'
model_long <- lm_robust(long, data = CASchools)
  

# regression model using the formula from 'ovreg' from (a)

model_ovreg <- lm_robust(ovreg, data = CASchools)


# extract the coefficients

lambda <- coef[["coefficients"]][omitted_variable]
pi <- coef[["coefficients"]][omitted_variable]

# compute the omitted variable bias
bias <- lambda * pi

paste("The magnitude of the bias is", bias)
}

#Problem 2-2 #formulas for model

model_list <- list()

reg1 <- score ~ STR
reg2 <- score ~ STR + english
reg3 <- score ~ STR + english + lunch 
reg4 <- score ~ STR + english + calworks
reg5 <- score ~ STR + english + lunch + calworks

model1 <- lm_robust(reg1, data = CASchools)
model2 <- lm_robust(reg2, data = CASchools)
model3 <- lm_robust(reg3, data = CASchools)
model4 <- lm_robust(reg4, data = CASchools)
model5 <- lm_robust(reg5, data = CASchools)

model_list[["model1"]] <- model1
model_list[["model2"]] <- model2
model_list[["model3"]] <- model3
model_list[["model4"]] <- model4
model_list[["model5"]] <- model5

modelsummary(model_list,  stars=TRUE)

	model1	model2	model3	model4	model5
+ p < 0.1, * p < 0.05, p < 0.01, * p < 0.001
(Intercept)	698.933***	686.032***	700.150***	697.999***	700.392***
	(10.400)	(8.754)	(5.591)	(6.946)	(5.559)
STR	-2.280***	-1.101*	-0.998***	-1.308***	-1.014***
	(0.521)	(0.434)	(0.271)	(0.340)	(0.270)
english		-0.650***	-0.122***	-0.488***	-0.130***
		(0.031)	(0.033)	(0.030)	(0.036)
lunch			-0.547***		-0.529***
			(0.024)		(0.038)
calworks				-0.790***	-0.048
				(0.069)	(0.060)
Num.Obs.	420	420	420	420	420
R2	0.051	0.426	0.775	0.629	0.775
R2 Adj.	0.049	0.424	0.773	0.626	0.773
AIC	3650.5	3441.1	3051.0	3260.7	3052.4
BIC	3662.6	3457.3	3071.2	3280.9	3076.6
RMSE	18.54	14.41	9.04	11.60	9.03

Percent on public income assistance(calworks) would be unnecessary. This is becasse the coefficient of STR variable increases once calworks added in the model4.

Homework2

Asahi

2025-05-23