#PROBLEM 2-1 Omitted Variable Bias #install libraries
library(dplyr)
##
## 次のパッケージを付け加えます: 'dplyr'
## 以下のオブジェクトは 'package:stats' からマスクされています:
##
## filter, lag
## 以下のオブジェクトは 'package:base' からマスクされています:
##
## intersect, setdiff, setequal, union
library(modelsummary)
library(estimatr)
library(AER) # for CASchools data set
## 要求されたパッケージ car をロード中です
## 要求されたパッケージ carData をロード中です
##
## 次のパッケージを付け加えます: 'car'
## 以下のオブジェクトは 'package:dplyr' からマスクされています:
##
## recode
## 要求されたパッケージ lmtest をロード中です
## 要求されたパッケージ zoo をロード中です
##
## 次のパッケージを付け加えます: 'zoo'
## 以下のオブジェクトは 'package:base' からマスクされています:
##
## as.Date, as.Date.numeric
## 要求されたパッケージ sandwich をロード中です
## 要求されたパッケージ survival をロード中です
#data import
data("CASchools")
CASchools <- CASchools %>%
mutate(STR= students/teachers) %>%
mutate(score = (read + math)/2)
ovreg <- english ~ STR
model_ovreg <- lm_robust(ovreg, data = CASchools)
modelsummary(model_ovreg, stars = TRUE)
| (1) | |
|---|---|
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | |
| (Intercept) | -19.854* |
| (8.698) | |
| STR | 1.814*** |
| (0.448) | |
| Num.Obs. | 420 |
| R2 | 0.035 |
| R2 Adj. | 0.033 |
| AIC | 3623.0 |
| BIC | 3635.1 |
| RMSE | 17.94 |
long <- score ~ STR + english
ovreg <- english ~ STR
ovb2 <- function(data, treatment, omitted_variable) {
# regression model using the formula from 'long'
model_long <- lm_robust(long, data = CASchools)
# regression model using the formula from 'ovreg' from (a)
model_ovreg <- lm_robust(ovreg, data = CASchools)
# extract the coefficients
lambda <- coef[["coefficients"]][omitted_variable]
pi <- coef[["coefficients"]][omitted_variable]
# compute the omitted variable bias
bias <- lambda * pi
paste("The magnitude of the bias is", bias)
}
#Problem 2-2 #formulas for model
model_list <- list()
reg1 <- score ~ STR
reg2 <- score ~ STR + english
reg3 <- score ~ STR + english + lunch
reg4 <- score ~ STR + english + calworks
reg5 <- score ~ STR + english + lunch + calworks
model1 <- lm_robust(reg1, data = CASchools)
model2 <- lm_robust(reg2, data = CASchools)
model3 <- lm_robust(reg3, data = CASchools)
model4 <- lm_robust(reg4, data = CASchools)
model5 <- lm_robust(reg5, data = CASchools)
model_list[["model1"]] <- model1
model_list[["model2"]] <- model2
model_list[["model3"]] <- model3
model_list[["model4"]] <- model4
model_list[["model5"]] <- model5
modelsummary(model_list, stars=TRUE)
| model1 | model2 | model3 | model4 | model5 | |
|---|---|---|---|---|---|
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | |||||
| (Intercept) | 698.933*** | 686.032*** | 700.150*** | 697.999*** | 700.392*** |
| (10.400) | (8.754) | (5.591) | (6.946) | (5.559) | |
| STR | -2.280*** | -1.101* | -0.998*** | -1.308*** | -1.014*** |
| (0.521) | (0.434) | (0.271) | (0.340) | (0.270) | |
| english | -0.650*** | -0.122*** | -0.488*** | -0.130*** | |
| (0.031) | (0.033) | (0.030) | (0.036) | ||
| lunch | -0.547*** | -0.529*** | |||
| (0.024) | (0.038) | ||||
| calworks | -0.790*** | -0.048 | |||
| (0.069) | (0.060) | ||||
| Num.Obs. | 420 | 420 | 420 | 420 | 420 |
| R2 | 0.051 | 0.426 | 0.775 | 0.629 | 0.775 |
| R2 Adj. | 0.049 | 0.424 | 0.773 | 0.626 | 0.773 |
| AIC | 3650.5 | 3441.1 | 3051.0 | 3260.7 | 3052.4 |
| BIC | 3662.6 | 3457.3 | 3071.2 | 3280.9 | 3076.6 |
| RMSE | 18.54 | 14.41 | 9.04 | 11.60 | 9.03 |
print(modelsummary)
## function (models, output = getOption("modelsummary_output", default = "default"),
## fmt = getOption("modelsummary_fmt", default = 3), estimate = getOption("modelsummary_estimate",
## default = "estimate"), statistic = getOption("modelsummary_statistic",
## default = "std.error"), vcov = getOption("modelsummary_vcov",
## default = NULL), conf_level = getOption("modelsummary_conf_level",
## default = 0.95), exponentiate = getOption("modelsummary_exponentiate",
## default = FALSE), stars = getOption("modelsummary_stars",
## default = FALSE), shape = getOption("modelsummary_shape",
## default = term + statistic ~ model), coef_map = getOption("modelsummary_coef_map",
## default = NULL), coef_omit = getOption("modelsummary_coef_omit",
## default = NULL), coef_rename = getOption("modelsummary_coef_rename",
## default = FALSE), gof_map = getOption("modelsummary_gof_map",
## default = NULL), gof_omit = getOption("modelsummary_gof_omit",
## default = NULL), gof_function = getOption("modelsummary_gof_function",
## default = NULL), group_map = getOption("modelsummary_group_map",
## default = NULL), add_columns = getOption("modelsummary_add_columns",
## default = NULL), add_rows = getOption("modelsummary_add_rows",
## default = NULL), align = getOption("modelsummary_align",
## default = NULL), notes = getOption("modelsummary_notes",
## default = NULL), title = getOption("modelsummary_title",
## default = NULL), escape = getOption("modelsummary_escape",
## default = TRUE), ...)
## {
## checkmate::assert(checkmate::check_formula(shape), checkmate::check_choice(shape,
## c("cbind", "rbind", "rcollapse")), checkmate::check_null(shape))
## if (isTRUE(checkmate::check_choice(shape, c("rbind", "rcollapse")))) {
## out <- modelsummary_rbind(models, output = output, fmt = fmt,
## estimate = estimate, statistic = statistic, vcov = vcov,
## conf_level = conf_level, exponentiate = exponentiate,
## stars = stars, coef_map = coef_map, coef_omit = coef_omit,
## coef_rename = coef_rename, gof_map = gof_map, gof_omit = gof_omit,
## gof_function = gof_function, add_columns = add_columns,
## add_rows = add_rows, align = align, shape = shape,
## group_map = NULL, notes = notes, title = title, escape = escape,
## ...)
## return(out)
## }
## dots <- list(...)
## if (!settings_equal("function_called", "modelsummary_rbind")) {
## settings_init(settings = list(function_called = "modelsummary"))
## }
## checkmate::assert_string(gof_omit, null.ok = TRUE)
## tmp <- sanitize_output(output)
## output_format <- tmp$output_format
## output_factory <- tmp$output_factory
## output_file <- tmp$output_file
## tmp <- get_span_cbind(models, shape)
## shape <- tmp$shape
## models <- tmp$models
## span_cbind <- tmp$span_cbind
## sanitize_escape(escape)
## sanity_ellipsis(vcov, ...)
## models <- sanitize_models(models, ...)
## vcov <- sanitize_vcov(vcov, models, ...)
## number_of_models <- max(length(models), length(vcov))
## estimate <- sanitize_estimate(estimate, number_of_models)
## exponentiate <- sanitize_exponentiate(exponentiate, number_of_models)
## shape <- sanitize_shape(shape)
## statistic <- sanitize_statistic(statistic, shape, conf_level)
## gof_map <- sanitize_gof_map(gof_map)
## fmt <- sanitize_fmt(fmt, calling_function = "modelsummary")
## sanity_group_map(group_map)
## conf_level <- sanitize_conf_level(conf_level, estimate, statistic)
## sanity_coef(coef_map, coef_rename, coef_omit)
## sanity_stars(stars)
## sanity_align(align, estimate = estimate, statistic = statistic,
## stars = stars)
## checkmate::assert_function(gof_function, null.ok = TRUE)
## if (!any(grepl("conf", c(estimate, statistic)))) {
## conf_level <- NULL
## }
## modelsummary_model_labels <- getOption("modelsummary_model_labels",
## default = "(arabic)")
## if (is.null(names(models))) {
## checkmate::assert_choice(modelsummary_model_labels, choices = c("model",
## "arabic", "letters", "roman", "(arabic)", "(letters)",
## "(roman)"))
## if (modelsummary_model_labels == "model") {
## model_names <- paste("Model", 1:number_of_models)
## }
## else if (grepl("arabic", modelsummary_model_labels)) {
## model_names <- as.character(1:number_of_models)
## }
## else if (grepl("letters", modelsummary_model_labels)) {
## model_names <- LETTERS[1:number_of_models]
## }
## else if (grepl("roman", modelsummary_model_labels)) {
## model_names <- as.character(utils::as.roman(1:number_of_models))
## }
## if (grepl("\\(", modelsummary_model_labels)) {
## model_names <- sprintf("(%s)", model_names)
## }
## }
## else {
## model_names <- names(models)
## }
## model_names <- pad(model_names, output_format = output_format)
## if (!settings_equal("function_called", "modelsummary_rbind") &&
## all(grepl("^\\(\\d+\\)$", model_names)) && identical(output_format,
## "kableExtra")) {
## model_names <- paste0(" ", model_names)
## }
## msl <- get_list_of_modelsummary_lists(models = models, conf_level = conf_level,
## vcov = vcov, gof_map = gof_map, gof_function = gof_function,
## shape = shape, coef_rename = coef_rename, output_format = output_format,
## ...)
## names(msl) <- model_names
## if (identical(output_format, "modelsummary_list")) {
## if (length(msl) == 1) {
## return(msl[[1]])
## }
## else {
## return(msl)
## }
## }
## est <- list()
## for (i in seq_along(msl)) {
## tmp <- format_estimates(est = msl[[i]]$tidy, fmt = fmt,
## estimate = estimate[[i]], estimate_label = names(estimate)[1],
## statistic = statistic, vcov = vcov[[i]], conf_level = conf_level,
## stars = stars, shape = shape, group_name = shape$group_name,
## exponentiate = exponentiate[[i]], ...)
## tmp <- map_estimates(tmp, coef_rename = coef_rename,
## coef_map = coef_map, coef_omit = coef_omit, group_map = group_map)
## colnames(tmp)[match("modelsummary_value", colnames(tmp))] <- model_names[i]
## est[[model_names[i]]] <- tmp
## }
## term_order <- unique(unlist(lapply(est, function(x) x$term)))
## statistic_order <- unique(unlist(lapply(est, function(x) x$statistic)))
## bycols <- c(list(c(shape$group_name, "group", "term", "statistic")),
## lapply(est, colnames))
## bycols <- Reduce(intersect, bycols)
## f <- function(x, y) {
## merge(x, y, all = TRUE, sort = FALSE, by = bycols)
## }
## est <- Reduce(f, est)
## if (is.null(shape$group_name)) {
## idx <- paste(est$term, est$statistic)
## if (anyDuplicated(idx) > 0) {
## candidate_groups <- sapply(msl, function(x) colnames(x[["tidy"]]))
## candidate_groups <- unlist(candidate_groups)
## candidate_groups <- setdiff(candidate_groups, c("term",
## "type", "estimate", "std.error", "conf.level",
## "conf.low", "conf.high", "statistic", "df.error",
## "p.value"))
## msg <- c("There are duplicate term names in the table.",
## "The `shape` argument of the `modelsummary` function can be used to print related terms together. The `group_map` argument can be used to reorder, subset, and rename group identifiers. See `?modelsummary` for details.",
## "You can find the group identifier to use in the `shape` argument by calling `get_estimates()` on one of your models. Candidates include:",
## paste(candidate_groups, collapse = ", "))
## insight::format_warning(msg)
## }
## }
## est <- shape_estimates(est, shape, conf_level = conf_level,
## statistic = statistic, estimate = estimate)
## est$part <- "estimates"
## est <- est[, unique(c("part", names(est)))]
## est[is.na(est)] <- ""
## if ("term" %in% colnames(est)) {
## if (!is.null(coef_map)) {
## term_order <- coef_map
## }
## est$term <- factor(est$term, unique(term_order))
## if ("group" %in% colnames(est)) {
## if (!is.null(group_map)) {
## est$group <- factor(est$group, group_map)
## }
## else {
## est$group <- factor(est$group, unique(est$group))
## }
## }
## }
## else if ("model" %in% colnames(est)) {
## est$model <- factor(est$model, model_names)
## }
## if ("statistic" %in% colnames(est)) {
## est$statistic <- factor(est$statistic, statistic_order)
## }
## est <- est[do.call(order, as.list(est)), ]
## if (is.numeric(coef_omit)) {
## coef_omit <- unique(round(coef_omit))
## if (length(unique(sign(coef_omit))) != 1) {
## insight::format_error("All elements of `coef_omit` must have the same sign.")
## }
## if (!"term" %in% shape$lhs) {
## msg <- "`term` must be on the left-hand side of the `shape` formula when `coef_omit` is a numeric vector."
## insight::format_error(msg)
## }
## term_idx <- paste(est$group, est$term)
## if (max(abs(coef_omit)) > length(unique(term_idx))) {
## msg <- sprintf("There are %s unique terms, but `coef_omit` tried to omit more than that.",
## length(term_idx))
## insight::format_error(msg)
## }
## idx <- !term_idx %in% unique(term_idx)[abs(coef_omit)]
## if (any(coef_omit > 0)) {
## est <- est[idx, , drop = FALSE]
## }
## else {
## est <- est[!idx, , drop = FALSE]
## }
## }
## if (isTRUE(checkmate::check_character(coef_rename, names = "unnamed"))) {
## nterms <- length(unique(est$term))
## if (length(coef_rename) != nterms) {
## msg <- "`coef_rename` must be a named character vector or an unnamed vector of length %s"
## insight::format_error(sprintf(msg, nterms))
## }
## dict <- stats::setNames(coef_rename, as.character(unique(est$term)))
## tmp <- replace_dict(as.character(est$term), dict)
## est$term <- factor(tmp, unique(tmp))
## }
## if (is.null(shape$group_name)) {
## est[["group"]] <- NULL
## }
## cols <- intersect(colnames(est), c("term", shape$group_name,
## "model", "statistic"))
## for (col in cols) {
## est[[col]] <- as.character(est[[col]])
## }
## if (all(est[["term"]] == "cross")) {
## est[["term"]] <- NULL
## }
## gof <- list()
## for (i in seq_along(msl)) {
## if (is.data.frame(msl[[i]]$glance)) {
## gof[[i]] <- format_gof(msl[[i]]$glance, fmt = fmt,
## gof_map = gof_map, ...)
## colnames(gof[[i]])[2] <- model_names[i]
## }
## else {
## gof[[i]] <- NULL
## }
## }
## f <- function(x, y) merge(x, y, all = TRUE, sort = FALSE,
## by = "term")
## gof <- Reduce(f, gof)
## gof <- map_gof(gof, gof_omit, gof_map)
## tab <- bind_est_gof(est, gof)
## tab[is.na(tab)] <- ""
## if (is.null(coef_map) && isFALSE(coef_rename) && "term" %in%
## colnames(tab) && !identical(output_format, "rtf")) {
## idx <- tab$part != "gof"
## tab$term <- ifelse(idx, gsub("::", " = ", tab$term),
## tab$term)
## tab$term <- ifelse(idx, gsub(":", " × ", tab$term),
## tab$term)
## }
## hrule <- match("gof", tab$part)
## if (!is.na(hrule) && !is.null(add_rows) && !is.null(attr(add_rows,
## "position"))) {
## hrule <- hrule + sum(length(attr(add_rows, "position") <
## hrule)) - 1
## }
## if (is.na(hrule)) {
## hrule <- NULL
## }
## stars_note <- settings_get("stars_note")
## if (isTRUE(stars_note) && !isFALSE(stars) && !any(grepl("\\{stars\\}",
## c(estimate, statistic)))) {
## stars_note <- make_stars_note(stars, output_format = output_format,
## output_factory = output_factory)
## if (is.null(notes)) {
## notes <- stars_note
## }
## else {
## notes <- c(stars_note, notes)
## }
## }
## if (settings_equal("function_called", "modelsummary_rbind")) {
## tab <- redundant_labels(tab, "term")
## }
## if (!identical(output_format, "dataframe") && !settings_equal("function_called",
## "modelsummary_rbind")) {
## dups <- c("term", "model", shape$group_name)
## for (d in dups) {
## tab <- redundant_labels(tab, d)
## }
## tab$statistic <- tab$part <- NULL
## if ("term" %in% colnames(tab))
## colnames(tab)[colnames(tab) == "term"] <- " "
## if ("model" %in% colnames(tab))
## colnames(tab)[colnames(tab) == "model"] <- " "
## if ("group" %in% colnames(tab))
## colnames(tab)[colnames(tab) == "model"] <- " "
## }
## if (length(unique(tab$group)) == 1) {
## tab$group <- NULL
## }
## tmp <- setdiff(shape$lhs, c("model", "term"))
## if (length(tmp) == 0) {
## tab$group <- NULL
## }
## else if (!identical(output_format, "dataframe")) {
## colnames(tab)[colnames(tab) == "group"] <- " "
## }
## if (is.null(align)) {
## n_stub <- sum(grepl("^ *$", colnames(tab))) + sum(colnames(tab) %in%
## c(" ", shape$group_name))
## align <- paste0(strrep("l", n_stub), strrep("c", ncol(tab) -
## n_stub))
## if (isTRUE(checkmate::check_data_frame(add_columns))) {
## align <- paste0(align, strrep("c", ncol(add_columns)))
## }
## }
## flag <- any(sapply(models, inherits, c("marginaleffects",
## "comparisons", "marginalmeans")))
## if (isTRUE(flag)) {
## colnames(tab) <- gsub("^value$", " ", colnames(tab))
## colnames(tab) <- gsub("^contrast_", "", colnames(tab))
## colnames(tab) <- gsub("^contrast$", " ", colnames(tab))
## }
## for (i in seq_along(tab)) {
## tab[[i]] <- gsub("\\(\\s*\\)", "", tab[[i]])
## tab[[i]] <- gsub("\\(\\\\num\\{NA\\}\\)", "", tab[[i]])
## tab[[i]] <- gsub("\\[,\\s*\\]", "", tab[[i]])
## tab[[i]] <- gsub("\\[\\\\num\\{NA\\}, \\\\num\\{NA\\}\\]",
## "", tab[[i]])
## }
## idx <- apply(tab, 1, function(x) any(x != ""))
## tab <- tab[idx, ]
## out <- factory(tab, align = align, fmt = fmt, hrule = hrule,
## gof_idx = hrule, notes = notes, output = output, title = title,
## add_rows = add_rows, add_columns = add_columns, escape = escape,
## output_factory = output_factory, output_format = output_format,
## output_file = output_file, ...)
## out <- set_span_cbind(out, span_cbind)
## if (settings_equal("function_called", "modelsummary_rbind")) {
## return(out)
## }
## else if (!is.null(output_file) || isTRUE(output == "jupyter") ||
## (isTRUE(output == "default") && settings_equal("output_default",
## "jupyter"))) {
## settings_rm()
## return(invisible(out))
## }
## else {
## settings_rm()
## return(out)
## }
## }
## <bytecode: 0x0000020345b60468>
## <environment: namespace:modelsummary>
Percent on public income assistance(calworks) would be unnecessary. This is becasse the coefficient of STR variable increases once calworks added in the model4.