Este documento muestra cómo reorganizar la base de datos `data_full`
a formato ancho (una fila por hogar) para combinar el emparejamiento por
puntaje de propensión con la metodología de Diferencias en Diferencias (DiD).
library(tidyverse)
library(haven)
library(MatchIt)
library(modelr)
library(magrittr)
# Point the session at the course folder and load the Stata dataset from it;
# the relative file name below is resolved against that working directory.
# NOTE(review): the absolute, user-specific path only resolves on the machine
# where this script was written -- adjust it before running elsewhere.
setwd(dir = "C:/Users/mosia/Desktop/Curso de especializacion/control de lectura en Rstudio")
data_full <- haven::read_dta(file = "evaluation.dta")
# Reshape `data_full` to wide format: each column listed in `values_from`
# becomes a `<name>_<round>` pair, and every remaining column acts as a row
# identifier.
#
# `hospital` is pivoted together with the other per-round measurements.
# When a column that changes between rounds is left out of `values_from`,
# pivot_wider() treats it as an identifier and splits the household into one
# row per round; the filter below then keeps only the round-0 half, so the
# household's `*_1` values are silently NA.
# NOTE(review): `hospital` is presumed to be the round-varying column; the
# check below stops the script if another one remains. Console output
# recorded further down in this file was produced before this change and
# will differ once the script is re-run.
data_wide <- data_full %>%
  tidyr::pivot_wider(
    names_from = round,
    values_from = c(
      "health_expenditures", "poverty_index", "age_hh", "age_sp",
      "educ_hh", "educ_sp", "female_hh", "indigenous", "hhsize",
      "dirtfloor", "bathroom", "land", "hospital_distance", "hospital"
    )
  )

# `household_identifier` is the key the later join relies on, so it must
# identify exactly one wide row.
if (base::anyDuplicated(data_wide$household_identifier) > 0) {
  stop(
    "pivot_wider() left more than one row per household: ",
    "a column outside `values_from` still varies between rounds.",
    call. = FALSE
  )
}

# Keep only households with an observed round-0 health expenditure.
data <- data_wide %>%
  dplyr::filter(!base::is.na(health_expenditures_0))
# Check the result: dimensions, first ten rows and row count of `data`.
# Lines starting with `##` are console output recorded when the document
# was rendered.
base::dim(data)
## [1] 9913 34
utils::head(x = data, n = 10)
## # A tibble: 10 × 34
## locality_identifier household_identifier treatment_locality
## <dbl> <dbl> <dbl>
## 1 26 5 1
## 2 26 11 1
## 3 26 13 1
## 4 26 16 1
## 5 26 21 1
## 6 26 22 1
## 7 26 23 1
## 8 26 25 1
## 9 26 27 1
## 10 26 28 1
## # ℹ 31 more variables: promotion_locality <dbl>, eligible <dbl>,
## # enrolled <dbl>, enrolled_rp <dbl>, hospital <dbl>,
## # health_expenditures_0 <dbl>, health_expenditures_1 <dbl>,
## # poverty_index_0 <dbl>, poverty_index_1 <dbl>, age_hh_0 <dbl>,
## # age_hh_1 <dbl>, age_sp_0 <dbl>, age_sp_1 <dbl>, educ_hh_0 <dbl>,
## # educ_hh_1 <dbl>, educ_sp_0 <dbl>, educ_sp_1 <dbl>, female_hh_0 <dbl>,
## # female_hh_1 <dbl>, indigenous_0 <dbl>, indigenous_1 <dbl>, …
base::nrow(data)
## [1] 9913
# Propensity-score matching on the wide data: a logistic regression of
# `enrolled` on the round-0 covariates gives the score, and units are then
# paired by 1:1 nearest-neighbour matching without replacement.
# `data = data` is kept as a plain symbol on purpose: MatchIt::match.data()
# can look the dataset up again through the stored call.
model_lg2 <- MatchIt::matchit(
  formula = enrolled ~ age_hh_0 + educ_hh_0 + age_sp_0 + educ_sp_0 +
    female_hh_0 + indigenous_0 + hhsize_0 + dirtfloor_0 + bathroom_0 +
    hospital_distance_0,
  data = data,
  method = "nearest",
  distance = "glm",
  link = "logit",
  ratio = 1,
  replace = FALSE
)
# Auto-print the matching summary (recorded output follows).
model_lg2
## A `matchit` object
## - method: 1:1 nearest neighbor matching without replacement
## - distance: Propensity score
## - estimated with logistic regression
## - number of obs.: 9913 (original), 5928 (matched)
## - target estimand: ATT
## - covariates: age_hh_0, educ_hh_0, age_sp_0, educ_sp_0, female_hh_0, indigenous_0, hhsize_0, dirtfloor_0, bathroom_0, hospital_distance_0
# Density of the fitted propensity score by enrollment status, computed on
# the full `data` (i.e. before restricting to the matched sample).
# `model_lg2$model` is the fitted model stored in the matchit object;
# `type = "response"` asks for predictions on the probability scale.
data %>%
  modelr::add_predictions(model_lg2$model, var = "pred", type = "response") %>%
  dplyr::mutate(enrolled_lab = base::ifelse(enrolled == 1, "Sí", "No")) %>%
  ggplot2::ggplot(
    ggplot2::aes(x = pred, colour = enrolled_lab, fill = enrolled_lab)
  ) +
  ggplot2::geom_density(alpha = 1 / 3)
# Extract the matched sample from the matchit object, then draw the same
# density plot on it, using the `distance` column that match.data() returns
# as the x variable.
data_ps2 <- MatchIt::match.data(model_lg2)
data_ps2 %>%
  dplyr::mutate(enrolled_lab = base::ifelse(enrolled == 1, "Sí", "No")) %>%
  ggplot2::ggplot(
    ggplot2::aes(x = distance, colour = enrolled_lab, fill = enrolled_lab)
  ) +
  ggplot2::geom_density(alpha = 1 / 3)
# Weighted regression of the round-1 health expenditure on enrollment within
# the matched sample, using the matching weights from match.data().
# The recorded output below reports 470 rows dropped because of missing
# values.
model_ps2 <- stats::lm(
  formula = health_expenditures_1 ~ enrolled,
  data = data_ps2,
  weights = data_ps2$weights
)
base::summary(model_ps2)
##
## Call:
## stats::lm(formula = health_expenditures_1 ~ enrolled, data = data_ps2,
## weights = data_ps2$weights)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.658 -5.545 -2.068 2.913 80.228
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.8652 0.1555 114.90 <2e-16 ***
## enrolled -10.0166 0.2223 -45.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.21 on 5456 degrees of freedom
## (470 observations deleted due to missingness)
## Multiple R-squared: 0.2712, Adjusted R-squared: 0.271
## F-statistic: 2030 on 1 and 5456 DF, p-value: < 2.2e-16
# Bring the matched households (and their matching weights) back to the long
# panel in `data_full`, so each matched household contributes one row per
# round.
# The join key is spelled out instead of relying on the natural join, so a
# column that later happens to share a name in both tables cannot silently
# become part of the key; with `by` given, dplyr no longer prints the
# "Joining with ..." message.
# `relationship = "one-to-many"` makes the join fail if a household id is
# duplicated in the matched sample, which would otherwise multiply rows.
data_ps_dd2 <- dplyr::left_join(
  x = data_ps2 %>% dplyr::select(household_identifier, weights),
  y = data_full,
  by = "household_identifier",
  relationship = "one-to-many"
)
# Difference-in-differences on the matched long panel: the coefficient on
# `enrolled:round` is the DiD estimate.
# The matching weights carried in `data_ps_dd2` are now passed to lm(), in
# line with the weighted cross-section model fitted on the matched sample.
# NOTE(review): under 1:1 matching without replacement the weights are
# expected to all equal 1, so the estimates recorded below should not change;
# only the `Call:` line will additionally show `weights = weights` when the
# script is re-run. The weights start to matter as soon as the matching
# specification uses replacement or a ratio other than 1.
model_ps_dd2 <- stats::lm(
  formula = health_expenditures ~ enrolled * round,
  data = data_ps_dd2,
  weights = weights
)
summary(model_ps_dd2)
##
## Call:
## stats::lm(formula = health_expenditures ~ enrolled * round, data = data_ps_dd2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.581 -4.461 -1.049 2.946 80.306
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.0616 0.1217 123.800 < 2e-16 ***
## enrolled -0.5719 0.1721 -3.324 0.00089 ***
## round 2.7264 0.1721 15.846 < 2e-16 ***
## enrolled:round -9.3763 0.2433 -38.535 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.624 on 11852 degrees of freedom
## Multiple R-squared: 0.2337, Adjusted R-squared: 0.2335
## F-statistic: 1205 on 3 and 11852 DF, p-value: < 2.2e-16