# Paquetes
req <- c("tidyverse", "readxl", "janitor", "rdrobust", "lfe", "gtsummary", "stargazer")
new <- req[!(req %in% installed.packages()[, "Package"])]
if (length(new)) install.packages(new, dependencies = TRUE)
invisible(lapply(req, library, character.only = TRUE))
# Cargar datos desde ruta absoluta
dat <- read_excel("C:/Users/vacak/OneDrive/Documentos/edi_final.xlsx") %>%
janitor::clean_names()
fix_hb <- function(x) {
x <- ifelse(x %in% c("#NULL!", "NULL", "", NA), NA, x)
as.numeric(x)
}
edat <- dat %>%
filter(
edadmeses_hb_mas_cercano <= 60,
edad_meses_aplicacion <= 60,
resultado %in% c(1, 2)
) %>%
mutate(
hb_ajustada_mas_cercano = fix_hb(hb_ajustada_mas_cercano),
run = hb_ajustada_mas_cercano - 10.5,
D = as.integer(run < 0),
y = as.integer(resultado == 2)
) %>%
filter(!is.na(hb_ajustada_mas_cercano), !is.na(run), !is.na(D), !is.na(y))
edat %>%
select(run, D, y, genero) %>%
tbl_summary(by = D,
label = list(run ~ "Dist. umbral (Hb)",
y ~ "Riesgo EDI"),
statistic = list(
all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} / {N} ({p}%)"
))
| Characteristic | 0 N = 1,6161 |
1 N = 8251 |
|---|---|---|
| Dist. umbral (Hb) | 1.03 (0.73) | -1.05 (1.54) |
| Riesgo EDI | 735 / 1,616 (45%) | 436 / 825 (53%) |
| genero | ||
| 0 | 838 / 1,616 (52%) | 394 / 825 (48%) |
| 1 | 778 / 1,616 (48%) | 431 / 825 (52%) |
| 1 Mean (SD); n / N (%) | ||
rdrobustbw_obj <- with(edat, rdbwselect(y = y, x = run, c = 0, p = 1, kernel = "triangular"))
h <- tryCatch({
if (!is.null(bw_obj$h[1])) bw_obj$h[1] else 1
}, error = function(e) 1)
rd_main <- with(edat, rdrobust(y = y, x = run, c = 0,
p = 1, h = h, kernel = "triangular",
cluster = idninio))
summary(rd_main)
## Sharp RD estimates using local polynomial regression.
##
## Number of Obs. 2441
## BW type Manual
## Kernel Triangular
## VCE method NN
##
## Number of Obs. 825 1616
## Eff. Number of Obs. 525 857
## Order est. (p) 1 1
## Order bias (q) 2 2
## BW est. (h) 1.000 1.000
## BW bias (b) 1.000 1.000
## rho (h/b) 1.000 1.000
## Unique Obs. 825 1616
##
## =====================================================================
## Point Robust Inference
## Estimate z P>|z| [ 95% C.I. ]
## ---------------------------------------------------------------------
## RD Effect 0.054 0.263 0.793 [-0.153 , 0.200]
## =====================================================================
edat <- edat %>% mutate(y_pct = y * 100)
fe1 <- felm(y_pct ~ D + run | factor(idninio) | 0 | idninio, data = edat, exactDOF = TRUE)
fe2 <- felm(y_pct ~ D + run + genero | factor(idninio) | 0 | idninio, data = edat, exactDOF = TRUE)
stargazer(fe1, fe2,
title = "RDD paramétrico – Resultado EDI (riesgo)",
type = "text", df = FALSE,
covariate.labels = c("Tratado (D)", "Run", "Género = niño"))
##
## RDD paramétrico – Resultado EDI (riesgo)
## ================================================
## Dependent variable:
## ----------------------------
## y_pct
## (1) (2)
## ------------------------------------------------
## Tratado (D) 5.333 5.333
## (8.480) (8.480)
##
## Run -6.124* -6.124*
## (3.551) (3.551)
##
## Género = niño
## (0.000)
##
## ------------------------------------------------
## Observations 2,441 2,441
## R2 0.917 0.917
## Adjusted R2 0.265 0.265
## Residual Std. Error 42.833 42.833
## ================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
window_vals <- c(h / 2, h, h * 2)
obs_por_banda <- sapply(window_vals, function(w) sum(abs(edat$run) <= w))
print(obs_por_banda)
## [1] 844 1432 2202
res_list <- lapply(window_vals, function(w) {
data_sub <- subset(edat, abs(run) <= w)
if (nrow(data_sub) >= 50) {
tryCatch(
felm(y_pct ~ D + run | factor(idninio) | 0 | idninio, data = data_sub),
error = function(e) NULL
)
} else {
NULL
}
})
res_list_clean <- Filter(Negate(is.null), res_list)
if (length(res_list_clean) > 0) {
stargazer(res_list_clean,
title = "Sensibilidad al ancho de banda (±h/2, ±h, ±2h)",
type = "text", df = FALSE)
} else {
cat("❌ Ninguna especificación se pudo estimar. Verifica el tamaño muestral en las bandas.\n")
}
##
## Sensibilidad al ancho de banda (±h/2, ±h, ±2h)
## =================================================
## Dependent variable:
## -----------------------------
## y_pct
## (1) (2) (3)
## -------------------------------------------------
## D -29.413 7.394 6.147
## (28.820) (17.185) (11.023)
##
## run -39.218 2.701 -5.004
## (41.859) (13.416) (6.260)
##
## -------------------------------------------------
## Observations 844 1,432 2,202
## R2 0.961 0.940 0.923
## Adjusted R2 0.261 0.274 0.279
## Residual Std. Error 42.982 42.610 42.432
## =================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
edat_plot <- edat %>%
filter(abs(run) <= 3) %>%
mutate(bin = cut(run, breaks = seq(-3, 3, 0.2), include.lowest = TRUE)) %>%
group_by(bin) %>%
summarise(run_mid = mean(run, na.rm = TRUE), y_bar = mean(y), .groups = "drop")
ggplot(edat, aes(x = run, y = y)) +
geom_point(alpha = .2) +
geom_line(data = edat_plot, aes(x = run_mid, y = y_bar), colour = "black", size = 1) +
geom_smooth(data = subset(edat, run < 0), method = "loess", se = FALSE) +
geom_smooth(data = subset(edat, run >= 0), method = "loess", se = FALSE) +
geom_vline(xintercept = 0, linetype = "dashed", colour = "red", size = 1) +
labs(x = "Hb ajustada – 10.5 (g/dL)", y = "Probabilidad de riesgo EDI")