1. Carga de librerías y datos

# Paquetes
req <- c("tidyverse", "readxl", "janitor", "rdrobust", "lfe", "gtsummary", "stargazer")
new <- req[!(req %in% installed.packages()[, "Package"])]
if (length(new)) install.packages(new, dependencies = TRUE)
invisible(lapply(req, library, character.only = TRUE))

# Cargar datos desde ruta absoluta
dat <- read_excel("C:/Users/vacak/OneDrive/Documentos/edi_final.xlsx") %>%
  janitor::clean_names()

2. Preparación de variables

fix_hb <- function(x) {
  x <- ifelse(x %in% c("#NULL!", "NULL", "", NA), NA, x)
  as.numeric(x)
}

edat <- dat %>%
  filter(
    edadmeses_hb_mas_cercano <= 60,
    edad_meses_aplicacion    <= 60,
    resultado %in% c(1, 2)
  ) %>%
  mutate(
    hb_ajustada_mas_cercano = fix_hb(hb_ajustada_mas_cercano),
    run = hb_ajustada_mas_cercano - 10.5,
    D   = as.integer(run < 0),
    y   = as.integer(resultado == 2)
  ) %>%
  filter(!is.na(hb_ajustada_mas_cercano), !is.na(run), !is.na(D), !is.na(y))

3. Estadísticas descriptivas

edat %>%
  select(run, D, y, genero) %>%
  tbl_summary(by = D,
              label = list(run ~ "Dist. umbral (Hb)",
                           y   ~ "Riesgo EDI"),
              statistic = list(
                all_continuous() ~ "{mean} ({sd})",
                all_categorical() ~ "{n} / {N} ({p}%)"
              ))
Characteristic 0
N = 1,616
1
1
N = 825
1
Dist. umbral (Hb) 1.03 (0.73) -1.05 (1.54)
Riesgo EDI 735 / 1,616 (45%) 436 / 825 (53%)
genero

    0 838 / 1,616 (52%) 394 / 825 (48%)
    1 778 / 1,616 (48%) 431 / 825 (52%)
1 Mean (SD); n / N (%)

4. Estimación RDD con rdrobust

bw_obj <- with(edat, rdbwselect(y = y, x = run, c = 0, p = 1, kernel = "triangular"))

h <- tryCatch({
  if (!is.null(bw_obj$h[1])) bw_obj$h[1] else 1
}, error = function(e) 1)

rd_main <- with(edat, rdrobust(y = y, x = run, c = 0,
                               p = 1, h = h, kernel = "triangular",
                               cluster = idninio))
summary(rd_main)
## Sharp RD estimates using local polynomial regression.
## 
## Number of Obs.                 2441
## BW type                      Manual
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.                  825         1616
## Eff. Number of Obs.             525          857
## Order est. (p)                    1            1
## Order bias  (q)                   2            2
## BW est. (h)                   1.000        1.000
## BW bias (b)                   1.000        1.000
## rho (h/b)                     1.000        1.000
## Unique Obs.                     825         1616
## 
## =====================================================================
##                    Point    Robust Inference
##                 Estimate         z     P>|z|      [ 95% C.I. ]       
## ---------------------------------------------------------------------
##      RD Effect     0.054     0.263     0.793    [-0.153 , 0.200]     
## =====================================================================

5. Regresiones paramétricas

edat <- edat %>% mutate(y_pct = y * 100)

fe1 <- felm(y_pct ~ D + run | factor(idninio) | 0 | idninio, data = edat, exactDOF = TRUE)
fe2 <- felm(y_pct ~ D + run + genero | factor(idninio) | 0 | idninio, data = edat, exactDOF = TRUE)

stargazer(fe1, fe2,
          title = "RDD paramétrico – Resultado EDI (riesgo)",
          type = "text", df = FALSE,
          covariate.labels = c("Tratado (D)", "Run", "Género = niño"))
## 
## RDD paramétrico – Resultado EDI (riesgo)
## ================================================
##                         Dependent variable:     
##                     ----------------------------
##                                y_pct            
##                          (1)            (2)     
## ------------------------------------------------
## Tratado (D)             5.333          5.333    
##                        (8.480)        (8.480)   
##                                                 
## Run                    -6.124*        -6.124*   
##                        (3.551)        (3.551)   
##                                                 
## Género = niño                                   
##                                       (0.000)   
##                                                 
## ------------------------------------------------
## Observations            2,441          2,441    
## R2                      0.917          0.917    
## Adjusted R2             0.265          0.265    
## Residual Std. Error     42.833        42.833    
## ================================================
## Note:                *p<0.1; **p<0.05; ***p<0.01

6. Sensibilidad al ancho de banda

window_vals <- c(h / 2, h, h * 2)
obs_por_banda <- sapply(window_vals, function(w) sum(abs(edat$run) <= w))
print(obs_por_banda)
## [1]  844 1432 2202
res_list <- lapply(window_vals, function(w) {
  data_sub <- subset(edat, abs(run) <= w)
  if (nrow(data_sub) >= 50) {
    tryCatch(
      felm(y_pct ~ D + run | factor(idninio) | 0 | idninio, data = data_sub),
      error = function(e) NULL
    )
  } else {
    NULL
  }
})

res_list_clean <- Filter(Negate(is.null), res_list)

if (length(res_list_clean) > 0) {
  stargazer(res_list_clean,
            title = "Sensibilidad al ancho de banda (±h/2, ±h, ±2h)",
            type = "text", df = FALSE)
} else {
  cat("❌ Ninguna especificación se pudo estimar. Verifica el tamaño muestral en las bandas.\n")
}
## 
## Sensibilidad al ancho de banda (±h/2, ±h, ±2h)
## =================================================
##                          Dependent variable:     
##                     -----------------------------
##                                 y_pct            
##                        (1)       (2)       (3)   
## -------------------------------------------------
## D                    -29.413    7.394     6.147  
##                     (28.820)  (17.185)  (11.023) 
##                                                  
## run                  -39.218    2.701    -5.004  
##                     (41.859)  (13.416)   (6.260) 
##                                                  
## -------------------------------------------------
## Observations           844      1,432     2,202  
## R2                    0.961     0.940     0.923  
## Adjusted R2           0.261     0.274     0.279  
## Residual Std. Error  42.982    42.610    42.432  
## =================================================
## Note:                 *p<0.1; **p<0.05; ***p<0.01

7. Gráfico del RDD

edat_plot <- edat %>%
  filter(abs(run) <= 3) %>%
  mutate(bin = cut(run, breaks = seq(-3, 3, 0.2), include.lowest = TRUE)) %>%
  group_by(bin) %>%
  summarise(run_mid = mean(run, na.rm = TRUE), y_bar = mean(y), .groups = "drop")

ggplot(edat, aes(x = run, y = y)) +
  geom_point(alpha = .2) +
  geom_line(data = edat_plot, aes(x = run_mid, y = y_bar), colour = "black", size = 1) +
  geom_smooth(data = subset(edat, run < 0), method = "loess", se = FALSE) +
  geom_smooth(data = subset(edat, run >= 0), method = "loess", se = FALSE) +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "red", size = 1) +
  labs(x = "Hb ajustada – 10.5 (g/dL)", y = "Probabilidad de riesgo EDI")