# ==========================================================
# MODELO DE ASISTENCIA ESCOLAR – CEDE
# RStudio | Base R
# ==========================================================
# ----------------------------------------------------------
# 1. LIBRERÍAS
# ----------------------------------------------------------
library(haven)
## Warning: package 'haven' was built under R version 4.5.2
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.5.2
## Cargando paquete requerido: zoo
##
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# ----------------------------------------------------------
# 2. CARGAR BASES
# ----------------------------------------------------------
buen_gobierno <- read_dta("PANEL_BUEN_GOBIERNO_2021.dta")
caracteristicas <- read_dta("PANEL_CARACTERISTICAS_GENERALES_2022.dta")
educacion <- read_dta("PANEL_EDUCACION_2021.dta")
# ----------------------------------------------------------
# 3. SELECCIÓN DE VARIABLES
# ----------------------------------------------------------
cg_limpia <- caracteristicas[, c(
"codmpio",
"ano",
"IPM",
"gini",
"indrural",
"disbogota",
"pobreza"
)]
ed_limpia <- educacion[, c(
"codmpio",
"ano",
"t_establ_o",
"t_establ",
"per_alfa",
"ind_alfa",
"asistesc_5_a_24"
)]
# ----------------------------------------------------------
# 4. UNIR BASES
# ----------------------------------------------------------
datos_unidos <- merge(
cg_limpia,
ed_limpia,
by = c("codmpio", "ano"),
all = TRUE
)
# ----------------------------------------------------------
# 5. LIMPIEZA DE DATOS
# ----------------------------------------------------------
datos_unidos <- na.omit(datos_unidos)
vars_num <- c(
"IPM", "gini", "indrural", "disbogota", "pobreza",
"t_establ_o", "t_establ", "per_alfa",
"ind_alfa", "asistesc_5_a_24"
)
datos_unidos[vars_num] <- lapply(datos_unidos[vars_num], as.numeric)
cat("Observaciones:", nrow(datos_unidos), "\n")
## Observaciones: 984
cat("Variables:", ncol(datos_unidos), "\n\n")
## Variables: 12
# ----------------------------------------------------------
# 6. BASE PARA LOS MODELOS
# ----------------------------------------------------------
y_var <- "asistesc_5_a_24"
x_vars <- c("indrural", "gini", "IPM", "disbogota")
df <- datos_unidos[, c(y_var, x_vars)]
df <- na.omit(df)
# ----------------------------------------------------------
# 7. MODELO MCO (NIVEL–NIVEL)
# ----------------------------------------------------------
fml_ols <- as.formula(
paste(y_var, "~", paste(x_vars, collapse = " + "))
)
m_ols <- lm(fml_ols, data = df)
summary(m_ols)
##
## Call:
## lm(formula = fml_ols, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50.099 -3.475 0.318 4.094 15.403
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 105.725141 2.875897 36.762 < 2e-16 ***
## indrural -5.374865 1.181544 -4.549 6.07e-06 ***
## gini -64.841139 6.794240 -9.544 < 2e-16 ***
## IPM -0.188010 0.020039 -9.382 < 2e-16 ***
## disbogota 0.002208 0.001510 1.462 0.144
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.566 on 979 degrees of freedom
## Multiple R-squared: 0.3551, Adjusted R-squared: 0.3525
## F-statistic: 134.8 on 4 and 979 DF, p-value: < 2.2e-16
# ----------------------------------------------------------
# 8. MODELO LOG–LIN
# ----------------------------------------------------------
df$log_asist <- log(df[[y_var]])
fml_loglin <- as.formula(
paste("log_asist ~", paste(x_vars, collapse = " + "))
)
m_loglin <- lm(fml_loglin, data = df)
summary(m_loglin)
##
## Call:
## lm(formula = fml_loglin, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6419 -0.0530 0.0171 0.0792 0.2760
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.985e+00 8.308e-02 60.001 < 2e-16 ***
## indrural -7.436e-02 3.413e-02 -2.179 0.0296 *
## gini -1.278e+00 1.963e-01 -6.510 1.20e-10 ***
## IPM -4.148e-03 5.789e-04 -7.165 1.53e-12 ***
## disbogota 5.792e-05 4.361e-05 1.328 0.1844
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1897 on 979 degrees of freedom
## Multiple R-squared: 0.2067, Adjusted R-squared: 0.2035
## F-statistic: 63.79 on 4 and 979 DF, p-value: < 2.2e-16
# ----------------------------------------------------------
# 9. DIAGNÓSTICOS – FUNCIÓN GENERAL
# ----------------------------------------------------------
diagnosticos <- function(modelo, nombre) {
cat("\n==============================\n")
cat("DIAGNÓSTICOS:", nombre, "\n")
cat("==============================\n")
res <- residuals(modelo)
fit <- fitted(modelo)
n <- length(res)
# Linealidad
par(mfrow = c(2,2))
for (v in colnames(model.matrix(modelo))[-1]) {
plot(df[[v]], res,
xlab = v, ylab = "Residuos",
main = paste("Residuos vs", v))
abline(h = 0, col = "red", lty = 2)
}
par(mfrow = c(1,1))
# Heterocedasticidad (Breusch–Pagan)
u2 <- res^2
m_bp <- lm(u2 ~ fit)
LM <- n * summary(m_bp)$r.squared
pval <- 1 - pchisq(LM, df = length(coef(modelo)) - 1)
cat("\nBreusch–Pagan\n")
cat("LM =", LM, "\n")
cat("p-value =", pval, "\n")
plot(fit, res,
xlab = "Valores ajustados",
ylab = "Residuos",
main = "Residuos vs Ajustados")
abline(h = 0, col = "red", lty = 2)
# Normalidad
print(shapiro.test(res))
qqnorm(res); qqline(res, col = "red")
# Multicolinealidad
X <- model.matrix(modelo)[, -1]
VIF <- diag(solve(cor(X)))
print(VIF)
# RESET
print(resettest(modelo, power = 2:3, type = "fitted"))
# Autocorrelación
print(dwtest(modelo))
}
# ----------------------------------------------------------
# 10. EJECUTAR DIAGNÓSTICOS
# ----------------------------------------------------------
diagnosticos(m_ols, "MCO nivel–nivel")
##
## ==============================
## DIAGNÓSTICOS: MCO nivel–nivel
## ==============================

##
## Breusch–Pagan
## LM = 36.44478
## p-value = 2.343822e-07

##
## Shapiro-Wilk normality test
##
## data: res
## W = 0.91166, p-value < 2.2e-16

## indrural gini IPM disbogota
## 1.812836 1.175749 2.333562 1.693738
##
## RESET test
##
## data: modelo
## RESET = 39.383, df1 = 2, df2 = 977, p-value < 2.2e-16
##
##
## Durbin-Watson test
##
## data: modelo
## DW = 1.5984, p-value = 9.477e-11
## alternative hypothesis: true autocorrelation is greater than 0
diagnosticos(m_loglin, "LOG–LIN")
##
## ==============================
## DIAGNÓSTICOS: LOG–LIN
## ==============================

##
## Breusch–Pagan
## LM = 11.72503
## p-value = 0.01951759

##
## Shapiro-Wilk normality test
##
## data: res
## W = 0.48468, p-value < 2.2e-16

## indrural gini IPM disbogota
## 1.812836 1.175749 2.333562 1.693738
##
## RESET test
##
## data: modelo
## RESET = 42.482, df1 = 2, df2 = 977, p-value < 2.2e-16
##
##
## Durbin-Watson test
##
## data: modelo
## DW = 1.8172, p-value = 0.001682
## alternative hypothesis: true autocorrelation is greater than 0
# ----------------------------------------------------------
# 11. GRÁFICOS OBSERVADO vs AJUSTADO
# ----------------------------------------------------------
par(mfrow = c(1,2))
plot(df[[y_var]], fitted(m_ols),
xlab = "Asistencia observada",
ylab = "Asistencia ajustada",
main = "MCO")
abline(0,1,col="red",lty=2)
plot(df$log_asist, fitted(m_loglin),
xlab = "Log(asistencia) observada",
ylab = "Log(asistencia) ajustada",
main = "LOG–LIN")
abline(0,1,col="red",lty=2)

par(mfrow = c(1,1))
# ----------------------------------------------------------
# 12. CRITERIOS DE INFORMACIÓN (AIC y BIC)
# ----------------------------------------------------------
cat("\n==============================\n")
##
## ==============================
cat("CRITERIOS DE INFORMACIÓN\n")
## CRITERIOS DE INFORMACIÓN
cat("==============================\n")
## ==============================
# AIC
AIC_ols <- AIC(m_ols)
AIC_loglin <- AIC(m_loglin)
# BIC
BIC_ols <- BIC(m_ols)
BIC_loglin <- BIC(m_loglin)
# Mostrar resultados
cat("\nAIC\n")
##
## AIC
cat("MCO nivel–nivel :", AIC_ols, "\n")
## MCO nivel–nivel : 6503.062
cat("LOG–LIN :", AIC_loglin, "\n")
## LOG–LIN : -472.216
cat("\nBIC\n")
##
## BIC
cat("MCO nivel–nivel :", BIC_ols, "\n")
## MCO nivel–nivel : 6532.412
cat("LOG–LIN :", BIC_loglin, "\n")
## LOG–LIN : -442.8662
# Comparación automática
cat("\nMODELO PREFERIDO SEGÚN:\n")
##
## MODELO PREFERIDO SEGÚN:
if (AIC_loglin < AIC_ols) {
cat("- AIC: LOG–LIN\n")
} else {
cat("- AIC: MCO nivel–nivel\n")
}
## - AIC: LOG–LIN
if (BIC_loglin < BIC_ols) {
cat("- BIC: LOG–LIN\n")
} else {
cat("- BIC: MCO nivel–nivel\n")
}
## - BIC: LOG–LIN
cat("\nCÓDIGO EJECUTADO CORRECTAMENTE\n")
##
## CÓDIGO EJECUTADO CORRECTAMENTE