paises <- c("MEX", "PHL", "ZMB", "COL", "THA")
# https://data.worldbank.org/indicator?tab=all
indicadores <- c(
  gdp_pc = "NY.GDP.PCAP.KD",        # GDP per capita
  hiv = "SH.DYN.AIDS.ZS",           # HIV prevalence (% ages 15-49)
  health_exp = "SH.XPD.CHEX.PC.CD"  # Health expenditure per capita
)
datos <- WDI(country = paises,
             indicator = indicadores,
             start = 2014,
             end = 2023)
datos <- datos %>%
  select(iso2c, country, year, gdp_pc, hiv, health_exp) %>%
  na.omit()
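# Sketch (not in the original script): confirm how many complete observations
# remain per country after na.omit(); an unbalanced panel would change the models below.
table(datos$country)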
# This step is applied when the variables take very large values and the relationship is proportional.
# DO NOT USE when there are negative values or zeros, when the data are percentages, or
# when the relationship is linear.
datos <- datos %>%
  mutate(
    lgdp_pc = log(gdp_pc),
    lhiv = log(hiv),
    lhealth = log(health_exp)
  )
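# Sanity check for the caveat above (a sketch using the columns already in datos):
# log() returns -Inf or NaN for zeros and negatives, so count any non-positive values first.
sapply(datos[, c("gdp_pc", "hiv", "health_exp")], function(x) sum(x <= 0, na.rm = TRUE))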
panel_data <- pdata.frame(datos, index = c("iso2c", "year"))
# Pooled Ordinary Least Squares Model (Pooled OLS)
modelo_pool <- plm(lhiv ~ lgdp_pc + lhealth,
                   data = panel_data,
                   model = "pooling")
summary(modelo_pool)
## Pooling Model
##
## Call:
## plm(formula = lhiv ~ lgdp_pc + lhealth, data = panel_data, model = "pooling")
##
## Balanced Panel: n = 5, T = 10, N = 50
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -2.39977 -0.45549 0.30925 0.85917 1.28335
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 15.22101 2.99280 5.0859 6.289e-06 ***
## lgdp_pc -2.59492 0.72736 -3.5676 0.0008418 ***
## lhealth 1.16862 0.63206 1.8489 0.0707673 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 104.79
## Residual Sum of Squares: 57.348
## R-Squared: 0.45274
## Adj. R-Squared: 0.42945
## F-statistic: 19.4413 on 2 and 47 DF, p-value: 7.0391e-07
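# Cross-check (sketch, not part of the original workflow): pooled OLS stacks all
# country-years into a single regression, so lm() on the unindexed data reproduces
# the plm(model = "pooling") coefficients.
coef(lm(lhiv ~ lgdp_pc + lhealth, data = datos))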
# Compares Pooled vs Random Effects
# If p-value < 0.05, Pooled is NOT adequate; try Random Effects
# If p-value > 0.05, use Pooled
plmtest(modelo_pool, type = "bp")
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: lhiv ~ lgdp_pc + lhealth
## chisq = 195.54, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
model_fe <- plm(lhiv ~ lgdp_pc + lhealth,
                data = panel_data,
                model = "within")
summary(model_fe)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = lhiv ~ lgdp_pc + lhealth, data = panel_data, model = "within")
##
## Balanced Panel: n = 5, T = 10, N = 50
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.466281 -0.065444 -0.011854 0.067430 0.395080
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## lgdp_pc 2.116566 0.597209 3.5441 0.0009642 ***
## lhealth 0.077004 0.191992 0.4011 0.6903465
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 2.1056
## Residual Sum of Squares: 1.4034
## R-Squared: 0.3335
## Adj. R-Squared: 0.2405
## F-statistic: 10.7579 on 2 and 43 DF, p-value: 0.00016284
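# The within estimator absorbs a separate intercept for each country; a quick sketch
# to inspect them with plm's fixef() extractor:
fixef(model_fe)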
# Compares Fixed Effects vs Pooled
# If p-value < 0.05, use Fixed Effects
# If p-value > 0.05, use Pooled
pFtest(model_fe, modelo_pool)
##
## F test for individual effects
##
## data: lhiv ~ lgdp_pc + lhealth
## F = 428.54, df1 = 4, df2 = 43, p-value < 2.2e-16
## alternative hypothesis: significant effects
model_re <- plm(lhiv ~ lgdp_pc + lhealth,
                data = panel_data,
                model = "random")
summary(model_re)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = lhiv ~ lgdp_pc + lhealth, data = panel_data, model = "random")
##
## Balanced Panel: n = 5, T = 10, N = 50
##
## Effects:
## var std.dev share
## idiosyncratic 0.03264 0.18066 0.012
## individual 2.67724 1.63623 0.988
## theta: 0.9651
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.501226 -0.095662 -0.042098 0.157757 0.403542
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) -12.32040 4.42328 -2.7854 0.005347 **
## lgdp_pc 1.35485 0.58560 2.3136 0.020690 *
## lhealth 0.14948 0.20509 0.7289 0.466088
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 2.2306
## Residual Sum of Squares: 1.7796
## R-Squared: 0.20218
## Adj. R-Squared: 0.16823
## Chisq: 11.9104 on 2 DF, p-value: 0.0025923
# Compares Fixed Effects vs Random Effects
# If p-value < 0.05, use Fixed Effects
# If p-value > 0.05, use Random Effects
phtest(model_fe, model_re)
##
## Hausman Test
##
## data: lhiv ~ lgdp_pc + lhealth
## chisq = 28.715, df = 2, p-value = 5.817e-07
## alternative hypothesis: one model is inconsistent
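# Since the Hausman test rejects Random Effects here, a side-by-side sketch of the
# slope estimates from the two models (estimated above) shows the divergence the test picks up:
cbind(FE = coef(model_fe), RE = coef(model_re)[names(coef(model_fe))])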
# Tests whether the error variance is constant (homoskedasticity).
# install.packages("lmtest")
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(model_fe)
##
## studentized Breusch-Pagan test
##
## data: model_fe
## BP = 19.767, df = 2, p-value = 5.101e-05
# Interpretation:
# If p-value < 0.05 → heteroskedasticity is present (problem detected)
# If p-value > 0.05 → no evidence of heteroskedasticity
# Tests whether the errors are correlated over time within each country.
# Wooldridge test (more appropriate for Fixed Effects)
pwartest(model_fe)
##
## Wooldridge's test for serial correlation in FE panels
##
## data: model_fe
## F = 278.6, df1 = 1, df2 = 43, p-value < 2.2e-16
## alternative hypothesis: serial correlation
# Breusch-Godfrey test for panels (more appropriate for Random Effects)
# pbgtest(model_re)
# Interpretation:
# If p-value < 0.05 → serial autocorrelation is present (problem detected)
# If p-value > 0.05 → no evidence of autocorrelation
# Corrects for heteroskedasticity and autocorrelation within each country
# install.packages("sandwich")
library(sandwich)
modelo_robusto <- coeftest(model_fe,
                           vcov = vcovHC(model_fe,
                                         method = "arellano",
                                         type = "HC1",
                                         cluster = "group"))
modelo_robusto
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## lgdp_pc 2.116566 0.795649 2.6602 0.01093 *
## lhealth 0.077004 0.351166 0.2193 0.82747
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interpretation:
# The coefficients do NOT change.
# The standard errors, t-statistics, and p-values do change.
# If a variable remains significant after the correction,
# the result is statistically more reliable.
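# Sketch: put the conventional and cluster-robust standard errors side by side
# to see how much the correction widens them (the coefficients stay identical).
cbind(conventional = sqrt(diag(vcov(model_fe))),
      robust = sqrt(diag(vcovHC(model_fe, method = "arellano",
                                type = "HC1", cluster = "group"))))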