paises <- c("MEX", "PHL", "ZMB", "COL", "THA")
# https://data.worldbank.org/indicator?tab=all
indicadores <- c(
  gdp_pc = "NY.GDP.PCAP.KD", # GDP per capita
  hiv = "SH.DYN.AIDS.ZS", # HIV prevalence (% ages 15-49)
  health_exp = "SH.XPD.CHEX.PC.CD" # Health expendature per capita
)
datos <- WDI(country = paises,
             indicator = indicadores,
             start = 2014,
             end = 2023)
datos <- datos %>%
  select(iso2c, country, year, gdp_pc, hiv, health_exp) %>%
  na.omit()
# Este paso se hace cuando las variables son muy grandes y cuando la relación es proporcional.
# NO USAR cuando hay valores negativos o ceros, cuando los datos son porcentajes o
# cuando la relación es lineal.
datos <- datos %>%
  mutate(
    lgdp_pc = log(gdp_pc),
    lhiv = log(hiv),
    lhealth = log(health_exp),
  )
panel_data <- pdata.frame(datos, index = c("iso2c", "year"))
# Pooled Ordinary Least Squares Model (Pooled OLS)
modelo_pool <- plm(lhiv ~ lgdp_pc + lhealth,
                   data = panel_data,
                   model = "pooling")
summary(modelo_pool)
## Pooling Model
## 
## Call:
## plm(formula = lhiv ~ lgdp_pc + lhealth, data = panel_data, model = "pooling")
## 
## Balanced Panel: n = 5, T = 10, N = 50
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -2.39977 -0.45549  0.30925  0.85917  1.28335 
## 
## Coefficients:
##             Estimate Std. Error t-value  Pr(>|t|)    
## (Intercept) 15.22101    2.99280  5.0859 6.289e-06 ***
## lgdp_pc     -2.59492    0.72736 -3.5676 0.0008418 ***
## lhealth      1.16862    0.63206  1.8489 0.0707673 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    104.79
## Residual Sum of Squares: 57.348
## R-Squared:      0.45274
## Adj. R-Squared: 0.42945
## F-statistic: 19.4413 on 2 and 47 DF, p-value: 7.0391e-07
# Compara Pooled vs Aleatorios
# Si p-value < 0.05, Pooled NO es adecuado, probar Aleatorios
# Si p-value > 0.05, usar Pooled
plmtest(modelo_pool, type = "bp")
## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  lhiv ~ lgdp_pc + lhealth
## chisq = 195.54, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
model_fe <- plm(lhiv ~ lgdp_pc + lhealth,
                data = panel_data,
                model = "within")
summary(model_fe)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = lhiv ~ lgdp_pc + lhealth, data = panel_data, model = "within")
## 
## Balanced Panel: n = 5, T = 10, N = 50
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -0.466281 -0.065444 -0.011854  0.067430  0.395080 
## 
## Coefficients:
##         Estimate Std. Error t-value  Pr(>|t|)    
## lgdp_pc 2.116566   0.597209  3.5441 0.0009642 ***
## lhealth 0.077004   0.191992  0.4011 0.6903465    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    2.1056
## Residual Sum of Squares: 1.4034
## R-Squared:      0.3335
## Adj. R-Squared: 0.2405
## F-statistic: 10.7579 on 2 and 43 DF, p-value: 0.00016284
# Compara Fijos vs Pooled
# Si p-value < 0.05, usar Efectos Fijos
# Si p-value > 0.05, usar Pooled
pFtest(model_fe, modelo_pool)
## 
##  F test for individual effects
## 
## data:  lhiv ~ lgdp_pc + lhealth
## F = 428.54, df1 = 4, df2 = 43, p-value < 2.2e-16
## alternative hypothesis: significant effects
model_re <- plm(lhiv ~ lgdp_pc + lhealth,
                data = panel_data,
                model = "random")
summary(model_re)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = lhiv ~ lgdp_pc + lhealth, data = panel_data, model = "random")
## 
## Balanced Panel: n = 5, T = 10, N = 50
## 
## Effects:
##                   var std.dev share
## idiosyncratic 0.03264 0.18066 0.012
## individual    2.67724 1.63623 0.988
## theta: 0.9651
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -0.501226 -0.095662 -0.042098  0.157757  0.403542 
## 
## Coefficients:
##              Estimate Std. Error z-value Pr(>|z|)   
## (Intercept) -12.32040    4.42328 -2.7854 0.005347 **
## lgdp_pc       1.35485    0.58560  2.3136 0.020690 * 
## lhealth       0.14948    0.20509  0.7289 0.466088   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    2.2306
## Residual Sum of Squares: 1.7796
## R-Squared:      0.20218
## Adj. R-Squared: 0.16823
## Chisq: 11.9104 on 2 DF, p-value: 0.0025923
# Compara Fijos vs Aleatorios
# Si p-value < 0.05, usar Efectos Fijos
# Si p-value > 0.05, usar Efectos Aleatorios
phtest(model_fe, model_re)
## 
##  Hausman Test
## 
## data:  lhiv ~ lgdp_pc + lhealth
## chisq = 28.715, df = 2, p-value = 5.817e-07
## alternative hypothesis: one model is inconsistent
# Evalúa si la varianza de los errores es constante.
# install.packages("lmtest")
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
bptest(model_fe)
## 
##  studentized Breusch-Pagan test
## 
## data:  model_fe
## BP = 19.767, df = 2, p-value = 5.101e-05
# Interpretación:
# Si p-value < 0.05 → Existe heterocedasticidad (problema detectado)
# Si p-value > 0.05 → No hay evidencia de heterocedasticidad
# Evalúa si los errores están correlacionados en el tiempo dentro de cada  país.
# Prueba de Wooldridge (más apropiada para Efectos Fijos)
pwartest(model_fe)
## 
##  Wooldridge's test for serial correlation in FE panels
## 
## data:  model_fe
## F = 278.6, df1 = 1, df2 = 43, p-value < 2.2e-16
## alternative hypothesis: serial correlation
# Prueba Breusch-Godfrey para panel (más apropiada para Efectos Aleatorios)
# pbgtest(model_re)
# Interpretación:
# Si p-value < 0.05 → Existe autocorrelación serial (problema detectado)
# Si p-value > 0.05 → No hay evidencia de autocorrelación
# Corrige heterocedasticidad y autocorrelación dentro de cada país
# install.packages("sandwich")
library(sandwich)
modelo_robusto <- coeftest(model_fe,
                           vcov = vcovHC(model_fe,
                                         method = "arellano",
                                         type = "HC1",
                                         cluster = "group"))
modelo_robusto
## 
## t test of coefficients:
## 
##         Estimate Std. Error t value Pr(>|t|)  
## lgdp_pc 2.116566   0.795649  2.6602  0.01093 *
## lhealth 0.077004   0.351166  0.2193  0.82747  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interpretación:
# Los coeficientes NO cambian.
# Cambian los errores estándar, estadísticos t y p-values.
# Si una variable sigue siendo significativa después de la corrección,
# el resultado es estadísticamente más confiable.