############################################################
# LABORATORIO ECONOMETRIA AVANZADA
# MCO vs SE robustos vs GLS/WLS
############################################################

# ================================
# 1. LIBRERIAS
# ================================

library(lmtest)
## Warning: package 'lmtest' was built under R version 4.5.2
## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)
library(ggplot2)

set.seed(123)

# ================================
# 2. PARAMETROS
# ================================

n <- 100
R <- 1000

beta0 <- 1
beta1 <- 2
sigma <- 1

# ================================
# 3. ALMACENAMIENTO
# ================================

b_ols <- numeric(R)
b_hc3 <- numeric(R)
b_wls <- numeric(R)

cover_ols <- numeric(R)
cover_hc3 <- numeric(R)
cover_wls <- numeric(R)

reject_ols <- numeric(R)
reject_hc3 <- numeric(R)
reject_wls <- numeric(R)

# ================================
# 4. SIMULACION
# ================================

for(i in 1:R){
  
  # generar x
  x <- runif(n,1,5)
  
  # heterocedasticidad
  u <- rnorm(n,0,sigma*x)
  
  y <- beta0 + beta1*x + u
  
  # =====================
  # MCO
  # =====================
  
  ols <- lm(y ~ x)
  
  b <- coef(ols)[2]
  se <- summary(ols)$coefficients[2,2]
  
  ci_low <- b - 1.96*se
  ci_high <- b + 1.96*se
  
  b_ols[i] <- b
  cover_ols[i] <- (ci_low <= beta1 & beta1 <= ci_high)
  reject_ols[i] <- (abs((b-beta1)/se) > 1.96)
  
  # =====================
  # HC3
  # =====================
  
  robust <- coeftest(ols, vcov = vcovHC(ols, type="HC3"))
  
  b <- robust[2,1]
  se <- robust[2,2]
  
  ci_low <- b - 1.96*se
  ci_high <- b + 1.96*se
  
  b_hc3[i] <- b
  cover_hc3[i] <- (ci_low <= beta1 & beta1 <= ci_high)
  reject_hc3[i] <- (abs((b-beta1)/se) > 1.96)
  
  # =====================
  # WLS (GLS correcto)
  # =====================
  
  wls <- lm(y ~ x, weights = 1/x^2)
  
  b <- coef(wls)[2]
  se <- summary(wls)$coefficients[2,2]
  
  ci_low <- b - 1.96*se
  ci_high <- b + 1.96*se
  
  b_wls[i] <- b
  cover_wls[i] <- (ci_low <= beta1 & beta1 <= ci_high)
  reject_wls[i] <- (abs((b-beta1)/se) > 1.96)
  
}

# ================================
# 5. METRICAS
# ================================

sesgo <- c(
  mean(b_ols - beta1),
  mean(b_hc3 - beta1),
  mean(b_wls - beta1)
)

rmse <- c(
  sqrt(mean((b_ols-beta1)^2)),
  sqrt(mean((b_hc3-beta1)^2)),
  sqrt(mean((b_wls-beta1)^2))
)

cobertura <- c(
  mean(cover_ols),
  mean(cover_hc3),
  mean(cover_wls)
)

tamano_test <- c(
  mean(reject_ols),
  mean(reject_hc3),
  mean(reject_wls)
)

resultados <- data.frame(
  Metodo=c("OLS","HC3","WLS"),
  Sesgo=sesgo,
  RMSE=rmse,
  Cobertura_IC95=cobertura,
  Tamano_Test_5=tamano_test
)

print(resultados)
##   Metodo        Sesgo      RMSE Cobertura_IC95 Tamano_Test_5
## 1    OLS -0.004260850 0.2897936          0.935         0.065
## 2    HC3 -0.004260850 0.2897936          0.948         0.052
## 3    WLS  0.002421286 0.2265671          0.953         0.047
############################################################
# PARTE B MEJORADA: CASO APLICADO
############################################################

set.seed(123)

n <- 200

Q <- runif(n,10,100)
Z <- rnorm(n)

# heterocedasticidad moderada
u <- rnorm(n,0,0.2)

logC <- 1 + 0.7*log(Q) + 0.3*Z + u

C <- exp(logC)

data <- data.frame(C,Q,Z)

############################################################
# MODELO
############################################################

modelo <- lm(log(C) ~ log(Q) + Z, data=data)

summary(modelo)
## 
## Call:
## lm(formula = log(C) ~ log(Q) + Z, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57230 -0.12129  0.00505  0.13651  0.50358 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.16735    0.10684   10.93   <2e-16 ***
## log(Q)       0.65883    0.02717   24.25   <2e-16 ***
## Z            0.30302    0.01493   20.29   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2027 on 197 degrees of freedom
## Multiple R-squared:  0.8297, Adjusted R-squared:  0.8279 
## F-statistic: 479.8 on 2 and 197 DF,  p-value: < 2.2e-16
############################################################
# ERRORES ROBUSTOS
############################################################

library(lmtest)
library(sandwich)

coeftest(modelo, vcov = vcovHC(modelo, type="HC3"))
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 1.167349   0.099951  11.679 < 2.2e-16 ***
## log(Q)      0.658832   0.025610  25.726 < 2.2e-16 ***
## Z           0.303015   0.014357  21.106 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############################################################
# TEST HETEROCEDASTICIDAD
############################################################

bptest(modelo)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo
## BP = 0.47523, df = 2, p-value = 0.7885
############################################################
# GRAFICO
############################################################

library(ggplot2)

data$residuos <- resid(modelo)
data$pred <- fitted(modelo)

ggplot(data, aes(pred,residuos))+
  geom_point()+
  geom_hline(yintercept=0,color="red")+
  theme_minimal()+
  labs(
    title="Residuos vs Valores Ajustados",
    x="Valores Ajustados",
    y="Residuos"
  )