Using Monte Carlo simulation, we show that the OLS estimators of a simple linear regression are unbiased and that their variance decreases as the sample size grows. Two demos are presented:

Demo 1: with the sample size \(n\) fixed, we show that the running means of \(\hat\beta_0\) and \(\hat\beta_1\) converge to the true \(\beta_0,\beta_1\) as the number of simulations \(R\) increases.

Demo 2: with the number of replications per design fixed, we vary \(n\) over a grid and show that both the bias and the RMSE of the estimators shrink.
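For reference, these are the standard finite-sample OLS results that the simulations illustrate; under the classical assumptions, conditioning on the regressors,

\[
E\big[\hat\beta_0\big]=\beta_0, \qquad
E\big[\hat\beta_1\big]=\beta_1, \qquad
\operatorname{Var}\big(\hat\beta_1 \mid x_1,\dots,x_n\big)=\frac{\sigma^2}{\sum_{i=1}^{n}(x_i-\bar{x})^2},
\]

so the sampling variance of \(\hat\beta_1\) shrinks roughly at rate \(1/n\), which is what Demo 2 checks empirically.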
set.seed(123)
beta0 <- 2   # true intercept
beta1 <- 3   # true slope
sigma <- 2   # standard deviation of the error
n <- 80      # sample size (fixed in this demo)
R <- 8000    # number of simulations
b0_hat <- numeric(R)
b1_hat <- numeric(R)
for (r in 1:R) {
  x   <- runif(n, 0, 10)        # draw the regressor
  eps <- rnorm(n, 0, sigma)     # draw the error term
  y   <- beta0 + beta1*x + eps  # generate the response
  fit <- lm(y ~ x)              # fit OLS
  b0_hat[r] <- coef(fit)[1]
  b1_hat[r] <- coef(fit)[2]
}
runmean <- function(z) cumsum(z) / seq_along(z)  # running (cumulative) mean
b0_run <- runmean(b0_hat)
b1_run <- runmean(b1_hat)
op <- par(mfrow = c(1,2), mar = c(4,4,2,1))
plot(b0_run, type="l", xlab="Number of simulations (R)",
     ylab=expression(paste("Running mean of ", hat(beta)[0])),
     main=expression(paste("Convergence of ", hat(beta)[0])),
     lwd=1.2)
abline(h = beta0, col=2, lwd=2)  # true parameter
grid()
plot(b1_run, type="l", xlab="Number of simulations (R)",
     ylab=expression(paste("Running mean of ", hat(beta)[1])),
     main=expression(paste("Convergence of ", hat(beta)[1])),
     lwd=1.2)
abline(h = beta1, col=2, lwd=2)
grid()
par(op)
cat("Demo 1 — final means:\n")
## Demo 1 — final means:
cat("E[beta0_hat] ≈", round(mean(b0_hat), 4), "vs", beta0, "\n")
## E[beta0_hat] ≈ 2.0027 vs 2
cat("E[beta1_hat] ≈", round(mean(b1_hat), 4), "vs", beta1, "\n")
## E[beta1_hat] ≈ 2.9997 vs 3
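As an optional sanity check (a sketch that reuses the Demo 1 objects), the Monte Carlo dispersion of \(\hat\beta_1\) can be compared with the variance formula above; with \(x \sim U(0,10)\), \(\sum_i (x_i-\bar{x})^2 \approx n \cdot 100/12\), so the theoretical standard deviation is approximately \(\sigma/\sqrt{n \cdot 100/12}\).

# Approximate theoretical SD of beta1_hat: Var(x) = 100/12 for x ~ Uniform(0, 10)
sd_b1_theory <- sigma / sqrt(n * 100/12)
cat("Monte Carlo SD of beta1_hat:", round(sd(b1_hat), 4), "\n")
cat("Approx. theoretical SD     :", round(sd_b1_theory, 4), "\n")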
n_grid <- c(20, 40, 80, 160, 320, 640, 1000)  # sample sizes
R_per_n <- 2000                               # simulations for each n
# Demo 2: repeat the Monte Carlo exercise for each sample size in n_grid
res <- lapply(n_grid, function(nn) {
  b0 <- numeric(R_per_n)
  b1 <- numeric(R_per_n)
  for (r in 1:R_per_n) {
    x   <- runif(nn, 0, 10)
    eps <- rnorm(nn, 0, sigma)
    y   <- beta0 + beta1*x + eps
    fit <- lm(y ~ x)
    b0[r] <- coef(fit)[1]
    b1[r] <- coef(fit)[2]
  }
  data.frame(  # summary statistics over the R_per_n replications
    n = nn,
    mean_b0 = mean(b0),
    mean_b1 = mean(b1),
    bias_b0 = mean(b0) - beta0,
    bias_b1 = mean(b1) - beta1,
    sd_b0 = sd(b0),
    sd_b1 = sd(b1),
    rmse_b0 = sqrt(mean((b0 - beta0)^2)),
    rmse_b1 = sqrt(mean((b1 - beta1)^2)),
    se_mean_b0 = sd(b0)/sqrt(R_per_n),
    se_mean_b1 = sd(b1)/sqrt(R_per_n)
  )
})
tab <- do.call(rbind, res)
tab
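The table also allows a quick check of the expected \(1/\sqrt{n}\) rate: regressing the log of the Monte Carlo standard deviations on \(\log n\) should give a slope close to \(-0.5\). A minimal sketch:

# Slopes should be close to -0.5 if sd(beta_hat) decays like C/sqrt(n)
coef(lm(log(sd_b0) ~ log(n), data = tab))["log(n)"]
coef(lm(log(sd_b1) ~ log(n), data = tab))["log(n)"]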
op <- par(mfrow = c(1,2), mar = c(4,4,2,1))
plot(tab$n, tab$mean_b0, type="b", pch=19,
     xlab="n (sample size)", ylab=expression(E[MC](hat(beta)[0])),
     main=expression(paste("Monte Carlo mean of ", hat(beta)[0])))
abline(h = beta0, col=2, lwd=2)
# error bars: +/- 1.96 simulation standard errors of the mean
arrows(tab$n, tab$mean_b0 - 1.96*tab$se_mean_b0,
       tab$n, tab$mean_b0 + 1.96*tab$se_mean_b0,
       angle=90, code=3, length=0.05)
grid()
plot(tab$n, tab$mean_b1, type="b", pch=19,
     xlab="n (sample size)", ylab=expression(E[MC](hat(beta)[1])),
     main=expression(paste("Monte Carlo mean of ", hat(beta)[1])))
abline(h = beta1, col=2, lwd=2)
arrows(tab$n, tab$mean_b1 - 1.96*tab$se_mean_b1,
       tab$n, tab$mean_b1 + 1.96*tab$se_mean_b1,
       angle=90, code=3, length=0.05)
grid()
par(op)
op <- par(mfrow = c(1,2), mar = c(4,4,2,1))
plot(tab$n, tab$rmse_b0, type="b", pch=19, log="x",
     xlab="n (log scale)", ylab="Intercept RMSE",
     main=expression(paste("RMSE of ", hat(beta)[0], " decreases with n")))
grid()
plot(tab$n, tab$rmse_b1, type="b", pch=19, log="x",
     xlab="n (log scale)", ylab="Slope RMSE",
     main=expression(paste("RMSE of ", hat(beta)[1], " decreases with n")))
grid()
par(op)
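Since the estimators are essentially unbiased, the RMSE is driven almost entirely by the standard deviation. A quick numerical sketch of the decomposition \(\mathrm{RMSE}^2 = \mathrm{bias}^2 + \mathrm{SD}^2\), which holds exactly up to the \((R-1)/R\) factor in the denominator of sd():

# RMSE^2 should essentially equal bias^2 + SD^2 for each n
with(tab, cbind(n,
                rmse2_b1  = rmse_b1^2,
                decomp_b1 = bias_b1^2 + sd_b1^2))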
As n grows, the bias tends to zero and the RMSE decreases, showing the consistency and increasing precision of the estimators.

sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26100)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=Spanish_Argentina.utf8 LC_CTYPE=Spanish_Argentina.utf8
## [3] LC_MONETARY=Spanish_Argentina.utf8 LC_NUMERIC=C
## [5] LC_TIME=Spanish_Argentina.utf8
##
## time zone: America/Buenos_Aires
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.37 R6_2.6.1 fastmap_1.2.0 xfun_0.54
## [5] cachem_1.1.0 knitr_1.50 htmltools_0.5.8.1 rmarkdown_2.30
## [9] lifecycle_1.0.4 cli_3.6.5 vctrs_0.6.5 sass_0.4.10
## [13] jquerylib_0.1.4 compiler_4.5.1 rstudioapi_0.17.1 tools_4.5.1
## [17] evaluate_1.0.5 bslib_0.9.0 yaml_2.3.10 rlang_1.1.6
## [21] jsonlite_2.0.0