MSE

Se parte del ejemplo expuesto en clase:

Se definen dos funciones para calcular el sesgo de \(\hat{p}_1 = Y/N\) y \(\hat{p}_2 = Y+0.5/N+1\), donde:

Para \(\hat{p}_1\):

\[\begin{aligned} B(\hat{p}_1) &= p - p\\ V(\hat{p}_1) &= \frac{p(1-p)}{N} \end{aligned}\]

Para \(\hat{p}_2\):

\[\begin{aligned} B(\hat{p}_2) &= \frac{Np + 0.5}{N+1} - p\\ V(\hat{p}_2) &= \frac{Np(1-p)}{(N+1)^2} \end{aligned}\]

### Para el primer estimador
mse_p_hat1 <- function(N = 10, p_pob){
  set.seed(13)
  Y <- rbinom(n = N, size = 1, prob = p_pob)

  p_hat1 <- sum(Y)/N
  
  ### Sesgo 
  expected_p_hat1 <- p_pob
  
  sesgo_p_hat1 <- expected_p_hat1 - p_pob
  
  ### Error Cuadrado Medio
  
  # Varianza 
  variance_p_hat1 <- (p_pob*(1-p_pob))/N
  
  # MSE
  variance_p_hat1 - sesgo_p_hat1^2
}

### Para el segundo estimador

mse_p_hat2 <- function(N = 10, p_pob){
  set.seed(13)
  Y <- rbinom(n = N, size = 1, prob = p_pob)
  
  p_hat2 <- (sum(Y) + 0.5)/(N + 1)
  
  
  ### Sesgo 
  expected_p_hat2 <- (N*p_pob + 0.5)/(N + 1)
  
  sesgo_p_hat2 <- expected_p_hat2 - p_pob
  
  ### Error Cuadrado Medio
  
  # Varianza 
  variance_p_hat2 <- (N*p_pob*(1 - p_pob))/((N + 1)^2)
  
  # MSE
  mse_p_hat2 <- variance_p_hat2 - sesgo_p_hat2^2
}

Se hace una comparación gráfica de los errores cuadrados medios. Esta comparación demuestra que un valor mayor de \(N\) reduce el error cuadrado medio de ambos estimadores y ambos producen resultados similares cuando \(N = 100\)

library(ggplot2)

# N = 10
ggplot() +
  xlim(0, 1) + 
  geom_function(aes(col = "p_hat_1"), fun = mse_p_hat1, args = list(N = 10)) + 
  geom_function(aes(col = "p_hat_2"), fun = mse_p_hat2, args = list(N = 10)) +
  scale_y_continuous(breaks = scales::pretty_breaks(5)) +
  labs(title = "MSE of p_hat_1 and p_hat_2",
       subtitle = "N = 10",
       x = "p",
       y = "MSE")

# N = 100
ggplot() +
  xlim(0, 1) + 
  geom_function(aes(col = "p_hat_1"), fun = mse_p_hat1, args = list(N = 100)) + 
  geom_function(aes(col = "p_hat_2"), fun = mse_p_hat2, args = list(N = 100)) +
  scale_y_continuous(breaks = scales::pretty_breaks(5)) +
  labs(title = "MSE of p_hat_1 and p_hat_2",
       subtitle = "N = 100",
       x = "p",
       y = "MSE")

Mapa de calor

En este mapa de calor se establece una probabilidad de 0.5. Es posible apreciar que el MSE de ambos estimadores se reduce conforme el tamaño de la muestra aumenta.

mse_df <- data.frame(mse_p_hat1 = mse_p_hat1(seq(10, 150, 10), p_pob = 0.5),
                     mse_p_hat2 = mse_p_hat2(seq(10, 150, 10), p_pob = 0.5))

n_values <- as.character(seq(10, 150, 10))

mse_matrix <- as.matrix(mse_df)
dimnames(mse_matrix) <- list(n_values, c("mse_p1", "mse_p2"))

heatmap(t(mse_matrix), Colv = NA, Rowv = NA, scale = "row", 
        main = "Mapa de calor de los MSE",
        xlab = "Tamaño de la muestra")