1 Paquetes

# install.packages(c("tidyverse","scales"))   # si faltan
library(tidyverse)
library(scales)

# Función para apilar dos gráficos SIN depender de 'patchwork'
stack_plots <- function(p1, p2) {
  if (requireNamespace("cowplot", quietly = TRUE)) {
    return(cowplot::plot_grid(p1, p2, ncol = 1, align = "v"))
  } else if (requireNamespace("gridExtra", quietly = TRUE)) {
    return(gridExtra::grid.arrange(p1, p2, ncol = 1))
  } else {
    warning("Instalá 'cowplot' o 'gridExtra' para verlos juntos. Se imprimen por separado.")
    print(p1); print(p2)
    return(invisible(NULL))
  }
}

2 1. IC para la diferencia de medias (varianzas iguales): pooled

set.seed(123)

n1 <- 20; n2 <- 25
mu1 <- 10; mu2 <- 12
sigma <- 3

x1 <- rnorm(n1, mean = mu1, sd = sigma)
x2 <- rnorm(n2, mean = mu2, sd = sigma)

datos_pooled <- tibble(
  valor = c(x1, x2),
  grupo = factor(c(rep("Grupo 1", n1), rep("Grupo 2", n2)))
)

xbar1 <- mean(x1); xbar2 <- mean(x2)
s1 <- sd(x1);     s2 <- sd(x2)

sp2 <- ((n1-1)*s1^2 + (n2-1)*s2^2) / (n1 + n2 - 2)
sp  <- sqrt(sp2)
df  <- n1 + n2 - 2
alpha <- 0.05
tcrit <- qt(1 - alpha/2, df = df)

diff_hat <- xbar1 - xbar2
se_diff  <- sp * sqrt(1/n1 + 1/n2)
IC_pooled <- diff_hat + c(-1, 1)*tcrit*se_diff

2.1 Gráficos

params <- datos_pooled %>%
  group_by(grupo) %>%
  summarise(mu = mean(valor), sd = sd(valor), .groups = "drop")

xgrid <- tibble(x = seq(min(datos_pooled$valor) - 3, max(datos_pooled$valor) + 3, length.out = 400))

norm_curves <- xgrid %>%
  tidyr::crossing(params) %>%
  mutate(d = dnorm(x, mean = mu, sd = sd))

g_pooled_hist <- ggplot(datos_pooled, aes(valor, fill = grupo, color = grupo)) +
  geom_histogram(aes(y = ..density..), bins = 30, position = "identity",
                 alpha = 0.25, color = NA) +
  geom_line(data = norm_curves, aes(x = x, y = d, color = grupo), linewidth = 1.2) +
  labs(title = "Curva Normal teórica por grupo (varianzas iguales)",
       x = "Valor", y = "Densidad") +
  theme_minimal()

ci_df <- tibble(estimacion = diff_hat, li = IC_pooled[1], ls = IC_pooled[2])

g_pooled_ci <- ggplot(ci_df, aes(x = "μ1 − μ2", y = estimacion)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = li, ymax = ls), width = 0.05, linewidth = 1) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  labs(title = "IC 95% para (μ1 − μ2) — pooled",
       x = "", y = "Diferencia estimada") +
  theme_minimal()

stack_plots(g_pooled_hist, g_pooled_ci)

3 2. IC para la diferencia de medias (varianzas desiguales): Welch

set.seed(42)

n1_w <- 15; n2_w <- 30
mu1_w <- 10; mu2_w <- 12
sd1_w <- 2; sd2_w <- 5

x1w <- rnorm(n1_w, mu1_w, sd1_w)
x2w <- rnorm(n2_w, mu2_w, sd2_w)

datos_w <- tibble(
  valor = c(x1w, x2w),
  grupo = factor(c(rep("Grupo 1", n1_w), rep("Grupo 2", n2_w)))
)

xbar1w <- mean(x1w); xbar2w <- mean(x2w)
s1w <- sd(x1w);     s2w <- sd(x2w)

SEw  <- sqrt(s1w^2/n1_w + s2w^2/n2_w)

df_welch <- ( (s1w^2/n1_w + s2w^2/n2_w)^2 ) /
  ( ((s1w^2/n1_w)^2)/(n1_w-1) + ((s2w^2/n2_w)^2)/(n2_w-1) )

alpha <- 0.05
tcrit_w <- qt(1 - alpha/2, df = df_welch)

diff_hat_w <- xbar1w - xbar2w
IC_welch <- diff_hat_w + c(-1, 1)*tcrit_w*SEw

3.1 Gráficos

params_w <- datos_w %>% group_by(grupo) %>%
  summarise(mu = mean(valor), sd = sd(valor), .groups = "drop")

xgrid_w <- tibble(x = seq(min(datos_w$valor) - 3, max(datos_w$valor) + 3, length.out = 400))
curvas_w <- tidyr::crossing(xgrid_w, params_w) %>%
  mutate(d = dnorm(x, mean = mu, sd = sd))

g_w_hist <- ggplot(datos_w, aes(valor, fill = grupo, color = grupo)) +
  geom_histogram(aes(y = ..density..), bins = 30, position = "identity",
                 alpha = 0.25, color = NA) +
  geom_line(data = curvas_w, aes(x = x, y = d, color = grupo), linewidth = 1.2) +
  labs(title = "Welch: varianzas desiguales",
       x = "Valor", y = "Densidad") +
  theme_minimal()

g_w_box <- ggplot(datos_w, aes(grupo, valor, fill = grupo)) +
  geom_boxplot(width = 0.5, alpha = 0.45, outlier.alpha = 0.5) +
  stat_summary(fun = mean, geom = "point", shape = 21, size = 3, fill = "white") +
  labs(title = "Dispersión por grupo", x = NULL, y = "Valor") +
  theme_minimal() + theme(legend.position = "none")

stack_plots(g_w_hist, g_w_box)

4 3. IC para proporciones y para p1 − p2

set.seed(2025)

n1p <- 150; n2p <- 200
p1_true <- 0.35; p2_true <- 0.25

x1p <- rbinom(1, n1p, p1_true)
x2p <- rbinom(1, n2p, p2_true)

phat1 <- x1p/n1p
phat2 <- x2p/n2p
alpha <- 0.05
z <- qnorm(1 - alpha/2)

se1 <- sqrt(phat1*(1 - phat1)/n1p)
se2 <- sqrt(phat2*(1 - phat2)/n2p)
ic1 <- phat1 + c(-1, 1)*z*se1
ic2 <- phat2 + c(-1, 1)*z*se2

se_diff <- sqrt(phat1*(1 - phat1)/n1p + phat2*(1 - phat2)/n2p)
diff_hat <- phat1 - phat2
ic_diff <- diff_hat + c(-1, 1)*z*se_diff

4.1 Gráficos

plot_props <- tibble(
  grupo = c("Grupo 1", "Grupo 2"),
  p = c(phat1, phat2),
  li = c(ic1[1], ic2[1]),
  ls = c(ic1[2], ic2[2])
)

g_prop_indiv <- ggplot(plot_props, aes(grupo, p, fill = grupo)) +
  geom_col(width = 0.55, alpha = 0.6, color = NA) +
  geom_errorbar(aes(ymin = li, ymax = ls), width = 0.15, linewidth = 1) +
  scale_y_continuous(labels = percent_format(accuracy = 1), limits = c(0, 1)) +
  labs(title = "Proporciones muestrales con IC 95%", x = NULL, y = "Proporción") +
  theme_minimal() + theme(legend.position = "none")

plot_diff <- tibble(est = diff_hat, li = ic_diff[1], ls = ic_diff[2])

g_prop_diff <- ggplot(plot_diff, aes(x = "p1 − p2", y = est)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = li, ymax = ls), width = 0.06, linewidth = 1) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  labs(title = "IC 95% para p1 − p2", x = "", y = "Diferencia estimada") +
  theme_minimal()

stack_plots(g_prop_indiv, g_prop_diff)

5 4. IC para una varianza con \(\chi^2\)

n  <- 15; s  <- 2.5; s2 <- s^2; nu <- n - 1; alpha <- 0.05
chi_lo <- qchisq(alpha/2,     df = nu)
chi_hi <- qchisq(1 - alpha/2, df = nu)
LI_var <- (nu * s2) / chi_hi
LS_var <- (nu * s2) / chi_lo
IC_var <- c(LI_var, LS_var)
IC_sd  <- sqrt(IC_var)
IC_var; IC_sd

## [1]  3.350058 15.545258

## [1] 1.830316 3.942748

5.1 Gráfico χ² con área central 1−α

x_max <- qchisq(0.999, df = nu)
xs <- seq(0, x_max, length.out = 1000)
dens <- dchisq(xs, df = nu)
df_all   <- data.frame(x = xs, y = dens)
df_shade <- subset(df_all, x >= chi_lo & x <= chi_hi)

g_chi <- ggplot(df_all, aes(x, y)) +
  geom_line(size = 1) +
  geom_ribbon(data = df_shade, aes(ymin = 0, ymax = y),
              fill = "steelblue", alpha = 0.25) +
  geom_vline(xintercept = chi_lo, linetype = "dashed") +
  geom_vline(xintercept = chi_hi, linetype = "dashed") +
  labs(title = bquote("Distribución " * chi^2 ~ "(df =" ~ .(nu) * ")"),
       subtitle = bquote("Área sombreada = " ~ 1 - alpha ~ " (IC central)"),
       x = "x", y = "densidad") +
  theme_minimal()

g_chi

6 5. IC para el cociente de varianzas \(\sigma_1^2/\sigma_2^2\) con F

set.seed(123)
n1F <- 22; n2F <- 18
sd1F <- 3.0; sd2F <- 2.0

x1F <- rnorm(n1F, 10, sd1F)
x2F <- rnorm(n2F, 10, sd2F)

s1_2 <- var(x1F); s2_2 <- var(x2F); ratio_hat <- s1_2 / s2_2

alpha <- 0.05; v1 <- n1F - 1; v2 <- n2F - 1

F_hi <- qf(1 - alpha/2, df1 = v1, df2 = v2)   # derecha
F_lo <- qf(1 - alpha/2, df1 = v2, df2 = v1)   # izq (invertido)

LI <- ratio_hat / F_hi
LS <- ratio_hat * F_lo
IC_ratio <- c(LI, LS)
IC_sigma <- sqrt(IC_ratio)
IC_ratio

## [1] 1.140739 7.365165

6.1 Gráficos: F (estadístico) y “termómetro” (parámetro)

x_max <- qf(0.995, df1 = v1, df2 = v2)
xs <- seq(0.001, x_max, length.out = 1000)
dens <- df(xs, df1 = v1, df2 = v2)
left_F  <- 1 / qf(1 - alpha/2, df1 = v2, df2 = v1)
right_F <-     qf(1 - alpha/2, df1 = v1, df2 = v2)

g_F <- ggplot(data.frame(x=xs,y=dens), aes(x,y)) +
  geom_line() +
  geom_ribbon(data = subset(data.frame(x=xs,y=dens), x >= left_F & x <= right_F),
              aes(ymin = 0, ymax = y), fill = "steelblue", alpha = .25) +
  geom_vline(xintercept = left_F,  linetype = "dashed") +
  geom_vline(xintercept = right_F, linetype = "dashed") +
  labs(title = sprintf("Distribución F (df1=%d, df2=%d)", v1, v2),
       subtitle = "Área sombreada = 1−α (escala F)",
       x = "F", y = "densidad")

df_ci_ratio <- tibble(parametro = "σ1²/σ2²", est = ratio_hat, li = LI, ls = LS)

g_ratio <- ggplot(df_ci_ratio, aes(x = parametro, y = est)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = li, ymax = ls), width = 0.08, linewidth = 1) +
  geom_hline(yintercept = 1, linetype = "dotted") +
  coord_flip() +
  labs(title = "IC 95% para σ1²/σ2² (escala del parámetro)",
       y = "σ1²/σ2²", x = "")

stack_plots(g_F, g_ratio)

Intervalos de confianza: medias, proporciones y varianzas (con gráficos)

Juan Pablo Tedesca

2025-10-15

1 Paquetes

2 1. IC para la diferencia de medias (varianzas iguales): pooled

2.1 Gráficos

3 2. IC para la diferencia de medias (varianzas desiguales): Welch

3.1 Gráficos

4 3. IC para proporciones y para p1 − p2

4.1 Gráficos

5 4. IC para una varianza con \(\chi^2\)

5.1 Gráfico χ² con área central 1−α

6 5. IC para el cociente de varianzas \(\sigma_1^2/\sigma_2^2\) con F

6.1 Gráficos: F (estadístico) y “termómetro” (parámetro)