Documento auto-contenido para RStudio. Knit a HTML y luego publicá en RPubs.

1 1) Diferencia de medias con varianzas conocidas (Z)

suppressPackageStartupMessages({
  library(ggplot2)
})

set.seed(123)
alpha <- 0.05

n1 <- 40; n2 <- 35
sigma1 <- 2; sigma2 <- 3
x1 <- rnorm(n1, mean=15, sd=sigma1)
x2 <- rnorm(n2, mean=16, sd=sigma2)

xbar1 <- mean(x1); xbar2 <- mean(x2)
se_z  <- sqrt(sigma1^2/n1 + sigma2^2/n2)
z_calc <- ((xbar1 - xbar2) - 0) / se_z
p_bi   <- 2*pnorm(-abs(z_calc))
zcrit  <- qnorm(1 - alpha/2)
ICz    <- (xbar1-xbar2) + c(-1,1)*zcrit*se_z

data.frame(xbar1 = xbar1, xbar2 = xbar2, Zcalc = z_calc, p_bilateral = p_bi,
           IC95_low = ICz[1], IC95_high = ICz[2])

##      xbar1    xbar2     Zcalc p_bilateral IC95_low IC95_high
## 1 15.09037 16.01447 -1.546328   0.1220253 -2.09541 0.2471945

df_z <- rbind(
  data.frame(linea="Línea 1", valor=x1),
  data.frame(linea="Línea 2", valor=x2)
)
ggplot(df_z, aes(valor, fill=linea)) +
  geom_histogram(aes(y=after_stat(density)), bins=20, alpha=.6, position="identity") +
  geom_density(alpha=.2) + labs(x="Minutos", y="Densidad",
  title="Varianzas conocidas: histogramas y densidades")

2 2) Dos medias: varianzas desconocidas pero iguales (t pooled)

set.seed(1234)
n1 <- 25; n2 <- 27
y1 <- rnorm(n1, 70, 10)
y2 <- rnorm(n2, 75, 10)
s1 <- var(y1); s2 <- var(y2)
sp2 <- ((n1-1)*s1 + (n2-1)*s2)/(n1+n2-2)
se_t <- sqrt(sp2*(1/n1 + 1/n2))
t_calc <- (mean(y1)-mean(y2) - 0) / se_t
gl <- n1+n2-2
p_bi <- 2*pt(-abs(t_calc), df=gl)
tcrit <- qt(1 - alpha/2, df=gl)
ICt   <- (mean(y1)-mean(y2)) + c(-1,1)*tcrit*se_t

data.frame(mean1=mean(y1), mean2=mean(y2), t_calc=t_calc, gl=gl,
           p_bilateral=p_bi, IC95_low=ICt[1], IC95_high=ICt[2])

##      mean1    mean2     t_calc gl p_bilateral  IC95_low IC95_high
## 1 67.58218 67.96437 -0.1591746 50   0.8741725 -5.204934  4.440547

df2 <- data.frame(grupo=rep(c("A","B"), c(n1,n2)), puntaje=c(y1,y2))
ggplot(df2, aes(grupo, puntaje)) + geom_boxplot() +
  labs(x="", y="Puntaje", title="t pooled: comparación de grupos")

3 3) Dos medias: varianzas desiguales (t de Welch)

set.seed(2025)
a <- rnorm(22, 50, 6)
b <- rnorm(30, 53, 12)  # mayor dispersión
xbar_a <- mean(a); xbar_b <- mean(b)
sa2 <- var(a); sb2 <- var(b)
se_w <- sqrt(sa2/length(a) + sb2/length(b))
t_w  <- (xbar_a - xbar_b) / se_w
nu <- (sa2/length(a) + sb2/length(b))^2 /
      ((sa2/length(a))^2/(length(a)-1) + (sb2/length(b))^2/(length(b)-1))
p_bi <- 2*pt(-abs(t_w), df=nu)
tcrit <- qt(1 - alpha/2, df=nu)
ICw   <- (xbar_a - xbar_b) + c(-1,1)*tcrit*se_w

data.frame(t_welch=t_w, gl_aprox=nu, p_bilateral=p_bi,
           IC95_low=ICw[1], IC95_high=ICw[2])

##      t_welch gl_aprox p_bilateral  IC95_low IC95_high
## 1 -0.9671805 40.38955   0.3392098 -7.915135   2.79046

4 4) Pareadas (antes–después): t sobre diferencias

set.seed(4321)
n <- 30
antes  <- rnorm(n, 60, 8)
mejora <- rnorm(n, 5, 3)
despues <- antes + mejora
d <- despues - antes
dbar <- mean(d); sd_d <- sd(d)
se_d <- sd_d / sqrt(n)
t_d  <- (dbar - 0) / se_d
p_bi <- 2*pt(-abs(t_d), df=n-1)
tcrit <- qt(1 - alpha/2, df=n-1)
ICd   <- dbar + c(-1,1)*tcrit*se_d

out <- data.frame(mean_d = dbar, sd_d = sd_d, t_calc = t_d, gl = n-1,
                  p_bilateral = p_bi, IC95_low = ICd[1], IC95_high = ICd[2])

par(mfrow=c(1,2))
ylim <- range(c(antes, despues))
plot(c(1,2), c(min(ylim), max(ylim)), type="n", xaxt="n", xlab="", ylab="Puntaje",
     main="Trayectorias pareadas")
axis(1, at=c(1,2), labels=c("Antes","Después"))
for (i in 1:n) lines(c(1,2), c(antes[i], despues[i]), type="b", pch=16)
hist(d, breaks=10, main="Distribución de diferencias (d)", xlab="d = después - antes")

par(mfrow=c(1,1))

out

##     mean_d     sd_d   t_calc gl  p_bilateral IC95_low IC95_high
## 1 6.392462 2.274423 15.39421 29 1.717055e-15 5.543179  7.241746

5 5) Test de una varianza (χ²) y su IC

set.seed(2468)
x <- rnorm(25, mean = 100, sd = 6)     # datos de ejemplo
n  <- length(x); s2 <- var(x)
sigma0 <- 6; sigma02 <- sigma0^2       # H0: sigma^2 = 6^2

chi_calc <- (n - 1) * s2 / sigma02
df <- n - 1
p_bi <- 2 * min(pchisq(chi_calc, df), 1 - pchisq(chi_calc, df))

LI <- (n - 1) * s2 / qchisq(1 - alpha/2, df)
LS <- (n - 1) * s2 / qchisq(alpha/2,     df)

data.frame(s2 = s2, chi_calc = chi_calc, df = df,
           p_bilateral = p_bi, IC95_sigma2_low = LI, IC95_sigma2_high = LS,
           IC95_sigma_low = sqrt(LI), IC95_sigma_high = sqrt(LS))

##         s2 chi_calc df p_bilateral IC95_sigma2_low IC95_sigma2_high
## 1 30.75709 20.50472 24   0.6645163        18.75238         59.52432
##   IC95_sigma_low IC95_sigma_high
## 1       4.330402        7.715201

6 6) Igualdad de varianzas (F) y IC del cociente

set.seed(321)
g1 <- rnorm(18, 100, 5)
g2 <- rnorm(24, 102, 8)
n1 <- length(g1); n2 <- length(g2)
s1 <- var(g1);    s2 <- var(g2)

if (s1 >= s2) {
  F_calc <- s1 / s2; df1 <- n1 - 1; df2 <- n2 - 1; orden <- "s1/s2"
} else {
  F_calc <- s2 / s1; df1 <- n2 - 1; df2 <- n1 - 1; orden <- "s2/s1"
}
p_right <- 1 - pf(F_calc, df1=df1, df2=df2)
p_left  <- pf(F_calc,  df1=df1, df2=df2)
p_bi    <- 2 * min(p_right, p_left)

# IC del ratio en ORDEN FIJO (s1/s2)
ratio_hat <- s1 / s2
LIr <- ratio_hat / qf(1 - alpha/2, df1 = n1 - 1, df2 = n2 - 1)
LSr <- ratio_hat / qf(alpha/2,     df1 = n1 - 1, df2 = n2 - 1)

data.frame(s1=s1, s2=s2, F_calc=F_calc, df1=df1, df2=df2, orden=orden,
           p_bilateral=p_bi, IC95_ratio_low=LIr, IC95_ratio_high=LSr)

##         s1      s2   F_calc df1 df2 orden p_bilateral IC95_ratio_low
## 1 20.54466 69.6983 3.392526  23  17 s2/s1  0.01245082      0.1220233
##   IC95_ratio_high
## 1       0.7581782

df5 <- rbind(data.frame(proc="P1", x=g1), data.frame(proc="P2", x=g2))
ggplot(df5, aes(x, fill=proc)) +
  geom_histogram(aes(y=after_stat(density)), bins=20, position="identity", alpha=.6) +
  geom_density(alpha=.2) +
  labs(x="Medición", y="Densidad", title="Comparación de dispersiones")

7 7) Diferencia de proporciones (Z) + IC

x1 <- 120; n1 <- 1500
x2 <- 170; n2 <- 1600
p1 <- x1/n1; p2 <- x2/n2
phat <- (x1+x2)/(n1+n2)
se_p <- sqrt(phat*(1-phat)*(1/n1 + 1/n2))
z_calc <- (p1 - p2) / se_p
p_bi   <- 2*pnorm(-abs(z_calc))
ICp    <- (p1 - p2) + c(-1,1)*qnorm(1-alpha/2)*sqrt(p1*(1-p1)/n1 + p2*(1-p2)/n2)

data.frame(p1=p1, p2=p2, Zcalc=z_calc, p_bilateral=p_bi,
           IC95_low=ICp[1], IC95_high=ICp[2])

##     p1      p2     Zcalc p_bilateral    IC95_low    IC95_high
## 1 0.08 0.10625 -2.508208  0.01213451 -0.04665785 -0.005842148

se1 <- sqrt(p1*(1-p1)/n1); se2 <- sqrt(p2*(1-p2)/n2)
df6 <- data.frame(sitio=c("A","B"), p=c(p1,p2), se=c(se1,se2))
ggplot(df6, aes(sitio, p)) +
  geom_col() +
  geom_errorbar(aes(ymin=p-1.96*se, ymax=p+1.96*se), width=.15) +
  labs(y="Proporción", x="Sitio", title="Proporciones con IC95")

Pruebas de hipótesis integradas: dos medias, varianzas y proporciones

Material para RPubs

2025-10-22

1 1) Diferencia de medias con varianzas conocidas (Z)

2 2) Dos medias: varianzas desconocidas pero iguales (t pooled)

3 3) Dos medias: varianzas desiguales (t de Welch)

4 4) Pareadas (antes–después): t sobre diferencias

5 5) Test de una varianza (χ²) y su IC

6 6) Igualdad de varianzas (F) y IC del cociente

7 7) Diferencia de proporciones (Z) + IC