Documento auto-contenido para RStudio. Knit a HTML y luego publicá en RPubs.
suppressPackageStartupMessages({
library(ggplot2)
})
set.seed(123)
alpha <- 0.05
n1 <- 40; n2 <- 35
sigma1 <- 2; sigma2 <- 3
x1 <- rnorm(n1, mean=15, sd=sigma1)
x2 <- rnorm(n2, mean=16, sd=sigma2)
xbar1 <- mean(x1); xbar2 <- mean(x2)
se_z <- sqrt(sigma1^2/n1 + sigma2^2/n2)
z_calc <- ((xbar1 - xbar2) - 0) / se_z
p_bi <- 2*pnorm(-abs(z_calc))
zcrit <- qnorm(1 - alpha/2)
ICz <- (xbar1-xbar2) + c(-1,1)*zcrit*se_z
data.frame(xbar1 = xbar1, xbar2 = xbar2, Zcalc = z_calc, p_bilateral = p_bi,
IC95_low = ICz[1], IC95_high = ICz[2])
## xbar1 xbar2 Zcalc p_bilateral IC95_low IC95_high
## 1 15.09037 16.01447 -1.546328 0.1220253 -2.09541 0.2471945
df_z <- rbind(
data.frame(linea="Línea 1", valor=x1),
data.frame(linea="Línea 2", valor=x2)
)
ggplot(df_z, aes(valor, fill=linea)) +
geom_histogram(aes(y=after_stat(density)), bins=20, alpha=.6, position="identity") +
geom_density(alpha=.2) + labs(x="Minutos", y="Densidad",
title="Varianzas conocidas: histogramas y densidades")
set.seed(1234)
n1 <- 25; n2 <- 27
y1 <- rnorm(n1, 70, 10)
y2 <- rnorm(n2, 75, 10)
s1 <- var(y1); s2 <- var(y2)
sp2 <- ((n1-1)*s1 + (n2-1)*s2)/(n1+n2-2)
se_t <- sqrt(sp2*(1/n1 + 1/n2))
t_calc <- (mean(y1)-mean(y2) - 0) / se_t
gl <- n1+n2-2
p_bi <- 2*pt(-abs(t_calc), df=gl)
tcrit <- qt(1 - alpha/2, df=gl)
ICt <- (mean(y1)-mean(y2)) + c(-1,1)*tcrit*se_t
data.frame(mean1=mean(y1), mean2=mean(y2), t_calc=t_calc, gl=gl,
p_bilateral=p_bi, IC95_low=ICt[1], IC95_high=ICt[2])
## mean1 mean2 t_calc gl p_bilateral IC95_low IC95_high
## 1 67.58218 67.96437 -0.1591746 50 0.8741725 -5.204934 4.440547
df2 <- data.frame(grupo=rep(c("A","B"), c(n1,n2)), puntaje=c(y1,y2))
ggplot(df2, aes(grupo, puntaje)) + geom_boxplot() +
labs(x="", y="Puntaje", title="t pooled: comparación de grupos")
set.seed(2025)
a <- rnorm(22, 50, 6)
b <- rnorm(30, 53, 12) # mayor dispersión
xbar_a <- mean(a); xbar_b <- mean(b)
sa2 <- var(a); sb2 <- var(b)
se_w <- sqrt(sa2/length(a) + sb2/length(b))
t_w <- (xbar_a - xbar_b) / se_w
nu <- (sa2/length(a) + sb2/length(b))^2 /
((sa2/length(a))^2/(length(a)-1) + (sb2/length(b))^2/(length(b)-1))
p_bi <- 2*pt(-abs(t_w), df=nu)
tcrit <- qt(1 - alpha/2, df=nu)
ICw <- (xbar_a - xbar_b) + c(-1,1)*tcrit*se_w
data.frame(t_welch=t_w, gl_aprox=nu, p_bilateral=p_bi,
IC95_low=ICw[1], IC95_high=ICw[2])
## t_welch gl_aprox p_bilateral IC95_low IC95_high
## 1 -0.9671805 40.38955 0.3392098 -7.915135 2.79046
set.seed(4321)
n <- 30
antes <- rnorm(n, 60, 8)
mejora <- rnorm(n, 5, 3)
despues <- antes + mejora
d <- despues - antes
dbar <- mean(d); sd_d <- sd(d)
se_d <- sd_d / sqrt(n)
t_d <- (dbar - 0) / se_d
p_bi <- 2*pt(-abs(t_d), df=n-1)
tcrit <- qt(1 - alpha/2, df=n-1)
ICd <- dbar + c(-1,1)*tcrit*se_d
out <- data.frame(mean_d = dbar, sd_d = sd_d, t_calc = t_d, gl = n-1,
p_bilateral = p_bi, IC95_low = ICd[1], IC95_high = ICd[2])
par(mfrow=c(1,2))
ylim <- range(c(antes, despues))
plot(c(1,2), c(min(ylim), max(ylim)), type="n", xaxt="n", xlab="", ylab="Puntaje",
main="Trayectorias pareadas")
axis(1, at=c(1,2), labels=c("Antes","Después"))
for (i in 1:n) lines(c(1,2), c(antes[i], despues[i]), type="b", pch=16)
hist(d, breaks=10, main="Distribución de diferencias (d)", xlab="d = después - antes")
par(mfrow=c(1,1))
out
## mean_d sd_d t_calc gl p_bilateral IC95_low IC95_high
## 1 6.392462 2.274423 15.39421 29 1.717055e-15 5.543179 7.241746
set.seed(2468)
x <- rnorm(25, mean = 100, sd = 6) # datos de ejemplo
n <- length(x); s2 <- var(x)
sigma0 <- 6; sigma02 <- sigma0^2 # H0: sigma^2 = 6^2
chi_calc <- (n - 1) * s2 / sigma02
df <- n - 1
p_bi <- 2 * min(pchisq(chi_calc, df), 1 - pchisq(chi_calc, df))
LI <- (n - 1) * s2 / qchisq(1 - alpha/2, df)
LS <- (n - 1) * s2 / qchisq(alpha/2, df)
data.frame(s2 = s2, chi_calc = chi_calc, df = df,
p_bilateral = p_bi, IC95_sigma2_low = LI, IC95_sigma2_high = LS,
IC95_sigma_low = sqrt(LI), IC95_sigma_high = sqrt(LS))
## s2 chi_calc df p_bilateral IC95_sigma2_low IC95_sigma2_high
## 1 30.75709 20.50472 24 0.6645163 18.75238 59.52432
## IC95_sigma_low IC95_sigma_high
## 1 4.330402 7.715201
set.seed(321)
g1 <- rnorm(18, 100, 5)
g2 <- rnorm(24, 102, 8)
n1 <- length(g1); n2 <- length(g2)
s1 <- var(g1); s2 <- var(g2)
if (s1 >= s2) {
F_calc <- s1 / s2; df1 <- n1 - 1; df2 <- n2 - 1; orden <- "s1/s2"
} else {
F_calc <- s2 / s1; df1 <- n2 - 1; df2 <- n1 - 1; orden <- "s2/s1"
}
p_right <- 1 - pf(F_calc, df1=df1, df2=df2)
p_left <- pf(F_calc, df1=df1, df2=df2)
p_bi <- 2 * min(p_right, p_left)
# IC del ratio en ORDEN FIJO (s1/s2)
ratio_hat <- s1 / s2
LIr <- ratio_hat / qf(1 - alpha/2, df1 = n1 - 1, df2 = n2 - 1)
LSr <- ratio_hat / qf(alpha/2, df1 = n1 - 1, df2 = n2 - 1)
data.frame(s1=s1, s2=s2, F_calc=F_calc, df1=df1, df2=df2, orden=orden,
p_bilateral=p_bi, IC95_ratio_low=LIr, IC95_ratio_high=LSr)
## s1 s2 F_calc df1 df2 orden p_bilateral IC95_ratio_low
## 1 20.54466 69.6983 3.392526 23 17 s2/s1 0.01245082 0.1220233
## IC95_ratio_high
## 1 0.7581782
df5 <- rbind(data.frame(proc="P1", x=g1), data.frame(proc="P2", x=g2))
ggplot(df5, aes(x, fill=proc)) +
geom_histogram(aes(y=after_stat(density)), bins=20, position="identity", alpha=.6) +
geom_density(alpha=.2) +
labs(x="Medición", y="Densidad", title="Comparación de dispersiones")
x1 <- 120; n1 <- 1500
x2 <- 170; n2 <- 1600
p1 <- x1/n1; p2 <- x2/n2
phat <- (x1+x2)/(n1+n2)
se_p <- sqrt(phat*(1-phat)*(1/n1 + 1/n2))
z_calc <- (p1 - p2) / se_p
p_bi <- 2*pnorm(-abs(z_calc))
ICp <- (p1 - p2) + c(-1,1)*qnorm(1-alpha/2)*sqrt(p1*(1-p1)/n1 + p2*(1-p2)/n2)
data.frame(p1=p1, p2=p2, Zcalc=z_calc, p_bilateral=p_bi,
IC95_low=ICp[1], IC95_high=ICp[2])
## p1 p2 Zcalc p_bilateral IC95_low IC95_high
## 1 0.08 0.10625 -2.508208 0.01213451 -0.04665785 -0.005842148
se1 <- sqrt(p1*(1-p1)/n1); se2 <- sqrt(p2*(1-p2)/n2)
df6 <- data.frame(sitio=c("A","B"), p=c(p1,p2), se=c(se1,se2))
ggplot(df6, aes(sitio, p)) +
geom_col() +
geom_errorbar(aes(ymin=p-1.96*se, ymax=p+1.96*se), width=.15) +
labs(y="Proporción", x="Sitio", title="Proporciones con IC95")