library(BSDA)
## Warning: package 'BSDA' was built under R version 4.5.1
## Cargando paquete requerido: lattice
##
## Adjuntando el paquete: 'BSDA'
## The following object is masked from 'package:datasets':
##
## Orange
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.1
# leer los datos
D = read.csv("DatosEquipo7_filtrado.csv")
D$p34_2_bin = ifelse(D$p34_2 %in% c(1), "Sí", "No")
D$p34_3_bin = ifelse(D$p34_3 %in% c(1), "Sí", "No")
va1 = subset(D, p34_2_bin == "Sí")
va2 = subset(D, p34_3_bin == "Sí")
decirsi = rbind(data.frame(grupo = "Sí_p34_2", p4 = va1$p4, p26 = va1$p26), data.frame(grupo = "Sí_p34_3", p4 = va2$p4, p26 = va2$p26))
# grafica de caja para minutos de lectura
boxplot(p26 ~ grupo, data = decirsi, main = "Minutos de lectura (p26): Sí_p34_2 vs Sí_p34_3", xlab = " ", ylab = "Minutos de lectura continua", col = c("blue", "green"))
# grafica de caja para Libros leidos por año
boxplot(p4 ~ grupo, data = decirsi, main = "Libros leídos (p4): Sí_p34_2 vs Sí_p34_3", xlab = " ", ylab = "Número de libros leídos", col = c("blue", "green"))
boxplot(p26 ~ grupo, data = decirsi, main = "Comparación p26 con medias", xlab = " ", ylab = "Minutos de lectura", col = c("blue", "green"))
points(tapply(decirsi$p26, decirsi$grupo, mean, na.rm = TRUE),pch = 19, col = "red") # puntos rojos = medias
boxplot(p4 ~ grupo, data = decirsi, main = "Comparación p4 con medias", xlab = " ", ylab = "Libros leídos por año", col = c("blue", "green"))
points(tapply(decirsi$p4, decirsi$grupo, mean, na.rm = TRUE), pch = 19, col = "red") # puntos rojos = medias
# Para p4 (libros leídos al año)
ic_p4 = z.test(va1$p4, va2$p4, sigma.x=sd(va1$p4), sigma.y = sd(va2$p4), conf.level = 0.96)
ic_p4
##
## Two-sample z-Test
##
## data: va1$p4 and va2$p4
## z = -0.96899, p-value = 0.3326
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
## -0.6121911 0.2196949
## sample estimates:
## mean of x mean of y
## 1.763211 1.959459
# Para p26 (minutos continuos de lectura)
ic_p26 = z.test(va1$p26, va2$p26, sigma.x=sd(va1$p26), sigma.y = sd(va2$p26), conf.level = 0.96)
ic_p26
##
## Two-sample z-Test
##
## data: va1$p26 and va2$p26
## z = -1.4107, p-value = 0.1583
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
## -6.097674 1.131732
## sample estimates:
## mean of x mean of y
## 34.63415 37.11712
cat("Intervalo de confianza 96% para la diferencia de medias: \n")
## Intervalo de confianza 96% para la diferencia de medias:
cat("\nVariable p4 (libros leídos al año): \n")
##
## Variable p4 (libros leídos al año):
cat("Diferencia de medias =",round(ic_p4$estimate[1] - ic_p4$estimate[2], 2), "\nIC 96% = [",round(ic_p4$conf.int[1], 2), ", ",round(ic_p4$conf.int[2], 2), "]\n")
## Diferencia de medias = -0.2
## IC 96% = [ -0.61 , 0.22 ]
cat("\nVariable p26 (minutos continuos de lectura): \n")
##
## Variable p26 (minutos continuos de lectura):
cat("Diferencia de medias =", round(ic_p26$estimate[1] - ic_p26$estimate[2], 2), "\nIC 96% = [", round(ic_p26$conf.int[1], 2), ", ", round(ic_p26$conf.int[2], 2), "]\n")
## Diferencia de medias = -2.48
## IC 96% = [ -6.1 , 1.13 ]
LI_p26 <- ic_p26$conf.int[1]
LS_p26 <- ic_p26$conf.int[2]
plot(0, ylim = c(0, 2), xlim = c(LI_p26 - 1, LS_p26 + 1), yaxt = "n", ylab = "",
main = "Intervalo de confianza para la diferencia de medias p26")
axis(2, at = 1, labels = "Intervalo")
arrows(LI_p26, 1, LS_p26, 1, angle = 90, code = 3, length = 0.1, lwd = 2, col = "blue")
abline(v = ic_p26$estimate[1] - ic_p26$estimate[2], col = "red", lwd = 2)
LI_p4 <- ic_p4$conf.int[1]
LS_p4 <- ic_p4$conf.int[2]
plot(0, ylim = c(0, 2), xlim = c(LI_p4 - 1, LS_p4 + 1), yaxt = "n", ylab = "",
main = "Intervalo de confianza para la diferencia de medias p4")
axis(2, at = 1, labels = "Intervalo")
arrows(LI_p4, 1, LS_p4, 1, angle = 90, code = 3, length = 0.1, lwd = 2, col = "blue")
abline(v = ic_p4$estimate[1] - ic_p4$estimate[2], col = "red", lwd = 2)
shapiro.test(va1$p4)
##
## Shapiro-Wilk normality test
##
## data: va1$p4
## W = 0.4352, p-value < 2.2e-16
shapiro.test(va2$p26)
##
## Shapiro-Wilk normality test
##
## data: va2$p26
## W = 0.80049, p-value < 2.2e-16
Se rechaza la hipótesis nula de que la las variables tienen distribución normal.
var.test(va1$p4,va1$p26)
##
## F test to compare two variances
##
## data: va1$p4 and va1$p26
## F = 0.012163, num df = 983, denom df = 983, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.0107331 0.0137843
## sample estimates:
## ratio of variances
## 0.0121634
var.test(va2$p4,va2$p26)
##
## F test to compare two variances
##
## data: va2$p4 and va2$p26
## F = 0.013916, num df = 665, denom df = 665, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.01195228 0.01620304
## sample estimates:
## ratio of variances
## 0.01391629
var.test(D$p4,D$p26)
##
## F test to compare two variances
##
## data: D$p4 and D$p26
## F = 0.0099502, num df = 2015, denom df = 2015, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.009117967 0.010858488
## sample estimates:
## ratio of variances
## 0.009950243
var.test(D$p4,D$p26)
##
## F test to compare two variances
##
## data: D$p4 and D$p26
## F = 0.0099502, num df = 2015, denom df = 2015, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.009117967 0.010858488
## sample estimates:
## ratio of variances
## 0.009950243
tabla <- table(D$p34_2, D$p5)
chi <- chisq.test(tabla, correct = FALSE)
## Warning in chisq.test(tabla, correct = FALSE): Chi-squared approximation may be
## incorrect
# Ver frecuencias esperadas
chi$expected
##
## 0 1 2 3 4 5 6
## 0 28.13938 2.0282738 2.0049603 4.802579 7.506944 2.238095 0.2797619
## 1 589.13095 42.4642857 41.9761905 100.547619 157.166667 46.857143 5.8571429
## 2 575.95933 41.5148810 41.0376984 98.299603 153.652778 45.809524 5.7261905
## 3 13.77034 0.9925595 0.9811508 2.350198 3.673611 1.095238 0.1369048
# Verificar condición
if (all(chi$expected >= 5)) {
print("Usar chi-cuadrado")
} else {
print("Usar Fisher")
}
## [1] "Usar Fisher"
D$p34_2_fusion <- factor(D$p34_2_bin)
D$p34_3_fusion <- factor(D$p34_3_bin)
D$p5_fusion <- as.character(D$p5)
D$p5_fusion[D$p5 %in% c(1, 2)] <- "Trabajo"
D$p5_fusion[D$p5 %in% c(3, 4)] <- "Entretenimiento"
D$p5_fusion[D$p5 %in% c(0, 6)] <- "Otro"
D$p5_fusion[D$p5 == 5] <- "Religión"
D$p5_fusion <- factor(D$p5_fusion)
tabla_p34_2 <- table(D$p34_2_fusion, D$p5_fusion)
chi_p34_2 <- chisq.test(tabla_p34_2, correct = FALSE)
# Ver frecuencias esperadas
chi_p34_2$expected
##
## Entretenimiento Otro Religión Trabajo
## No 270.2857 624.0119 49.14286 88.55952
## Sí 257.7143 594.9881 46.85714 84.44048
# Verificar condición
if (all(chi_p34_2$expected >= 5)) {
print("Usar chi-cuadrado")
} else {
print("Usar Fisher")
}
## [1] "Usar chi-cuadrado"
tabla_p34_3 <- table(D$p34_3_fusion, D$p5_fusion)
chi_p34_3 <- chisq.test(tabla_p34_3, correct = FALSE)
# Ver frecuencias esperadas
chi_p34_3$expected
##
## Entretenimiento Otro Religión Trabajo
## No 353.5714 816.2946 64.28571 115.84821
## Sí 174.4286 402.7054 31.71429 57.15179
# Verificar condición
if (all(chi_p34_3$expected >= 5)) {
print("Usar chi-cuadrado")
} else {
print("Usar Fisher")
}
## [1] "Usar chi-cuadrado"
Fusionando las categorías podemos usar chi-cuadrado.
chi_p34_3
##
## Pearson's Chi-squared test
##
## data: tabla_p34_3
## X-squared = 131.24, df = 3, p-value < 2.2e-16
chi_p34_2
##
## Pearson's Chi-squared test
##
## data: tabla_p34_2
## X-squared = 140.63, df = 3, p-value < 2.2e-16
# Crear la tabla de contingencia
empleo <- matrix(c(273, 27,
261, 39,
356, 44),
nrow = 3,
byrow = TRUE,
dimnames = list(
Vuelo = c("Delta", "United", "US Airways"),
Variable = c("Demorado", "Tiempo")
))
# Prueba de independencia
resultado <- chisq.test(empleo)
# Mostrar el estadístico redondeado
round(resultado$statistic, 2)
## X-squared
## 2.45
# Valor p
resultado$p.value
## [1] 0.2935402
resultado
##
## Pearson's Chi-squared test
##
## data: empleo
## X-squared = 2.4515, df = 2, p-value = 0.2935
valor_critico <- qchisq(p = 0.99, df = 2)
round(valor_critico, 2)
## [1] 9.21
z_ci <- function(x, conf.level = 0.95) {
n <- length(x)
mean_x <- mean(x, na.rm = TRUE)
sd_x <- sd(x, na.rm = TRUE)
alpha <- 1 - conf.level
z <- qnorm(1 - alpha/2)
error <- z * sd_x / sqrt(n)
ci <- c(mean_x - error, mean_x + error)
return(list(mean = mean_x, ci = ci))
}
# Nivel de confianza
conf.level <- 0.96
# Subgrupos
grupo_2 <- subset(D, p34_2_bin == "Sí")
grupo_3 <- subset(D, p34_3_bin == "Sí")
# Intervalos de confianza individuales
ic_p4_g2 <- z_ci(grupo_2$p4, conf.level)
ic_p4_g3 <- z_ci(grupo_3$p4, conf.level)
ic_p26_g2 <- z_ci(grupo_2$p26, conf.level)
ic_p26_g3 <- z_ci(grupo_3$p26, conf.level)
z_diff_ci <- function(x1, x2, conf.level = 0.95) {
n1 <- length(x1)
n2 <- length(x2)
m1 <- mean(x1, na.rm = TRUE)
m2 <- mean(x2, na.rm = TRUE)
sd1 <- sd(x1, na.rm = TRUE)
sd2 <- sd(x2, na.rm = TRUE)
alpha <- 1 - conf.level
z <- qnorm(1 - alpha/2)
se_diff <- sqrt((sd1^2 / n1) + (sd2^2 / n2))
diff <- m1 - m2
error <- z * se_diff
ci <- c(diff - error, diff + error)
return(list(diff = diff, ci = ci))
}
# Diferencias de medias
ic_diff_p4 <- z_diff_ci(grupo_2$p4, grupo_3$p4, conf.level)
ic_diff_p26 <- z_diff_ci(grupo_2$p26, grupo_3$p26, conf.level)
cat("Media de p4 en grupo p34_2 = Sí:\n")
## Media de p4 en grupo p34_2 = Sí:
cat("Media =", ic_p4_g2$mean, "\n Intervalo de confianza 96% = [", ic_p4_g2$ci[1], ",", ic_p4_g2$ci[2], "]\n")
## Media = 1.763211
## Intervalo de confianza 96% = [ 1.51578 , 2.010642 ]
cat("Media de p4 en grupo p34_3 = Sí:\n")
## Media de p4 en grupo p34_3 = Sí:
cat("Media =", ic_p4_g3$mean, "\n Intervalo de confianza 96% = [", ic_p4_g3$ci[1], ",", ic_p4_g3$ci[2], "]\n")
## Media = 1.959459
## Intervalo de confianza 96% = [ 1.625115 , 2.293804 ]
cat("Media de p26 en grupo p34_2 = Sí:\n")
## Media de p26 en grupo p34_2 = Sí:
cat("Media =", ic_p26_g2$mean, "\n Intervalo de confianza 96% = [", ic_p26_g2$ci[1], ",", ic_p26_g2$ci[2], "]\n")
## Media = 34.63415
## Intervalo de confianza 96% = [ 32.39064 , 36.87765 ]
cat("Media de p26 en grupo p34_3 = Sí:\n")
## Media de p26 en grupo p34_3 = Sí:
cat("Media =", ic_p26_g3$mean, "\n Intervalo de confianza 96% = [", ic_p26_g3$ci[1], ",", ic_p26_g3$ci[2], "]\n")
## Media = 37.11712
## Intervalo de confianza 96% = [ 34.2829 , 39.95133 ]
cat("Diferencia de medias de p4 entre grupos:\n")
## Diferencia de medias de p4 entre grupos:
cat("Diferencia =", ic_diff_p4$diff, "\n Intervalo de confianza 96% = [", ic_diff_p4$ci[1], ",", ic_diff_p4$ci[2], "]\n")
## Diferencia = -0.1962481
## Intervalo de confianza 96% = [ -0.6121911 , 0.2196949 ]
cat("Diferencia de medias de p26 entre grupos:\n")
## Diferencia de medias de p26 entre grupos:
cat("Diferencia =", ic_diff_p26$diff, "\n Intervalo de confianza 96% = [", ic_diff_p26$ci[1], ",", ic_diff_p26$ci[2], "]\n")
## Diferencia = -2.482971
## Intervalo de confianza 96% = [ -6.097674 , 1.131732 ]
# Crear dataframe con los resultados
df_ic <- data.frame(
Variable = c("p4 (p34_2)", "p4 (p34_3)", "p26 (p34_2)", "p26 (p34_3)",
"Diferencia p4", "Diferencia p26"),
Media = c(ic_p4_g2$mean, ic_p4_g3$mean, ic_p26_g2$mean, ic_p26_g3$mean,
ic_diff_p4$diff, ic_diff_p26$diff),
IC_inf = c(ic_p4_g2$ci[1], ic_p4_g3$ci[1], ic_p26_g2$ci[1], ic_p26_g3$ci[1],
ic_diff_p4$ci[1], ic_diff_p26$ci[1]),
IC_sup = c(ic_p4_g2$ci[2], ic_p4_g3$ci[2], ic_p26_g2$ci[2], ic_p26_g3$ci[2],
ic_diff_p4$ci[2], ic_diff_p26$ci[2])
)
ggplot(df_ic, aes(y = Variable, x = Media)) +
geom_point(size = 3, color = "darkblue") +
geom_errorbarh(aes(xmin = IC_inf, xmax = IC_sup), height = 0.2, color = "steelblue") +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +
labs(title = "Intervalos de confianza al 96%",
x = "Media estimada (con IC)",
y = "") +
theme_minimal(base_size = 14)
## Warning: `geom_errobarh()` was deprecated in ggplot2 4.0.0.
## ℹ Please use the `orientation` argument of `geom_errorbar()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `height` was translated to `width`.