# Load the filtered survey data.
D <- read.csv("DatosEquipo7_filtrado1.csv")

# Binary "Sí"/"No" indicators for p34_2 and p34_3: values 2 or 3 map to "Sí";
# every other value (NA included, since the membership test never yields NA)
# maps to "No".
D[["p34_2_bin"]] <- ifelse(is.element(D[["p34_2"]], c(2, 3)), "Sí", "No")
D[["p34_3_bin"]] <- ifelse(is.element(D[["p34_3"]], c(2, 3)), "Sí", "No")

# Rows answering "Sí" on each indicator.
grupo_2 <- D[D[["p34_2_bin"]] == "Sí", , drop = FALSE] # saw their parents read
grupo_3 <- D[D[["p34_3_bin"]] == "Sí", , drop = FALSE] # was read to as a child
# Build one long-format data frame: for each response variable, in the order
# p4, p26, p5, p30, stack the values of grupo_2 followed by those of grupo_3,
# labelling every row with its group and variable name.
decirsi <- do.call(
  rbind,
  lapply(c("p4", "p26", "p5", "p30"), function(nombre) {
    rbind(
      data.frame(
        grupo = "Sí_p34_2", variable = nombre, valor = grupo_2[[nombre]]
      ),
      data.frame(
        grupo = "Sí_p34_3", variable = nombre, valor = grupo_3[[nombre]]
      )
    )
  })
)

# Confidence level passed to the t.test calls further down.
conf.level <- 0.96
# Comparison of variances ----
# F test (stats::var.test) of H0 "ratio of population variances = 1" between
# grupo_2 and grupo_3, one test per response variable (p4, p26, p5, p30).
#
# `conf.level` is passed explicitly so the reported interval for the variance
# ratio uses the script's 96% level; without it var.test() falls back to its
# default of 0.95, which disagrees with the 0.04 threshold quoted in the
# interpretation message at the end of this section.
#
# NOTE(review): the `##` lines are console output recorded from an earlier run
# that used the default 95% level. F, the degrees of freedom and the p-value do
# not depend on conf.level, but the recorded interval bounds (and the
# "95 percent" label) do -- re-run the script to refresh them.
cat("Comparación de Varianzas (var.test)\n")
## Comparación de Varianzas (var.test)
cat("Variable p4:\n")
## Variable p4:
print(var.test(grupo_2$p4, grupo_3$p4, conf.level = conf.level))
##
## F test to compare two variances
##
## data: grupo_2$p4 and grupo_3$p4
## F = 0.99489, num df = 984, denom df = 1302, p-value = 0.9339
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8853414 1.1190803
## sample estimates:
## ratio of variances
## 0.994892
cat("Variable p26:\n")
## Variable p26:
print(var.test(grupo_2$p26, grupo_3$p26, conf.level = conf.level))
##
## F test to compare two variances
##
## data: grupo_2$p26 and grupo_3$p26
## F = 0.96698, num df = 984, denom df = 1302, p-value = 0.5763
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8605011 1.0876820
## sample estimates:
## ratio of variances
## 0.966978
cat("Variable p5:\n")
## Variable p5:
print(var.test(grupo_2$p5, grupo_3$p5, conf.level = conf.level))
##
## F test to compare two variances
##
## data: grupo_2$p5 and grupo_3$p5
## F = 0.98587, num df = 984, denom df = 1302, p-value = 0.814
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8773128 1.1089321
## sample estimates:
## ratio of variances
## 0.9858699
cat("Variable p30:\n")
## Variable p30:
print(var.test(grupo_2$p30, grupo_3$p30, conf.level = conf.level))
##
## F test to compare two variances
##
## data: grupo_2$p30 and grupo_3$p30
## F = 0.98757, num df = 984, denom df = 1302, p-value = 0.8363
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8788218 1.1108395
## sample estimates:
## ratio of variances
## 0.9875657
# Decision rule printed for the reader: alpha = 0.04, i.e. 1 - 0.96.
cat("\nInterpretación: Si p > 0.04, no se rechaza H₀ → varianzas poblacionales similares.\n")
##
## Interpretación: Si p > 0.04, no se rechaza H₀ → varianzas poblacionales similares.
# Distribution plots ----
# For each response variable (p4, p26, p5, p30): a boxplot split by p34_3_bin
# ("was read to as a child?") followed by one histogram per answer.
#
# Colour convention: boxplot() draws the groups of the formula in sorted order
# ("No", then "Sí"), so col = c("blue", "green") paints "No" blue and "Sí"
# green. The histograms use the same mapping ("No" = blue, "Sí" = green) so a
# colour identifies the same group in every figure; previously the histograms
# had the two colours swapped relative to the boxplots.

# p4 (books read)
boxplot(
  p4 ~ p34_3_bin,
  data = D,
  main = "Boxplot p4 (Libros leídos)",
  xlab = "¿Le leían de niño?", ylab = "Libros leídos",
  col = c("blue", "green")
)

hist(
  D$p4[D$p34_3_bin == "Sí"],
  main = "Histograma p4 - Sí", xlab = "Libros leídos", col = "green"
)

hist(
  D$p4[D$p34_3_bin == "No"],
  main = "Histograma p4 - No", xlab = "Libros leídos", col = "blue"
)

# p26 (reading minutes)
boxplot(
  p26 ~ p34_3_bin,
  data = D,
  main = "Boxplot p26 (Minutos lectura)",
  xlab = "¿Le leían de niño?", ylab = "Minutos lectura",
  col = c("blue", "green")
)

hist(
  D$p26[D$p34_3_bin == "Sí"],
  main = "Histograma p26 - Sí", xlab = "Minutos lectura", col = "green"
)

hist(
  D$p26[D$p34_3_bin == "No"],
  main = "Histograma p26 - No", xlab = "Minutos lectura", col = "blue"
)

# p5 (time spent reading)
boxplot(
  p5 ~ p34_3_bin,
  data = D,
  main = "Boxplot p5 (Tiempo leyendo)",
  xlab = "¿Le leían de niño?", ylab = "Tiempo leyendo",
  col = c("blue", "green")
)

hist(
  D$p5[D$p34_3_bin == "Sí"],
  main = "Histograma p5 - Sí", xlab = "Tiempo leyendo", col = "green"
)

hist(
  D$p5[D$p34_3_bin == "No"],
  main = "Histograma p5 - No", xlab = "Tiempo leyendo", col = "blue"
)

# p30 (reading comprehension)
boxplot(
  p30 ~ p34_3_bin,
  data = D,
  main = "Boxplot p30 (Comprensión lectora)",
  xlab = "¿Le leían de niño?", ylab = "Comprensión lectora",
  col = c("blue", "green")
)

hist(
  D$p30[D$p34_3_bin == "Sí"],
  main = "Histograma p30 - Sí", xlab = "Comprensión lectora", col = "green"
)

hist(
  D$p30[D$p34_3_bin == "No"],
  main = "Histograma p30 - No", xlab = "Comprensión lectora", col = "blue"
)

# Mean plots ----
# One figure per response variable, in the order p4, p26, p5, p30: a boxplot
# of `valor` by `grupo` taken from the matching rows of `decirsi`, with the
# group means (NA values dropped) overlaid as red filled points.
invisible(Map(
  function(nombre, titulo, eje_y) {
    # Rows of the long data frame belonging to this variable.
    datos <- decirsi[decirsi$variable == nombre, , drop = FALSE]
    boxplot(
      valor ~ grupo,
      data = datos,
      main = titulo,
      xlab = "Grupo", ylab = eje_y,
      col = c("blue", "green")
    )
    # Per-group means, drawn at x = 1, 2 in the same order as the boxes.
    medias <- tapply(datos$valor, datos$grupo, mean, na.rm = TRUE)
    points(medias, pch = 19, col = "red")
  },
  c("p4", "p26", "p5", "p30"),
  c(
    "Libros leídos (p4): Sí_p34_2 vs Sí_p34_3",
    "Minutos de lectura (p26): Sí_p34_2 vs Sí_p34_3",
    "Tiempo leyendo (p5): Sí_p34_2 vs Sí_p34_3",
    "Comprensión lectora (p30): Sí_p34_2 vs Sí_p34_3"
  ),
  c(
    "Número de libros leídos",
    "Minutos de lectura continua",
    "Tiempo leyendo",
    "Nivel de comprensión lectora"
  )
))

# 96% confidence intervals for the difference of population means
# Runs a two-sample t.test of grupo_2 against grupo_3 for each response
# variable (p4, p26, p5, p30) at the level stored in `conf.level`, printing a
# label before each result. The `##` lines are recorded console output; they
# show that the default call performs the Welch (unequal-variance) test.
# NOTE(review): grupo_2 and grupo_3 are both subsets of D selected on different
# columns, so the same row can appear in both groups, while t.test() here
# treats them as two independent samples -- confirm this is intended.
cat("\nIntervalos de confianza 96% para la diferencia de medias poblacionales:\n")
##
## Intervalos de confianza 96% para la diferencia de medias poblacionales:
cat("Variable p4:\n")
## Variable p4:
print(t.test(grupo_2$p4, grupo_3$p4, conf.level = conf.level))
##
## Welch Two Sample t-test
##
## data: grupo_2$p4 and grupo_3$p4
## t = -1.0983, df = 2121.7, p-value = 0.2722
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
## -0.3534894 0.1072479
## sample estimates:
## mean of x mean of y
## 0.8477157 0.9708365
cat("Variable p26:\n")
## Variable p26:
print(t.test(grupo_2$p26, grupo_3$p26, conf.level = conf.level))
##
## Welch Two Sample t-test
##
## data: grupo_2$p26 and grupo_3$p26
## t = -1.7848, df = 2136.9, p-value = 0.07443
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
## -4.8413480 0.3406526
## sample estimates:
## mean of x mean of y
## 20.21320 22.46355
cat("Variable p5:\n")
## Variable p5:
print(t.test(grupo_2$p5, grupo_3$p5, conf.level = conf.level))
##
## Welch Two Sample t-test
##
## data: grupo_2$p5 and grupo_3$p5
## t = -1.1972, df = 2126.6, p-value = 0.2314
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
## -0.24676161 0.06508637
## sample estimates:
## mean of x mean of y
## 1.099492 1.190330
cat("Variable p30:\n")
## Variable p30:
# In the recorded output below, p30 is the only variable whose 96% interval
# excludes 0.
print(t.test(grupo_2$p30, grupo_3$p30, conf.level = conf.level))
##
## Welch Two Sample t-test
##
## data: grupo_2$p30 and grupo_3$p30
## t = -2.9368, df = 2125.7, p-value = 0.003352
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
## -0.32301200 -0.05706054
## sample estimates:
## mean of x mean of y
## 1.608122 1.798158