# Load the survey data ----

D <- read.csv("DatosEquipo7_filtrado1.csv")

# Binary "Sí"/"No" flags for p34_2 and p34_3: codes 2 and 3 map to "Sí",
# everything else to "No". `%in%` is FALSE for NA, so a missing answer is
# also labelled "No".
D[["p34_2_bin"]] <- c("No", "Sí")[(D[["p34_2"]] %in% c(2, 3)) + 1L]
D[["p34_3_bin"]] <- c("No", "Sí")[(D[["p34_3"]] %in% c(2, 3)) + 1L]

# Rows flagged "Sí" on each question.
# NOTE(review): both groups are filtered from the same D on different
# columns, so one respondent can belong to both — confirm this is intended.
grupo_2 <- D[D[["p34_2_bin"]] == "Sí", , drop = FALSE]  # saw parents reading
grupo_3 <- D[D[["p34_3_bin"]] == "Sí", , drop = FALSE]  # was read to as a child

# Unified long-format data frame ----
# One row per (group, variable, value). Rows are stacked variable by variable
# (p4, p26, p5, p30), with grupo_2 ("Sí_p34_2") before grupo_3 ("Sí_p34_3")
# inside each variable.

decirsi <- local({
  variables <- c("p4", "p26", "p5", "p30")
  piezas <- lapply(variables, function(v) {
    list(
      data.frame(grupo = "Sí_p34_2", variable = v, valor = grupo_2[[v]]),
      data.frame(grupo = "Sí_p34_3", variable = v, valor = grupo_3[[v]])
    )
  })
  # Flatten one level so rbind() receives the eight data frames in order.
  do.call(rbind, unlist(piezas, recursive = FALSE))
})

# Confidence level ----
# Shared by every test in this script; the significance level is
# 1 - conf.level.

conf.level <- 0.96

# Comparison of variances ----
# F test (var.test) of grupo_2 against grupo_3 for each outcome column.
# conf.level is passed explicitly: var.test() defaults to a 95% interval,
# which did not match the 96% level used by the rest of the analysis.

cat("Comparación de Varianzas (var.test)\n")
local({
  for (v in c("p4", "p26", "p5", "p30")) {
    cat("Variable ", v, ":\n", sep = "")
    prueba <- var.test(grupo_2[[v]], grupo_3[[v]], conf.level = conf.level)
    # Keep the "data:" line that a literal `grupo_2$p4`-style call prints.
    prueba$data.name <- sprintf("grupo_2$%s and grupo_3$%s", v, v)
    print(prueba)
  }
})
# The threshold in the message is derived from conf.level (0.04 for 0.96).
cat("\nInterpretación: Si p > ", format(1 - conf.level),
    ", no se rechaza H₀ → varianzas poblacionales similares.\n", sep = "")

# Recorded output ----
# Captured from a run made BEFORE conf.level was passed to var.test(), so the
# intervals shown are the default 95% ones. F, the degrees of freedom and the
# p-value do not depend on conf.level. Re-run the script to refresh them.
## Comparación de Varianzas (var.test)
## Variable p4:
## 
##  F test to compare two variances
## 
## data:  grupo_2$p4 and grupo_3$p4
## F = 0.99489, num df = 984, denom df = 1302, p-value = 0.9339
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8853414 1.1190803
## sample estimates:
## ratio of variances 
##           0.994892
## Variable p26:
## 
##  F test to compare two variances
## 
## data:  grupo_2$p26 and grupo_3$p26
## F = 0.96698, num df = 984, denom df = 1302, p-value = 0.5763
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8605011 1.0876820
## sample estimates:
## ratio of variances 
##           0.966978
## Variable p5:
## 
##  F test to compare two variances
## 
## data:  grupo_2$p5 and grupo_3$p5
## F = 0.98587, num df = 984, denom df = 1302, p-value = 0.814
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8773128 1.1089321
## sample estimates:
## ratio of variances 
##          0.9858699
## Variable p30:
## 
##  F test to compare two variances
## 
## data:  grupo_2$p30 and grupo_3$p30
## F = 0.98757, num df = 984, denom df = 1302, p-value = 0.8363
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8788218 1.1108395
## sample estimates:
## ratio of variances 
##          0.9875657
## 
## Interpretación: Si p > 0.04, no se rechaza H₀ → varianzas poblacionales similares.
# Distribution plots ----
# For each outcome column: a boxplot split by p34_3_bin, then one histogram
# for the "Sí" rows and one for the "No" rows.
#
# Colours come from a single palette keyed by answer. Previously the boxplots
# used c("blue", "green") in level order ("No", "Sí"), which painted "No"
# blue and "Sí" green — the opposite of the histograms.

local({
  # Axis label for each outcome column.
  etiquetas <- c(
    p4  = "Libros leídos",
    p26 = "Minutos lectura",
    p5  = "Tiempo leyendo",
    p30 = "Comprensión lectora"
  )
  paleta <- c("Sí" = "blue", "No" = "green")
  # Boxes are drawn in factor-level order, so pick colours in that order.
  niveles <- levels(factor(D$p34_3_bin))

  for (v in names(etiquetas)) {
    etiqueta <- etiquetas[[v]]

    boxplot(reformulate("p34_3_bin", response = v), data = D,
            main = paste0("Boxplot ", v, " (", etiqueta, ")"),
            xlab = "¿Le leían de niño?", ylab = etiqueta,
            col = unname(paleta[niveles]))

    for (respuesta in c("Sí", "No")) {
      hist(D[[v]][D$p34_3_bin == respuesta],
           main = paste0("Histograma ", v, " - ", respuesta),
           xlab = etiqueta, col = paleta[[respuesta]])
    }
  }
})

# Mean plots ----
# One boxplot per outcome comparing the two groups stored in decirsi
# ("Sí_p34_2" vs "Sí_p34_3"), with each group's mean (NAs dropped) overlaid
# as a red dot.

local({
  especificaciones <- list(
    list(variable = "p4", titulo = "Libros leídos",
         eje_y = "Número de libros leídos"),
    list(variable = "p26", titulo = "Minutos de lectura",
         eje_y = "Minutos de lectura continua"),
    list(variable = "p5", titulo = "Tiempo leyendo",
         eje_y = "Tiempo leyendo"),
    list(variable = "p30", titulo = "Comprensión lectora",
         eje_y = "Nivel de comprensión lectora")
  )

  for (esp in especificaciones) {
    # Rows of the long table belonging to this outcome.
    datos <- decirsi[decirsi$variable == esp$variable, , drop = FALSE]

    boxplot(valor ~ grupo, data = datos,
            main = paste0(esp$titulo, " (", esp$variable,
                          "): Sí_p34_2 vs Sí_p34_3"),
            xlab = "Grupo", ylab = esp$eje_y, col = c("blue", "green"))

    # Group means, plotted at x = 1, 2 in the same order as the boxes.
    medias <- tapply(datos$valor, datos$grupo, mean, na.rm = TRUE)
    points(medias, pch = 19, col = "red")
  }
})

# Confidence intervals for the difference in means ----
# Welch two-sample t-test (the t.test() default) of grupo_2 against grupo_3
# for each outcome column, at the level stored in conf.level.
# NOTE(review): grupo_2 and grupo_3 are two filters of the same data frame,
# so a respondent can be in both groups, while the two-sample t-test treats
# the samples as independent — confirm this is acceptable for the analysis.

cat("\nIntervalos de confianza 96% para la diferencia de medias poblacionales:\n")
local({
  for (v in c("p4", "p26", "p5", "p30")) {
    cat("Variable ", v, ":\n", sep = "")
    prueba <- t.test(grupo_2[[v]], grupo_3[[v]], conf.level = conf.level)
    # Keep the "data:" line that a literal `grupo_2$p4`-style call prints.
    prueba$data.name <- sprintf("grupo_2$%s and grupo_3$%s", v, v)
    print(prueba)
  }
})

# Recorded output ----
## 
## Intervalos de confianza 96% para la diferencia de medias poblacionales:
## Variable p4:
## 
##  Welch Two Sample t-test
## 
## data:  grupo_2$p4 and grupo_3$p4
## t = -1.0983, df = 2121.7, p-value = 0.2722
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
##  -0.3534894  0.1072479
## sample estimates:
## mean of x mean of y 
## 0.8477157 0.9708365
## Variable p26:
## 
##  Welch Two Sample t-test
## 
## data:  grupo_2$p26 and grupo_3$p26
## t = -1.7848, df = 2136.9, p-value = 0.07443
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
##  -4.8413480  0.3406526
## sample estimates:
## mean of x mean of y 
##  20.21320  22.46355
## Variable p5:
## 
##  Welch Two Sample t-test
## 
## data:  grupo_2$p5 and grupo_3$p5
## t = -1.1972, df = 2126.6, p-value = 0.2314
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
##  -0.24676161  0.06508637
## sample estimates:
## mean of x mean of y 
##  1.099492  1.190330
## Variable p30:
## 
##  Welch Two Sample t-test
## 
## data:  grupo_2$p30 and grupo_3$p30
## t = -2.9368, df = 2125.7, p-value = 0.003352
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
##  -0.32301200 -0.05706054
## sample estimates:
## mean of x mean of y 
##  1.608122  1.798158