# Load the survey extract and prepare the groups used by the analyses below.

D = read.csv("DatosEquipo7_filtrado1.csv")

# Binary "Sí"/"No" indicators: answer codes 2 and 3 become "Sí", anything
# else becomes "No".
#   p34_2_bin: derived from p34_2 (saw parents reading)
#   p34_3_bin: derived from p34_3 (was read to as a child)
# NOTE(review): %in% never yields NA, so a missing answer is also labelled
# "No" -- confirm that this is intended.
for (pregunta in c("p34_2", "p34_3")) {
  D[[paste0(pregunta, "_bin")]] = ifelse(D[[pregunta]] %in% c(2, 3), "Sí", "No")
}

# Respondents answering "Sí" to each question. A respondent can appear in
# both groups.
grupo_2 = D[D$p34_2_bin == "Sí", , drop = FALSE]
grupo_3 = D[D$p34_3_bin == "Sí", , drop = FALSE]

# Long-format table (grupo, variable, valor): for each outcome variable, the
# values of grupo_2 followed by the values of grupo_3.
decirsi = do.call(
  rbind,
  unlist(
    lapply(c("p4", "p26", "p5", "p30"), function(nombre) {
      list(
        data.frame(grupo = "Sí_p34_2", variable = nombre, valor = grupo_2[[nombre]]),
        data.frame(grupo = "Sí_p34_3", variable = nombre, valor = grupo_3[[nombre]])
      )
    }),
    recursive = FALSE
  )
)

# Confidence level used by the confidence intervals computed later in the
# script.
conf.level = 0.96

# Variance comparison between grupo_2 and grupo_3 (F test, H0: equal
# population variances), run once per outcome variable.
#
# conf.level is passed explicitly so that the interval reported for the
# variance ratio uses the script-wide level; without it var.test() falls back
# to its default of 0.95, which disagrees with the threshold quoted in the
# interpretation line.

cat("Comparación de varianzas (h_0: varianzas poblacionales iguales)\n\n")

# Heading printed before each test; only the first one has no leading blank
# line.
encabezados_f = c(
  p4 = "Variable p4:\n",
  p26 = "\nVariable p26:\n",
  p5 = "\nVariable p5:\n",
  p30 = "\nVariable p30:\n"
)

for (nombre in names(encabezados_f)) {
  cat(encabezados_f[[nombre]])
  prueba_f = var.test(grupo_2[[nombre]], grupo_3[[nombre]], conf.level = conf.level)
  # Keep the "grupo_2$p4 and grupo_3$p4" style label on the printed "data:" line.
  prueba_f$data.name = paste0("grupo_2$", nombre, " and grupo_3$", nombre)
  print(prueba_f)
}

# The threshold shown to the reader is derived from conf.level so the two
# cannot drift apart (prints 0.04 for conf.level = 0.96).
cat("Interpretación: Si el valor p > ", format(1 - conf.level), ", no se rechaza h_0, sugiriendo varianzas poblacionales similares.\n\n", sep = "")

# Console output pasted from an earlier run (lines starting with "##").
# NOTE: that run predates passing conf.level to var.test(), so its interval
# lines are labelled "95 percent". F, the degrees of freedom, the p-value and
# the ratio estimate do not depend on conf.level. Re-run the script to refresh
# the intervals.
## Comparación de varianzas (h_0: varianzas poblacionales iguales)
## Variable p4:
## 
##  F test to compare two variances
## 
## data:  grupo_2$p4 and grupo_3$p4
## F = 0.99489, num df = 984, denom df = 1302, p-value = 0.9339
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8853414 1.1190803
## sample estimates:
## ratio of variances 
##           0.994892
## 
## Variable p26:
## 
##  F test to compare two variances
## 
## data:  grupo_2$p26 and grupo_3$p26
## F = 0.96698, num df = 984, denom df = 1302, p-value = 0.5763
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8605011 1.0876820
## sample estimates:
## ratio of variances 
##           0.966978
## 
## Variable p5:
## 
##  F test to compare two variances
## 
## data:  grupo_2$p5 and grupo_3$p5
## F = 0.98587, num df = 984, denom df = 1302, p-value = 0.814
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8773128 1.1089321
## sample estimates:
## ratio of variances 
##          0.9858699
## 
## Variable p30:
## 
##  F test to compare two variances
## 
## data:  grupo_2$p30 and grupo_3$p30
## F = 0.98757, num df = 984, denom df = 1302, p-value = 0.8363
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8788218 1.1108395
## sample estimates:
## ratio of variances 
##          0.9875657
## Interpretación: Si el valor p > 0.04, no se rechaza h_0, sugiriendo varianzas poblacionales similares.
# Distribution plots: one boxplot per outcome variable, split by the
# p34_3_bin indicator (was read to as a child).

# Plot title and y-axis label for each outcome variable.
titulos_caja = c(
  p4 = "p4 (Libros leídos)",
  p26 = "p26 (Minutos lectura)",
  p5 = "p5 (Tiempo leyendo)",
  p30 = "p30 (Comprensión lectora)"
)
ejes_caja = c(
  p4 = "Libros leídos",
  p26 = "Minutos lectura",
  p5 = "Tiempo leyendo",
  p30 = "Comprensión lectora"
)

for (nombre in names(titulos_caja)) {
  boxplot(
    as.formula(paste(nombre, "~ p34_3_bin")),
    data = D,
    main = titulos_caja[[nombre]],
    xlab = "¿Le leían de niño?",
    ylab = ejes_caja[[nombre]],
    col = c("blue", "green")
  )
}


# Confidence intervals for the difference in means between grupo_2 and
# grupo_3 (Welch two-sample t-test), one per outcome variable, at the level
# given by conf.level.
#
# NOTE(review): grupo_2 and grupo_3 are both subsets of D and a respondent can
# belong to both, whereas the two-sample t-test treats them as independent
# samples -- confirm that this comparison is the intended one.

cat("Intervalos de confianza del 96% para la diferencia de medias poblacionales:\n")

# Heading printed before each test; only the first one has no leading blank
# line.
encabezados_t = c(
  p4 = "Variable p4:\n",
  p26 = "\nVariable p26:\n",
  p5 = "\nVariable p5:\n",
  p30 = "\nVariable p30:\n"
)

for (nombre in names(encabezados_t)) {
  cat(encabezados_t[[nombre]])
  prueba_t = t.test(grupo_2[[nombre]], grupo_3[[nombre]], conf.level = conf.level)
  # Restore the "grupo_2$p4 and grupo_3$p4" style label on the "data:" line.
  prueba_t$data.name = paste0("grupo_2$", nombre, " and grupo_3$", nombre)
  print(prueba_t)
}

# Console output pasted from a previous run (lines starting with "##").
## Intervalos de confianza del 96% para la diferencia de medias poblacionales:
## Variable p4:
## 
##  Welch Two Sample t-test
## 
## data:  grupo_2$p4 and grupo_3$p4
## t = -1.0983, df = 2121.7, p-value = 0.2722
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
##  -0.3534894  0.1072479
## sample estimates:
## mean of x mean of y 
## 0.8477157 0.9708365
## 
## Variable p26:
## 
##  Welch Two Sample t-test
## 
## data:  grupo_2$p26 and grupo_3$p26
## t = -1.7848, df = 2136.9, p-value = 0.07443
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
##  -4.8413480  0.3406526
## sample estimates:
## mean of x mean of y 
##  20.21320  22.46355
## 
## Variable p5:
## 
##  Welch Two Sample t-test
## 
## data:  grupo_2$p5 and grupo_3$p5
## t = -1.1972, df = 2126.6, p-value = 0.2314
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
##  -0.24676161  0.06508637
## sample estimates:
## mean of x mean of y 
##  1.099492  1.190330
## 
## Variable p30:
## 
##  Welch Two Sample t-test
## 
## data:  grupo_2$p30 and grupo_3$p30
## t = -2.9368, df = 2125.7, p-value = 0.003352
## alternative hypothesis: true difference in means is not equal to 0
## 96 percent confidence interval:
##  -0.32301200 -0.05706054
## sample estimates:
## mean of x mean of y 
##  1.608122  1.798158
# Confidence intervals for proportions: within each level of p30, the share
# of respondents whose p34_2_bin / p34_3_bin indicator is "Sí", at the level
# given by conf.level.

cat("Intervalos de confianza 96% para proporciones poblacionales (proporción de 'Sí' en P34_2 y P34_3 según nivel de comprensión lectora P30):\n")

# sort() puts the levels in increasing order and drops NA. A bare unique()
# would keep an NA level, for which both counts are 0 and prop.test() stops
# with an error.
niveles = sort(unique(D$p30))

for (nivel in niveles) {
  # %in% is never NA, so rows with a missing p30 are simply excluded.
  en_nivel = D$p30 %in% nivel

  # The names x_2 / n_2 / x_3 / n_3 are kept on purpose: prop.test() prints
  # the argument expressions on its "data:" line.
  x_2 = sum(D$p34_2_bin[en_nivel] == "Sí", na.rm = TRUE)
  n_2 = sum(en_nivel)
  x_3 = sum(D$p34_3_bin[en_nivel] == "Sí", na.rm = TRUE)
  n_3 = n_2 # same p30 level, hence the same denominator

  cat("Nivel de comprensión lectora:", nivel, "\n")
  cat("P34_2 (vio leer a padres):\n")
  print(prop.test(x_2, n_2, conf.level = conf.level))
  cat("P34_3 (le leían de niño):\n")
  print(prop.test(x_3, n_3, conf.level = conf.level))
  cat("\n------------------------------------\n")
}

# Console output pasted from an earlier run (lines starting with "##"). That
# run visited the levels in order of first appearance (0, 3, 4, 2, 1); the
# per-level sections are listed here in the sorted order the loop now uses.
# The figures themselves are unchanged.
## Intervalos de confianza 96% para proporciones poblacionales (proporción de 'Sí' en P34_2 y P34_3 según nivel de comprensión lectora P30):
## Nivel de comprensión lectora: 0 
## P34_2 (vio leer a padres):
## 
##  1-sample proportions test with continuity correction
## 
## data:  x_2 out of n_2, null probability 0.5
## X-squared = 49.191, df = 1, p-value = 2.323e-12
## alternative hypothesis: true p is not equal to 0.5
## 96 percent confidence interval:
##  0.5968489 0.6746710
## sample estimates:
##         p 
## 0.6366366 
## 
## P34_3 (le leían de niño):
## 
##  1-sample proportions test with continuity correction
## 
## data:  x_3 out of n_3, null probability 0.5
## X-squared = 162.52, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 96 percent confidence interval:
##  0.7109201 0.7813897
## sample estimates:
##         p 
## 0.7477477 
## 
## 
## ------------------------------------
## Nivel de comprensión lectora: 1 
## P34_2 (vio leer a padres):
## 
##  1-sample proportions test with continuity correction
## 
## data:  x_2 out of n_2, null probability 0.5
## X-squared = 7.35, df = 1, p-value = 0.006706
## alternative hypothesis: true p is not equal to 0.5
## 96 percent confidence interval:
##  0.5429204 0.7980981
## sample estimates:
##         p 
## 0.6833333 
## 
## P34_3 (le leían de niño):
## 
##  1-sample proportions test with continuity correction
## 
## data:  x_3 out of n_3, null probability 0.5
## X-squared = 8.8167, df = 1, p-value = 0.002985
## alternative hypothesis: true p is not equal to 0.5
## 96 percent confidence interval:
##  0.5600575 0.8119389
## sample estimates:
##   p 
## 0.7 
## 
## 
## ------------------------------------
## Nivel de comprensión lectora: 2 
## P34_2 (vio leer a padres):
## 
##  1-sample proportions test with continuity correction
## 
## data:  x_2 out of n_2, null probability 0.5
## X-squared = 4.533, df = 1, p-value = 0.03325
## alternative hypothesis: true p is not equal to 0.5
## 96 percent confidence interval:
##  0.5025881 0.6453121
## sample estimates:
##         p 
## 0.5754717 
## 
## P34_3 (le leían de niño):
## 
##  1-sample proportions test with continuity correction
## 
## data:  x_3 out of n_3, null probability 0.5
## X-squared = 35.703, df = 1, p-value = 2.298e-09
## alternative hypothesis: true p is not equal to 0.5
## 96 percent confidence interval:
##  0.6373769 0.7693259
## sample estimates:
##         p 
## 0.7075472 
## 
## 
## ------------------------------------
## Nivel de comprensión lectora: 3 
## P34_2 (vio leer a padres):
## 
##  1-sample proportions test with continuity correction
## 
## data:  x_2 out of n_2, null probability 0.5
## X-squared = 45.605, df = 1, p-value = 1.447e-11
## alternative hypothesis: true p is not equal to 0.5
## 96 percent confidence interval:
##  0.3424129 0.4150599
## sample estimates:
##         p 
## 0.3780645 
## 
## P34_3 (le leían de niño):
## 
##  1-sample proportions test with continuity correction
## 
## data:  x_3 out of n_3, null probability 0.5
## X-squared = 20.485, df = 1, p-value = 6.01e-06
## alternative hypothesis: true p is not equal to 0.5
## 96 percent confidence interval:
##  0.5445506 0.6184176
## sample estimates:
##         p 
## 0.5819355 
## 
## 
## ------------------------------------
## Nivel de comprensión lectora: 4 
## P34_2 (vio leer a padres):
## 
##  1-sample proportions test with continuity correction
## 
## data:  x_2 out of n_2, null probability 0.5
## X-squared = 27.934, df = 1, p-value = 1.255e-07
## alternative hypothesis: true p is not equal to 0.5
## 96 percent confidence interval:
##  0.2912779 0.4061282
## sample estimates:
##         p 
## 0.3465347 
## 
## P34_3 (le leían de niño):
## 
##  1-sample proportions test with continuity correction
## 
## data:  x_3 out of n_3, null probability 0.5
## X-squared = 1.3201, df = 1, p-value = 0.2506
## alternative hypothesis: true p is not equal to 0.5
## 96 percent confidence interval:
##  0.4740902 0.5942387
## sample estimates:
##         p 
## 0.5346535 
## 
## 
## ------------------------------------
# High vs. low reading comprehension: number of "Sí" answers in p34_2_bin
# (x_*) and group size (n_*) for each comprehension group.
#
# NOTE(review): the rest of this script treats p30 as a numeric code (it is
# passed to t.test()/var.test() and its levels print as 0-4). Numeric codes
# never match the text labels used below, so both groups would come out
# empty. The code-to-label mapping is not visible here -- replace the labels
# with the corresponding codes once it is confirmed.

alta = D$p30 %in% c("Mayor parte", "Completo")
baja = D$p30 %in% c("Poco", "Medio")

# Fail loudly instead of silently carrying zero counts forward.
if (!any(alta) || !any(baja)) {
  warning(
    "Grupo de comprensión lectora vacío: los valores de p30 no coinciden con las etiquetas esperadas.",
    call. = FALSE
  )
}

x_alta = sum(D$p34_2_bin[alta] == "Sí", na.rm = TRUE)
n_alta = sum(alta, na.rm = TRUE)
x_baja = sum(D$p34_2_bin[baja] == "Sí", na.rm = TRUE)
n_baja = sum(baja, na.rm = TRUE)


# Welch t-test results whose confidence intervals are plotted below.
# Difference in means for p4
ic_diff_p4 = t.test(grupo_2$p4, grupo_3$p4, conf.level = conf.level)
# Difference in means for p26
ic_diff_p26 = t.test(grupo_2$p26, grupo_3$p26, conf.level = conf.level)

# Draw one confidence interval as a horizontal bar with end caps and mark the
# point estimate (mean of x minus mean of y) with a red dot.
#   ic:   result of t.test() for two samples
#   xlab: x-axis label
#   main: plot title
graficar_ic = function(ic, xlab, main) {
  limites = as.numeric(ic$conf.int)
  # type = "n" only sets up the axes. Without it plot(0, ...) also draws a
  # point at (1, 0), which shows up whenever the interval contains 1.
  plot(0, type = "n", xlim = limites, ylim = c(0, 1), yaxt = "n", ylab = "", xlab = xlab, main = main)
  arrows(limites[1], 0.5, limites[2], 0.5, angle = 90, code = 3, length = 0.1, lwd = 2)
  points(unname(ic$estimate[1] - ic$estimate[2]), 0.5, pch = 19, cex = 1.2, col = "red")
  invisible(NULL)
}

# Difference for p4
graficar_ic(ic_diff_p4, xlab = "Diferencia en libros (p4)", main = "Intervalos de Confianza96%: Diferencia p4 (p34_2 vs p34_3)")

# Difference for p26
graficar_ic(ic_diff_p26, xlab = "Diferencia en minutos (p26)", main = "Intervalos de Confianza96%: Diferencia p26 (p34_2 vs p34_3)")

# End of script.