########################################################
# TALLER - INTERVALOS DE CONFIANZA Y BOOTSTRAP EN R
########################################################

############################
# PROBLEMA 1
############################

# Data: known population variance, sample size and sample mean.
sigma2 <- 1000
sigma <- sqrt(sigma2)
n <- 12
xbar <- 3250

# Two-sided z confidence interval for a mean when sigma is known.
#   xbar  : sample mean
#   sigma : population standard deviation
#   n     : sample size
#   alpha : significance level (1 - confidence level)
# Returns c(lower, upper).
ic_media_z <- function(xbar, sigma, n, alpha) {
  z <- qnorm(1 - alpha / 2)
  margen <- z * (sigma / sqrt(n))
  c(xbar - margen, xbar + margen)
}

# 95% CI
alpha <- 0.05
ic95 <- ic_media_z(xbar, sigma, n, alpha)
LI95 <- ic95[1]
LS95 <- ic95[2]

LI95
## [1] 3232.108
LS95
## [1] 3267.892

# 90% CI
alpha <- 0.10
ic90 <- ic_media_z(xbar, sigma, n, alpha)
LI90 <- ic90[1]
LS90 <- ic90[2]

LI90
## [1] 3234.985
LS90
## [1] 3265.015

# 99% CI
alpha <- 0.01
ic99 <- ic_media_z(xbar, sigma, n, alpha)
LI99 <- ic99[1]
LS99 <- ic99[2]

LI99
## [1] 3226.486
LS99
## [1] 3273.514

# Comparison of interval widths
ancho90 <- LS90 - LI90
ancho95 <- LS95 - LI95
ancho99 <- LS99 - LI99

ancho90
## [1] 30.03078
ancho95
## [1] 35.78388
ancho99
## [1] 47.02799

# Sample size
# Maximum error = 15
E <- 15
confianza <- 0.99
# The quantile is derived from `confianza` (instead of a hard-coded 0.01)
# so that editing the confidence level actually changes the result.
z <- qnorm(1 - (1 - confianza) / 2)

n_requerido <- (z * sigma / E)^2
ceiling(n_requerido)
## [1] 30
############################
# PROBLEMA 2
############################

# Sample values analysed in this problem.
datos2 <- c(
  16.8, 17.2, 17.4,
  16.9, 16.5, 17.1
)

# Shapiro-Wilk normality test
shapiro.test(x = datos2)
## 
##  Shapiro-Wilk normality test
## 
## data:  datos2
## W = 0.98779, p-value = 0.9831

# 99% confidence interval for the mean (one-sample t; defaults spelled out)
t.test(
  x = datos2,
  alternative = "two.sided",
  mu = 0,
  conf.level = 0.99
)
## 
##  One Sample t-test
## 
## data:  datos2
## t = 130.47, df = 5, p-value = 5.017e-10
## alternative hypothesis: true mean is not equal to 0
## 99 percent confidence interval:
##  16.45847 17.50820
## sample estimates:
## mean of x 
##  16.98333
############################
# PROBLEMA 3
############################

# Sample size, sample standard deviation and significance level.
n <- 51
s <- 0.37
alpha <- 0.05

# CI for sigma
# Chi-square quantiles with n - 1 degrees of freedom.
gl <- n - 1
chi_inf <- qchisq(alpha / 2, df = gl)
chi_sup <- qchisq(1 - alpha / 2, df = gl)

# Variance limits: dividing by the upper quantile gives the lower limit,
# dividing by the lower quantile gives the upper limit.
suma_cuadrados <- gl * s^2
LI_var <- suma_cuadrados / chi_sup
LS_var <- suma_cuadrados / chi_inf

# Limits for sigma are the square roots of the variance limits.
LI_sigma <- sqrt(LI_var)
LS_sigma <- sqrt(LS_var)

LI_sigma
## [1] 0.3095824
LS_sigma
## [1] 0.4599389

# Comment:
# If n increases, the interval becomes narrower.



############################
# PROBLEMA 4
############################

# Number of successes and number of trials.
x <- 823
n <- 1000

# 95% confidence interval for the proportion, without continuity
# correction (defaults spelled out explicitly).
prop.test(
  x = x,
  n = n,
  alternative = "two.sided",
  conf.level = 0.95,
  correct = FALSE
)
## 
##  1-sample proportions test without continuity correction
## 
## data:  x out of n, null probability 0.5
## X-squared = 417.32, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.7981213 0.8454066
## sample estimates:
##     p 
## 0.823
############################
# PROBLEMA 5
############################

# Sample values analysed in this problem (30 observations).
datos5 <- c(
  1.542, 1.622, 1.440, 1.459, 1.598, 1.585,
  1.466, 1.608, 1.533, 1.498, 1.532, 1.546,
  1.520, 1.532, 1.600, 1.466, 1.494, 78.000,
  1.523, 1.504, 1.499, 1.548, 1.542, 1.397,
  1.545, 1.611, 1.626, 1.511, 1.487, 1.558
)

# Shapiro-Wilk normality test
shapiro.test(x = datos5)
## 
##  Shapiro-Wilk normality test
## 
## data:  datos5
## W = 0.18266, p-value = 8.189e-12

# Histogram
hist(x = datos5)

# Boxplot
boxplot(x = datos5)

# 99% confidence interval for the mean
t.test(x = datos5, conf.level = 0.99)
## 
##  One Sample t-test
## 
## data:  datos5
## t = 1.6005, df = 29, p-value = 0.1203
## alternative hypothesis: true mean is not equal to 0
## 99 percent confidence interval:
##  -2.946281 11.105747
## sample estimates:
## mean of x 
##  4.079733

# NOTE:
# The value 78.000 looks like an outlier or a data-entry error.
# To analyse the data without that value:

es_atipico <- datos5 == 78
datos5_limpios <- datos5[!es_atipico]

shapiro.test(x = datos5_limpios)
## 
##  Shapiro-Wilk normality test
## 
## data:  datos5_limpios
## W = 0.97294, p-value = 0.6417

t.test(x = datos5_limpios, conf.level = 0.99)
## 
##  One Sample t-test
## 
## data:  datos5_limpios
## t = 145.33, df = 28, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 99 percent confidence interval:
##  1.501653 1.559865
## sample estimates:
## mean of x 
##  1.530759
############################
# PROBLEMA 6
############################

# Sample values analysed in this problem (30 observations).
datos6 <- c(
  3, 4, 2, 5, 4, 8, 2, 9, 3, 6,
  2, 8, 3, 3, 5, 6, 3, 7, 2, 8,
  4, 4, 4, 0, 5, 2, 3, 0, 4, 8
)

# 95% confidence interval for the mean
t.test(x = datos6, conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  datos6
## t = 9.71, df = 29, p-value = 1.281e-10
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  3.341659 5.125008
## sample estimates:
## mean of x 
##  4.233333

# 98% confidence interval for the variance
n <- length(datos6)
s2 <- var(datos6)
alpha <- 0.02

# Chi-square quantiles with n - 1 degrees of freedom.
gl <- n - 1
chi_inf <- qchisq(alpha / 2, df = gl)
chi_sup <- qchisq(1 - alpha / 2, df = gl)

# The upper quantile gives the lower limit and vice versa.
suma_cuadrados <- gl * s2
LI_var <- suma_cuadrados / chi_sup
LS_var <- suma_cuadrados / chi_inf

LI_var
## [1] 3.33482
LS_var
## [1] 11.59942
############################
# PROBLEMA 7
############################

# Sample size needed to estimate a mean with maximum error E when the
# population standard deviation is known.
sigma <- 40
E <- 15
confianza <- 0.95

# The quantile is derived from `confianza` (instead of a hard-coded 0.05)
# so that editing the confidence level actually changes the result.
z <- qnorm(1 - (1 - confianza) / 2)

# n = (z * sigma / E)^2; stored unrounded and rounded up when reported.
n <- (z * sigma / E)^2

ceiling(n)
## [1] 28
############################
# PROBLEMA 8
############################

# First sample (12 observations).
estandar <- c(
  428, 419, 458, 439,
  441, 456, 463, 429,
  438, 445, 441, 463
)

# Second sample (12 observations).
digital <- c(
  462, 448, 435, 465,
  429, 472, 453, 459,
  427, 468, 452, 447
)

# 95% CI for the difference of means (Welch: variances not assumed equal)
t.test(
  x = estandar,
  y = digital,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
## 
##  Welch Two Sample t-test
## 
## data:  estandar and digital
## t = -1.355, df = 21.955, p-value = 0.1892
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -20.456585   4.289919
## sample estimates:
## mean of x mean of y 
##  443.3333  451.4167

# Interpretation:
# If the interval includes 0, there is no significant difference.



############################
# PROBLEMA 9
############################

# Number of successes and number of trials.
x <- 13
n <- 87

# 95% CI for the proportion, without continuity correction
prop.test(
  x = x,
  n = n,
  alternative = "two.sided",
  conf.level = 0.95,
  correct = FALSE
)
## 
##  1-sample proportions test without continuity correction
## 
## data:  x out of n, null probability 0.5
## X-squared = 42.77, df = 1, p-value = 6.157e-11
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.08945443 0.23904600
## sample estimates:
##         p 
## 0.1494253

# Sample size
# Maximum error = 0.03

E <- 0.03
z <- qnorm(1 - 0.05 / 2)

# With no prior information, p = 0.5 is used.
p <- 0.5

# From here on `n` holds the (unrounded) required sample size,
# overwriting the number of trials used above.
numerador <- z^2 * p * (1 - p)
n <- numerador / E^2

ceiling(n)
## [1] 1068

# Finite population correction:
# n_ajustada = n / (1 + ((n-1)/N))



############################
# PROBLEMA 10
############################

# Sample values analysed in this problem.
datos10 <- c(7.69, 4.97, 4.56, 6.49, 4.34, 6.24, 4.45)

# Mean of the original sample
xbar <- mean(datos10)

# Bootstrap (fixed seed so the resampling is reproducible)
set.seed(123)

k <- 1000
n <- length(datos10)

# Preallocated vector of bootstrap means.
medias_boot <- numeric(k)

# seq_len(k) instead of 1:k so the loop is skipped if k is ever 0.
for (i in seq_len(k)) {
  # Resample the data with replacement, same size as the original sample.
  muestra_boot <- sample(datos10, size = n, replace = TRUE)
  medias_boot[i] <- mean(muestra_boot)
}

# Percentiles of the bootstrap means
P2.5 <- quantile(medias_boot, 0.025)
P97.5 <- quantile(medias_boot, 0.975)

# Method 1: the bootstrap percentiles themselves
IC1 <- c(P2.5, P97.5)
IC1
##     2.5%    97.5% 
## 4.748393 6.508643

# Method 2: percentiles reflected around the sample mean
IC2 <- c(2 * xbar - P97.5,
         2 * xbar - P2.5)
# c() keeps the quantile names, which would label the lower limit "97.5%"
# and the upper limit "2.5%". Relabel so names match the limits' positions.
names(IC2) <- names(IC1)

IC2
##     2.5%    97.5% 
## 4.559929 6.320179

# Bootstrap histogram
hist(medias_boot,
     main = "Distribución Bootstrap",
     xlab = "Media Bootstrap")