########################################################
# TALLER - INTERVALOS DE CONFIANZA Y BOOTSTRAP EN R
########################################################
############################
# PROBLEM 1
############################
# Data: known population variance, sample size and sample mean.
sigma2 <- 1000
sigma <- sqrt(sigma2)
n <- 12
xbar <- 3250

# Two-sided z confidence interval for a mean when sigma is known.
#   xbar      sample mean
#   sigma     known population standard deviation
#   n         sample size
#   confianza confidence level, e.g. 0.95
# Returns a named vector c(LI = lower limit, LS = upper limit).
ic_media_z <- function(xbar, sigma, n, confianza) {
  alpha <- 1 - confianza
  z <- qnorm(1 - alpha/2)
  margen <- z*(sigma/sqrt(n))
  c(LI = xbar - margen, LS = xbar + margen)
}

# 95% CI
ic95 <- ic_media_z(xbar, sigma, n, confianza = 0.95)
LI95 <- ic95[["LI"]]
LS95 <- ic95[["LS"]]
LI95
## [1] 3232.108
LS95
## [1] 3267.892
# 90% CI
ic90 <- ic_media_z(xbar, sigma, n, confianza = 0.90)
LI90 <- ic90[["LI"]]
LS90 <- ic90[["LS"]]
LI90
## [1] 3234.985
LS90
## [1] 3265.015
# 99% CI
ic99 <- ic_media_z(xbar, sigma, n, confianza = 0.99)
LI99 <- ic99[["LI"]]
LS99 <- ic99[["LS"]]
LI99
## [1] 3226.486
LS99
## [1] 3273.514
# Width comparison: the interval widens as the confidence level grows.
ancho90 <- LS90 - LI90
ancho95 <- LS95 - LI95
ancho99 <- LS99 - LI99
ancho90
## [1] 30.03078
ancho95
## [1] 35.78388
ancho99
## [1] 47.02799
# Sample size
# Maximum error = 15
E <- 15
confianza <- 0.99
# Derive alpha and z from the stated confidence level so the two cannot drift.
alpha <- 1 - confianza
z <- qnorm(1 - alpha/2)
n_requerido <- (z*sigma/E)^2
# Round up: the sample size must be a whole number that meets the error bound.
ceiling(n_requerido)
## [1] 30
############################
# PROBLEM 2
############################
# Six sample observations.
datos2 <- c(
  16.8, 17.2, 17.4,
  16.9, 16.5, 17.1
)
# Shapiro-Wilk normality check; the result is stored and then auto-printed.
normalidad2 <- shapiro.test(datos2)
normalidad2
##
## Shapiro-Wilk normality test
##
## data: datos2
## W = 0.98779, p-value = 0.9831
# 99% t confidence interval for the mean.
ic_media2 <- t.test(datos2, conf.level = 0.99)
ic_media2
##
## One Sample t-test
##
## data: datos2
## t = 130.47, df = 5, p-value = 5.017e-10
## alternative hypothesis: true mean is not equal to 0
## 99 percent confidence interval:
## 16.45847 17.50820
## sample estimates:
## mean of x
## 16.98333
############################
# PROBLEM 3
############################
# Sample size, sample standard deviation and significance level.
n <- 51
s <- 0.37
alpha <- 0.05
# CI for sigma, built from the chi-square interval for the variance.
# Both chi-square quantiles are obtained in a single vectorised call.
cuantiles_chi <- qchisq(c(alpha/2, 1-alpha/2), df = n-1)
chi_inf <- cuantiles_chi[1]
chi_sup <- cuantiles_chi[2]
# (n - 1) * s^2 is the numerator shared by both variance limits.
numerador_var <- (n-1)*s^2
# The upper quantile yields the lower limit and vice versa.
LI_var <- numerador_var/chi_sup
LS_var <- numerador_var/chi_inf
# Square roots turn the variance limits into limits for sigma.
LI_sigma <- sqrt(LI_var)
LS_sigma <- sqrt(LS_var)
LI_sigma
## [1] 0.3095824
LS_sigma
## [1] 0.4599389
# Comment:
# As n increases, the interval becomes narrower.
############################
# PROBLEM 4
############################
# Number of successes and number of trials.
x <- 823
n <- 1000
# 95% CI for the proportion, without continuity correction.
# The result is stored and then auto-printed.
ic_prop4 <- prop.test(x = x, n = n, correct = FALSE, conf.level = 0.95)
ic_prop4
##
## 1-sample proportions test without continuity correction
##
## data: x out of n, null probability 0.5
## X-squared = 417.32, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.7981213 0.8454066
## sample estimates:
## p
## 0.823
############################
# PROBLEM 5
############################
# Thirty observations; note the value 78.000 in the second row.
datos5 <- c(
  1.542, 1.622, 1.440, 1.459, 1.598, 1.585, 1.466, 1.608, 1.533, 1.498,
  1.532, 1.546, 1.520, 1.532, 1.600, 1.466, 1.494, 78.000, 1.523, 1.504,
  1.499, 1.548, 1.542, 1.397, 1.545, 1.611, 1.626, 1.511, 1.487, 1.558
)
# Shapiro-Wilk normality check on the raw data.
normalidad5 <- shapiro.test(datos5)
normalidad5
##
## Shapiro-Wilk normality test
##
## data: datos5
## W = 0.18266, p-value = 8.189e-12
# Histogram
hist(datos5)

# Boxplot
boxplot(datos5)

# 99% CI for the mean using all observations.
ic_media5 <- t.test(datos5, conf.level = 0.99)
ic_media5
##
## One Sample t-test
##
## data: datos5
## t = 1.6005, df = 29, p-value = 0.1203
## alternative hypothesis: true mean is not equal to 0
## 99 percent confidence interval:
## -2.946281 11.105747
## sample estimates:
## mean of x
## 4.079733
# NOTE:
# The value 78.000 looks like an outlier or a data-entry error.
# To analyse the data without it, flag the suspect value and drop it:
es_atipico <- datos5 == 78
datos5_limpios <- datos5[!es_atipico]
normalidad5_limpios <- shapiro.test(datos5_limpios)
normalidad5_limpios
##
## Shapiro-Wilk normality test
##
## data: datos5_limpios
## W = 0.97294, p-value = 0.6417
ic_media5_limpios <- t.test(datos5_limpios, conf.level = 0.99)
ic_media5_limpios
##
## One Sample t-test
##
## data: datos5_limpios
## t = 145.33, df = 28, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 99 percent confidence interval:
## 1.501653 1.559865
## sample estimates:
## mean of x
## 1.530759
############################
# PROBLEM 6
############################
# Thirty count observations.
datos6 <- c(
  3, 4, 2, 5, 4, 8, 2, 9, 3, 6, 2, 8, 3, 3, 5,
  6, 3, 7, 2, 8, 4, 4, 4, 0, 5, 2, 3, 0, 4, 8
)
# 95% CI for the mean; the result is stored and then auto-printed.
ic_media6 <- t.test(datos6, conf.level = 0.95)
ic_media6
##
## One Sample t-test
##
## data: datos6
## t = 9.71, df = 29, p-value = 1.281e-10
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 3.341659 5.125008
## sample estimates:
## mean of x
## 4.233333
# 98% CI for the variance (chi-square interval).
n <- length(datos6)
s2 <- var(datos6)
alpha <- 0.02
# Both chi-square quantiles are obtained in a single vectorised call.
cuantiles_chi <- qchisq(c(alpha/2, 1-alpha/2), df = n-1)
chi_inf <- cuantiles_chi[1]
chi_sup <- cuantiles_chi[2]
# (n - 1) * s2 is the numerator shared by both limits.
numerador_var <- (n-1)*s2
# The upper quantile yields the lower limit and vice versa.
LI_var <- numerador_var/chi_sup
LS_var <- numerador_var/chi_inf
LI_var
## [1] 3.33482
LS_var
## [1] 11.59942
############################
# PROBLEM 7
############################
# Sample size needed to estimate a mean with known sigma.
sigma <- 40       # known population standard deviation
E <- 15           # maximum error allowed
confianza <- 0.95 # confidence level
# Derive the critical value from the stated confidence level instead of
# repeating the literal 0.05, so the two cannot drift apart.
z <- qnorm(1 - (1 - confianza)/2)
n <- (z*sigma/E)^2
# Round up: the sample size must be a whole number that meets the error bound.
ceiling(n)
## [1] 28
############################
# PROBLEM 8
############################
# Twelve measurements per group.
estandar <- c(428, 419, 458, 439, 441, 456, 463, 429, 438, 445, 441, 463)
digital <- c(462, 448, 435, 465, 429, 472, 453, 459, 427, 468, 452, 447)
# 95% CI for the difference in means (Welch: variances not assumed equal).
# The result is stored and then auto-printed.
ic_dif8 <- t.test(x = estandar, y = digital, var.equal = FALSE, conf.level = 0.95)
ic_dif8
##
## Welch Two Sample t-test
##
## data: estandar and digital
## t = -1.355, df = 21.955, p-value = 0.1892
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -20.456585 4.289919
## sample estimates:
## mean of x mean of y
## 443.3333 451.4167
# Interpretation:
# If the interval contains 0, there is no significant difference.
############################
# PROBLEM 9
############################
# Number of successes and number of trials.
x <- 13
n <- 87
# 95% CI for the proportion, without continuity correction.
# The result is stored and then auto-printed.
ic_prop9 <- prop.test(x = x, n = n, correct = FALSE, conf.level = 0.95)
ic_prop9
##
## 1-sample proportions test without continuity correction
##
## data: x out of n, null probability 0.5
## X-squared = 42.77, df = 1, p-value = 6.157e-11
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.08945443 0.23904600
## sample estimates:
## p
## 0.1494253
# Sample size
# Maximum error = 0.03
E <- 0.03
z <- qnorm(1 - 0.05/2)
# With no prior information, p = 0.5 maximises p * (1 - p).
p <- 0.5
# From here on n holds the required sample size, not the 87 trials above.
numerador_n <- z^2 * p * (1-p)
n <- numerador_n / E^2
ceiling(n)
## [1] 1068
# Finite population correction:
# n_ajustada = n / (1 + ((n-1)/N))
############################
# PROBLEM 10
############################
datos10 <- c(7.69, 4.97, 4.56, 6.49, 4.34, 6.24, 4.45)
# Mean of the original sample
xbar <- mean(datos10)
# Bootstrap: resample with replacement k times and keep each resample mean.
set.seed(123)  # fixed seed so the resamples are reproducible
k <- 1000
n <- length(datos10)
medias_boot <- numeric(k)  # preallocated result vector
for (i in seq_len(k)) {
  muestra_boot <- sample(datos10,
                         size = n,
                         replace = TRUE)
  medias_boot[i] <- mean(muestra_boot)
}
# Percentiles of the bootstrap means
P2.5 <- quantile(medias_boot, 0.025)
P97.5 <- quantile(medias_boot, 0.975)
# Method 1: percentile interval
IC1 <- c(P2.5, P97.5)
IC1
## 2.5% 97.5%
## 4.748393 6.508643
# Method 2: interval reflected around the sample mean.
# The quantile names are dropped and replaced by LI/LS; otherwise the lower
# limit would print under "97.5%" and the upper limit under "2.5%".
IC2 <- c(LI = unname(2*xbar - P97.5),
         LS = unname(2*xbar - P2.5))
IC2
##       LI       LS
## 4.559929 6.320179
# Histogram of the bootstrap means
hist(medias_boot,
     main = "Distribución Bootstrap",
     xlab = "Media Bootstrap")
