R Markdown

df <- data.frame(
peso = ChickWeight$weight,
dieta = ChickWeight$Diet
)

head(df)
##   peso dieta
## 1   42     1
## 2   51     1
## 3   59     1
## 4   64     1
## 5   76     1
## 6   93     1
nrow(df)
## [1] 578
summary(df$peso)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    35.0    63.0   103.0   121.8   163.8   373.0
set.seed(456)
n_srs <- 120

muestra_srs <- df %>% slice_sample(n = n_srs)

media_srs <- mean(muestra_srs$peso)
media_srs
## [1] 120.925
Nh <- df %>%
group_by(dieta) %>%
summarise(Nh = n())

N_total <- nrow(df)

Nh <- Nh %>%
mutate(W_h = Nh / N_total)

Nh
## # A tibble: 4 × 3
##   dieta    Nh   W_h
##   <fct> <int> <dbl>
## 1 1       220 0.381
## 2 2       120 0.208
## 3 3       120 0.208
## 4 4       118 0.204
Nh <- Nh %>% mutate(nh_prop = round(W_h * n_srs))
Nh
## # A tibble: 4 × 4
##   dieta    Nh   W_h nh_prop
##   <fct> <int> <dbl>   <dbl>
## 1 1       220 0.381      46
## 2 2       120 0.208      25
## 3 3       120 0.208      25
## 4 4       118 0.204      24

Construir muestras estratificadas

set.seed(456)

niveles_dieta <- Nh$dieta

lista_prop <- lapply(niveles_dieta, function(d) {
df_h <- df[df$dieta == d, ]
nh   <- Nh$nh_prop[Nh$dieta == d]
idx  <- sample(seq_len(nrow(df_h)), size = nh, replace = FALSE)
df_h[idx, ]
})

muestra_prop <- do.call(rbind, lista_prop)

head(muestra_prop)
##     peso dieta
## 163  192     1
## 38    49     1
## 213   65     1
## 27    55     1
## 25    43     1
## 206  120     1
table(muestra_prop$dieta)
## 
##  1  2  3  4 
## 46 25 25 24
Nh$nh_prop
## [1] 46 25 25 24
sh <- df %>%
group_by(dieta) %>%
summarise(S_h = sd(peso))

Nh <- Nh %>% left_join(sh, by = "dieta")

Nh <- Nh %>%
mutate(
peso_opt = Nh * S_h,
nh_opt = round(n_srs * peso_opt / sum(peso_opt))
)

Nh
## # A tibble: 4 × 7
##   dieta    Nh   W_h nh_prop   S_h peso_opt nh_opt
##   <fct> <int> <dbl>   <dbl> <dbl>    <dbl>  <dbl>
## 1 1       220 0.381      46  56.7   12464.     38
## 2 2       120 0.208      25  71.6    8593.     26
## 3 3       120 0.208      25  86.5   10385.     31
## 4 4       118 0.204      24  68.8    8122.     25
set.seed(456)

lista_opt <- lapply(niveles_dieta, function(d) {
df_h <- df[df$dieta == d, ]
nh   <- Nh$nh_opt[Nh$dieta == d]
idx  <- sample(seq_len(nrow(df_h)), size = nh, replace = FALSE)
df_h[idx, ]
})

muestra_opt <- do.call(rbind, lista_opt)

head(muestra_opt)
##     peso dieta
## 163  192     1
## 38    49     1
## 213   65     1
## 27    55     1
## 25    43     1
## 206  120     1
table(muestra_opt$dieta)
## 
##  1  2  3  4 
## 38 26 31 25
Nh$nh_opt
## [1] 38 26 31 25
medias_estratos <- df %>%
group_by(dieta) %>%
summarise(Media_real = mean(peso), .groups = "drop")

medias_estratos
## # A tibble: 4 × 2
##   dieta Media_real
##   <fct>      <dbl>
## 1 1           103.
## 2 2           123.
## 3 3           143.
## 4 4           135.
medias_prop <- muestra_prop %>%
group_by(dieta) %>%
summarise(m = mean(peso), .groups = "drop")

tabla_prop <- Nh %>%
left_join(medias_prop, by = "dieta")

tabla_prop
## # A tibble: 4 × 8
##   dieta    Nh   W_h nh_prop   S_h peso_opt nh_opt     m
##   <fct> <int> <dbl>   <dbl> <dbl>    <dbl>  <dbl> <dbl>
## 1 1       220 0.381      46  56.7   12464.     38  103.
## 2 2       120 0.208      25  71.6    8593.     26  115.
## 3 3       120 0.208      25  86.5   10385.     31  139.
## 4 4       118 0.204      24  68.8    8122.     25  124.
media_prop <- sum(tabla_prop$W_h * tabla_prop$m)
media_prop
## [1] 117.4028
medias_opt <- muestra_opt %>%
group_by(dieta) %>%
summarise(m = mean(peso), .groups = "drop")

tabla_opt <- Nh %>%
left_join(medias_opt, by = "dieta")

tabla_opt
## # A tibble: 4 × 8
##   dieta    Nh   W_h nh_prop   S_h peso_opt nh_opt     m
##   <fct> <int> <dbl>   <dbl> <dbl>    <dbl>  <dbl> <dbl>
## 1 1       220 0.381      46  56.7   12464.     38  104.
## 2 2       120 0.208      25  71.6    8593.     26  114.
## 3 3       120 0.208      25  86.5   10385.     31  147.
## 4 4       118 0.204      24  68.8    8122.     25  125.
media_opt <- sum(tabla_opt$W_h * tabla_opt$m)
media_opt
## [1] 119.3076
list(
Media_poblacional_real    = mean(df$peso),
Media_SRS                 = media_srs,
Media_estratificada_prop  = media_prop,
Media_estratificada_opt   = media_opt
)
## $Media_poblacional_real
## [1] 121.8183
## 
## $Media_SRS
## [1] 120.925
## 
## $Media_estratificada_prop
## [1] 117.4028
## 
## $Media_estratificada_opt
## [1] 119.3076