---
output:
  github_document: default
  html_document: default
knit: (function(inputFile, encoding) { rmarkdown::render(inputFile, encoding = encoding, output_format = "all") })
editor_options:
  markdown:
    wrap: 72
---

Librerías

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Población

# Hand-entered description of the three strata.
# Sh: standard deviation of each stratum
Sh <- c(48, 79, 76)
# Nh: population size of each stratum
Nh <- c(3000, 4000, 5000)
# Stratum labels, in the same order as Nh and Sh
stratum <- c("A", "B", "C")

# Gather the three vectors into one table, one row per stratum
strata_data <- data.frame(stratum = stratum, Nh = Nh, Sh = Sh)
strata_data
##   stratum   Nh Sh
## 1       A 3000 48
## 2       B 4000 79
## 3       C 5000 76
# Share of the total population that falls in each stratum
N <- sum(strata_data[["Nh"]])
strata_data %>%
  mutate(porcentaje = Nh / N)
##   stratum   Nh Sh porcentaje
## 1       A 3000 48  0.2500000
## 2       B 4000 79  0.3333333
## 3       C 5000 76  0.4166667
# The mutate() result above is only printed, never assigned,
# so the table itself is printed again without the new column
strata_data
##   stratum   Nh Sh
## 1       A 3000 48
## 2       B 4000 79
## 3       C 5000 76
# Allocate a total sample of n_muestra units across the strata in two ways:
#   nh        - proportional allocation: n_muestra * Nh / N_total
#   nh_optimo - allocation proportional to Nh * Sh (Neyman allocation)
# Each size is rounded on its own, so the rounded sizes are not forced to
# add up to exactly n_muestra.
n_muestra <- 400
N_total <- sum(strata_data$Nh)
strata_data <- strata_data %>%
  mutate(
    peso = Nh / N_total,
    nh = round(n_muestra * peso, 0),
    peso_optimo = Nh * Sh,
    nh_optimo = round(n_muestra * peso_optimo / sum(peso_optimo), 0)
  )
strata_data
##   stratum   Nh Sh      peso  nh peso_optimo nh_optimo
## 1       A 3000 48 0.2500000 100      144000        69
## 2       B 4000 79 0.3333333 133      316000       150
## 3       C 5000 76 0.4166667 167      380000       181
# Simulate the three stratum populations from normal distributions.
# The draws stay in the order A, B, C so the seeded random stream is
# consumed exactly as before.
set.seed(123)

# Stratum A: size, mean, standard deviation, simulated values
N_A <- 3000
mu_A <- 50
sigma_A <- 10
poblacion_A <- rnorm(n = N_A, mean = mu_A, sd = sigma_A)

# Stratum B
N_B <- 4000
mu_B <- 65
sigma_B <- 25
poblacion_B <- rnorm(n = N_B, mean = mu_B, sd = sigma_B)

# Stratum C
N_C <- 5000
mu_C <- 80
sigma_C <- 50
poblacion_C <- rnorm(n = N_C, mean = mu_C, sd = sigma_C)

# Total population size and a one-row summary of the stratum sizes
N_total <- N_A + N_B + N_C
data <- data.frame(N_A = N_A, N_B = N_B, N_C = N_C, N_total = N_total)
# Fix the seed so the draws that follow are reproducible
set.seed(456)
# Stratum A: 100 units drawn without replacement (sample()'s default)
sample_A <- sample(x = poblacion_A, size = 100)
sample_A
##   [1] 69.39756 60.24475 63.38975 45.88079 52.88170 52.29395 37.80190 67.79038
##   [9] 44.82018 52.46799 53.77388 49.72653 61.81618 48.31892 31.93107 53.00132
##  [17] 41.48635 47.94174 40.03509 42.34001 45.31300 39.28253 67.51757 54.77037
##  [25] 28.01077 71.61416 58.84650 49.77166 47.58209 37.77597 40.52525 58.82923
##  [33] 40.95785 75.42904 54.26464 64.74314 50.07290 38.74603 33.32525 47.85495
##  [41] 47.39168 49.99615 47.95237 47.89266 46.87456 49.02588 59.62528 72.81967
##  [49] 29.06361 53.03529 39.06699 53.31434 54.27069 50.56017 61.95206 64.96822
##  [57] 55.57012 61.68384 62.46424 54.54769 47.64300 65.32424 48.55951 58.70434
##  [65] 50.37788 54.33676 36.83490 54.44400 51.19245 41.40985 43.13976 48.09641
##  [73] 59.47231 42.00486 53.89331 60.51701 38.14520 63.70004 38.87455 27.43465
##  [81] 61.48808 47.84619 32.48932 47.06387 55.04126 48.84778 50.87244 61.06837
##  [89] 50.84737 68.66852 45.72804 31.65080 51.74136 61.45263 40.86434 44.06051
##  [97] 39.36674 48.80547 54.83618 70.06681
# Stratum B: 133 units drawn without replacement (sample()'s default)
sample_B <- sample(x = poblacion_B, size = 133)
sample_B
##   [1]  40.636669  51.203138  98.110435  85.113863  73.908353  46.314495
##   [7]  59.789372  55.819754  52.695227  93.938065  72.078312  76.919986
##  [13]  61.209631  40.281340  57.553268   7.380116  24.428793  73.279458
##  [19]  64.738855  83.151902  72.378675  99.339153  75.184227  39.830568
##  [25]  33.149525  49.221159  79.579944  49.796287  51.501939  29.288135
##  [31]  60.258378  79.055845  20.927318  75.232461  13.144835  69.638364
##  [37]  32.486325  68.486778  33.770439  88.149317  76.601235  55.782891
##  [43]  22.273496  35.421651  49.695706  82.837245  31.869218  79.812428
##  [49]  39.405921  49.442447  80.219645  75.283767  94.261301  43.174137
##  [55]  85.454055  88.840026  66.474387  81.490443  31.109523  60.068245
##  [61]  65.830601  84.788294  78.116110  84.009579 146.897707  22.336759
##  [67]  15.766300  61.997131 105.801466  50.417202  63.682461  25.377560
##  [73]  23.997500  66.557205  34.421948  64.716834  97.782040 128.509216
##  [79]  69.699119  52.205651  35.957578  73.868470  87.449606  85.646659
##  [85]  53.814780  66.550660  77.174945  45.128586  55.327698  93.189837
##  [91] 107.077102  83.030797  25.117712  46.308131  29.511541  63.125508
##  [97]  68.405363  62.365290  48.206651  87.798599  66.461424  27.200385
## [103]  92.273784 106.784211 107.869030  71.265446  87.934059 111.445531
## [109]  47.258098  68.250456  61.222851  86.596199  83.662778  40.669651
## [115]  75.090518 102.585510  82.418135  10.650089  39.048949  82.567396
## [121]  21.800644  92.463329  88.390858  41.770089  80.534827  83.361455
## [127]  73.804022 114.627795  31.665852  75.565211  66.573443 116.346529
## [133]  68.998966
# Stratum C: 167 units drawn without replacement (sample()'s default)
sample_C <- sample(x = poblacion_C, size = 167)
sample_C
##   [1] 141.4659159 -50.0634313  92.6227893 125.5015525 191.2400409  11.1939377
##   [7] 116.3011875 192.8632859 143.0668880   2.2784721 105.1950593  45.8309742
##  [13]  93.5504010  61.4513177   2.6279853 124.8144948  42.8957926 -27.2356349
##  [19] 116.2531126  81.5026593  67.5760314 125.9737181 187.9294395  38.7282810
##  [25]  70.6557008  60.7678495 118.6717212  66.7393889 176.5191247  18.1951608
##  [31] 161.3439530  41.3617676   2.1551984 170.3418196 123.4461575  58.5362551
##  [37] -50.6413849  24.0684910  79.1351607  42.0826751  97.9545231  35.8195157
##  [43]  59.1723719 114.5298701 110.3105530  -2.5165668  47.8961247 159.9611958
##  [49]  18.9995972  86.0811321  84.9554250  98.5606874 111.4151635 108.2846437
##  [55]  76.0129245 136.3928441 -32.2844578  31.9942533  55.8765083  80.1139382
##  [61]  95.0331341  88.1650146  54.9023206 120.3755352  77.3044776  97.6496808
##  [67] 103.1692791 138.9280982  32.2841939  -9.8119910  98.8473765  60.9459952
##  [73]  31.2480205  95.7721545  99.4848807 115.4961315  75.2691903 100.6113554
##  [79] -18.1015774 134.3833411  38.1352740 135.2800278 -25.8440781  92.6889727
##  [85] 208.2129207  59.4341097 104.6215132  90.9390085 106.6992918 121.1875030
##  [91]  68.3094219 150.7585593  79.4552235  17.2525635  43.0591628  53.0892844
##  [97]  46.8516325  97.5001261  52.5882738 109.8736310   7.2937017 108.4966603
## [103] 104.4182366 102.7681073  23.1373706 103.3489046  85.3495338  85.2116583
## [109] 113.7754076   0.7981494  80.4229321  81.0994795  91.0602348 139.6734020
## [115]  41.3194251  54.0182279 140.4450870  44.9789668  33.9984888 151.7054440
## [121]  43.3786981  69.0340503  40.8256509  51.4547212  94.3308082 101.6786089
## [127]  94.4172136 138.7525045  74.3630785  -6.4110224  37.2763033  49.3504177
## [133] 103.8018660  30.0249973  30.5018438 108.3622121  59.3289078  58.1044815
## [139] 122.9075466  61.8241699  48.4489058 100.3590529 127.0215730 113.7123037
## [145] 202.8450536  46.4394010  65.6673877  45.8900271 115.6467308  40.7991866
## [151]  58.0314132  95.1014235 131.7257726 111.0204489  86.5715398 196.8522311
## [157] -22.1905209  48.9194496  63.8543184 109.7073299  83.3995642  68.8621260
## [163]  38.7735958 128.8010526 115.8179168 101.7378727 161.5281848
# Sample mean of each stratum for the samples of 100 / 133 / 167 units
media_A <- mean(sample_A)
media_A
## [1] 50.8156
media_B <- mean(sample_B)
media_B
## [1] 64.764
media_C <- mean(sample_C)
media_C
## [1] 80.00295
# Population mean: the stratum means weighted by stratum size
media_poblacional <- (
  N_A * mean(poblacion_A) +
    N_B * mean(poblacion_B) +
    N_C * mean(poblacion_C)
) / N_total
media_poblacional
## [1] 67.56835

# Sample sizes that were drawn from each stratum, and their total
n_A <- 100
n_B <- 133
n_C <- 167
n_total <- sum(n_A, n_B, n_C)

# Stratified estimator of the mean: the sample means weighted by the
# population size of their stratum
media_estratificada <- (
  N_A * media_A + N_B * media_B + N_C * media_C
) / N_total
media_estratificada
## [1] 67.62646
# Repeat the estimate with the sample sizes 69 / 150 / 181
# (the values shown in the nh_optimo column of the allocation table)
n_A <- 69
n_B <- 150
n_C <- 181
n_total <- sum(n_A, n_B, n_C)

# Re-seed, then draw without replacement in the order A, B, C
set.seed(456)
sample_A <- sample(x = poblacion_A, size = n_A)
sample_B <- sample(x = poblacion_B, size = n_B)
sample_C <- sample(x = poblacion_C, size = n_C)

# Sample mean of each stratum
media_A <- mean(sample_A)
media_B <- mean(sample_B)
media_C <- mean(sample_C)
media_A
## [1] 51.47921
media_B
## [1] 64.78471
media_C
## [1] 80.99149
# Population mean: the stratum means weighted by stratum size
media_poblacional <- (
  N_A * mean(poblacion_A) +
    N_B * mean(poblacion_B) +
    N_C * mean(poblacion_C)
) / N_total
media_poblacional
## [1] 67.56835

# Stratified estimator of the mean from the new samples
media_estratificada <- (
  N_A * media_A + N_B * media_B + N_C * media_C
) / N_total
media_estratificada
## [1] 68.21116
# Absolute difference between the population mean and the stratified estimate
error_absoluto <- abs(media_poblacional - media_estratificada)
error_absoluto
## [1] 0.6428104