No. 3 Bootstrap

Data

set.seed(123)

# Membuat data populasi berdistribusi normal (N=100.000)
populasi <- rnorm(100000, mean = 100, sd = 15)
parameter_populasi <- mean(populasi)  # Nilai parameter yang akan diestimasi

# Menentukan konfigurasi sesuai tabel soal
konfigurasi <- data.frame(
  n = c(10, 10, 10, 10, 10,         # n=10
        15, 15, 15, 15, 15,         # n=15
        20, 20, 20, 20, 20,         # n=20
        30, 30, 30, 30, 30,         # n=30
        50, 50, 50, 50, 50,         # n=50
        100, 100, 100, 100, 100),   # n=100
  
  d = c(5, 8, 10, 15, 20,           # d untuk n=10
        8, 11, 15, 23, 30,          # d untuk n=15
        10, 15, 20, 30, 40,         # d untuk n=20
        15, 23, 30, 45, 60,         # d untuk n=30
        25, 38, 50, 75, 100,        # d untuk n=50
        50, 75, 100, 150, 200)      # d untuk n=100
)

# Menambahkan kolom kategori perbandingan d vs n
konfigurasi$kategori <- ifelse(konfigurasi$d < konfigurasi$n, "d < n",
                               ifelse(konfigurasi$d == konfigurasi$n, "d = n", "d > n"))

# Menambahkan kolom persentase d/n
konfigurasi$persentase <- paste0(konfigurasi$d/konfigurasi$n * 100, "%")

# Menampilkan konfigurasi
print(konfigurasi)
##      n   d kategori        persentase
## 1   10   5    d < n               50%
## 2   10   8    d < n               80%
## 3   10  10    d = n              100%
## 4   10  15    d > n              150%
## 5   10  20    d > n              200%
## 6   15   8    d < n 53.3333333333333%
## 7   15  11    d < n 73.3333333333333%
## 8   15  15    d = n              100%
## 9   15  23    d > n 153.333333333333%
## 10  15  30    d > n              200%
## 11  20  10    d < n               50%
## 12  20  15    d < n               75%
## 13  20  20    d = n              100%
## 14  20  30    d > n              150%
## 15  20  40    d > n              200%
## 16  30  15    d < n               50%
## 17  30  23    d < n 76.6666666666667%
## 18  30  30    d = n              100%
## 19  30  45    d > n              150%
## 20  30  60    d > n              200%
## 21  50  25    d < n               50%
## 22  50  38    d < n               76%
## 23  50  50    d = n              100%
## 24  50  75    d > n              150%
## 25  50 100    d > n              200%
## 26 100  50    d < n               50%
## 27 100  75    d < n               75%
## 28 100 100    d = n              100%
## 29 100 150    d > n              150%
## 30 100 200    d > n              200%

Fungsi Bootstrap

bootstrap_simulasi <- function(data_sampel, d, B = 1000) {
  # Fungsi untuk melakukan bootstrap dengan ukuran sampel d
  # Input: 
  #   data_sampel: vektor data sampel asli
  #   d: ukuran sampel bootstrap
  #   B: jumlah replikasi bootstrap
  
  bootstrap_means <- numeric(B)
  
  for(i in 1:B) {
    # Mengambil sampel bootstrap dengan ukuran d (dengan pengembalian)
    sampel_bootstrap <- sample(data_sampel, size = d, replace = TRUE)
    bootstrap_means[i] <- mean(sampel_bootstrap)
  }
  
  # Menghitung statistik
  hasil <- list(
    mean_bootstrap = mean(bootstrap_means),
    se_bootstrap = sd(bootstrap_means),
    ci_bootstrap = quantile(bootstrap_means, c(0.025, 0.975))
  )
  
  return(hasil)
}

Simulasi Bootstrap

B <- 1000  # Jumlah replikasi bootstrap
hasil_simulasi <- data.frame()

# Loop untuk setiap konfigurasi
for(i in 1:nrow(konfigurasi)) {
  
  n_aktual <- konfigurasi$n[i]
  d_aktual <- konfigurasi$d[i]
  
  # Mengambil satu sampel acak dari populasi dengan ukuran n
  sampel_aktual <- sample(populasi, size = n_aktual, replace = FALSE)
  mean_sampel <- mean(sampel_aktual)
  
  # Melakukan bootstrap
  hasil_bootstrap <- bootstrap_simulasi(sampel_aktual, d = d_aktual, B = B)
  
  # Menghitung bias
  bias <- hasil_bootstrap$mean_bootstrap - parameter_populasi
  
  # Menghitung MSE dan RMSE
  mse <- mean((rep(hasil_bootstrap$mean_bootstrap, B) - parameter_populasi)^2)
  rmse <- sqrt(mse)
  
  # Menyimpan hasil
  hasil_simulasi <- rbind(hasil_simulasi, data.frame(
    n = n_aktual,
    d = d_aktual,
    persentase = konfigurasi$persentase[i],
    kategori = konfigurasi$kategori[i],
    mean_populasi = parameter_populasi,
    mean_sampel = mean_sampel,
    mean_bootstrap = hasil_bootstrap$mean_bootstrap,
    bias = bias,
    se = hasil_bootstrap$se_bootstrap,
    ci_lower = hasil_bootstrap$ci_bootstrap[1],
    ci_upper = hasil_bootstrap$ci_bootstrap[2],
    mse = mse,
    rmse = rmse
  ))
  
  # Menampilkan progress
  if(i %% 5 == 0) {
    cat(paste("Progress:", i, "dari", nrow(konfigurasi), "konfigurasi selesai\n"))
  }
}
## Progress: 5 dari 30 konfigurasi selesai
## Progress: 10 dari 30 konfigurasi selesai
## Progress: 15 dari 30 konfigurasi selesai
## Progress: 20 dari 30 konfigurasi selesai
## Progress: 25 dari 30 konfigurasi selesai
## Progress: 30 dari 30 konfigurasi selesai

Hasil Simulasi

hasil_tampilan <- hasil_simulasi
hasil_tampilan[, c("mean_populasi", "mean_sampel", "mean_bootstrap", 
                   "bias", "se", "ci_lower", "ci_upper", "mse", "rmse")] <- 
  round(hasil_tampilan[, c("mean_populasi", "mean_sampel", "mean_bootstrap", 
                           "bias", "se", "ci_lower", "ci_upper", "mse", "rmse")], 3)

print(hasil_tampilan)
##          n   d        persentase kategori mean_populasi mean_sampel
## 2.5%    10   5               50%    d < n       100.015     100.318
## 2.5%1   10   8               80%    d < n       100.015     103.327
## 2.5%2   10  10              100%    d = n       100.015      97.448
## 2.5%3   10  15              150%    d > n       100.015      99.080
## 2.5%4   10  20              200%    d > n       100.015      96.464
## 2.5%5   15   8 53.3333333333333%    d < n       100.015     104.006
## 2.5%6   15  11 73.3333333333333%    d < n       100.015     107.964
## 2.5%7   15  15              100%    d = n       100.015      98.329
## 2.5%8   15  23 153.333333333333%    d > n       100.015     106.917
## 2.5%9   15  30              200%    d > n       100.015      94.744
## 2.5%10  20  10               50%    d < n       100.015     106.000
## 2.5%11  20  15               75%    d < n       100.015      95.607
## 2.5%12  20  20              100%    d = n       100.015     100.564
## 2.5%13  20  30              150%    d > n       100.015      93.772
## 2.5%14  20  40              200%    d > n       100.015      99.071
## 2.5%15  30  15               50%    d < n       100.015      99.498
## 2.5%16  30  23 76.6666666666667%    d < n       100.015     100.754
## 2.5%17  30  30              100%    d = n       100.015      98.380
## 2.5%18  30  45              150%    d > n       100.015      99.763
## 2.5%19  30  60              200%    d > n       100.015     101.292
## 2.5%20  50  25               50%    d < n       100.015      98.223
## 2.5%21  50  38               76%    d < n       100.015     103.024
## 2.5%22  50  50              100%    d = n       100.015     102.254
## 2.5%23  50  75              150%    d > n       100.015     102.363
## 2.5%24  50 100              200%    d > n       100.015      98.856
## 2.5%25 100  50               50%    d < n       100.015     100.995
## 2.5%26 100  75               75%    d < n       100.015      99.893
## 2.5%27 100 100              100%    d = n       100.015      98.232
## 2.5%28 100 150              150%    d > n       100.015     101.309
## 2.5%29 100 200              200%    d > n       100.015     102.961
##        mean_bootstrap   bias    se ci_lower ci_upper    mse  rmse
## 2.5%          100.243  0.228 6.127   89.494  113.723  0.052 0.228
## 2.5%1         103.342  3.327 6.356   91.419  116.400 11.069 3.327
## 2.5%2          97.791 -2.223 4.978   88.630  107.988  4.943 2.223
## 2.5%3          99.069 -0.946 3.117   93.141  105.459  0.895 0.946
## 2.5%4          96.398 -3.617 3.057   90.668  102.473 13.079 3.617
## 2.5%5         104.202  4.187 4.443   95.939  113.301 17.534 4.187
## 2.5%6         107.960  7.946 4.700   98.864  117.659 63.133 7.946
## 2.5%7          98.278 -1.736 3.329   91.274  104.282  3.014 1.736
## 2.5%8         106.978  6.963 3.547  100.441  114.380 48.482 6.963
## 2.5%9          94.785 -5.230 2.389   89.933   99.148 27.353 5.230
## 2.5%10        105.944  5.930 5.567   95.686  117.141 35.160 5.930
## 2.5%11         95.563 -4.452 2.975   90.249  101.797 19.820 4.452
## 2.5%12        100.626  0.612 2.959   95.102  106.300  0.374 0.612
## 2.5%13         93.724 -6.291 1.998   89.834   97.669 39.571 6.291
## 2.5%14         99.072 -0.943 3.066   93.044  104.885  0.889 0.943
## 2.5%15         99.631 -0.384 4.014   91.999  107.196  0.148 0.384
## 2.5%16        100.727  0.712 3.073   94.697  107.161  0.507 0.712
## 2.5%17         98.332 -1.683 2.505   93.408  103.038  2.833 1.683
## 2.5%18         99.880 -0.134 2.006   95.823  103.722  0.018 0.134
## 2.5%19        101.318  1.304 1.649   98.244  104.638  1.700 1.304
## 2.5%20         98.318 -1.697 3.035   92.466  103.976  2.879 1.697
## 2.5%21        103.074  3.059 2.325   98.481  107.658  9.360 3.059
## 2.5%22        102.152  2.138 2.099   98.100  106.395  4.570 2.138
## 2.5%23        102.431  2.417 1.628   99.277  105.664  5.841 2.417
## 2.5%24         98.721 -1.294 1.639   95.472  101.971  1.674 1.294
## 2.5%25        100.971  0.957 2.324   96.361  105.717  0.915 0.957
## 2.5%26         99.949 -0.065 1.648   96.759  103.191  0.004 0.065
## 2.5%27         98.219 -1.795 1.367   95.469  100.818  3.224 1.795
## 2.5%28        101.347  1.332 1.234   98.854  103.704  1.774 1.332
## 2.5%29        102.987  2.973 0.962  101.094  104.823  8.836 2.973

Perbandingan Hasil Simulasi

analisis_kategori <- aggregate(
  cbind(bias, se, mse, rmse) ~ kategori,
  data = hasil_simulasi,
  FUN = function(x) round(mean(abs(x)), 4)
)

print(analisis_kategori)
##   kategori   bias     se     mse   rmse
## 1    d < n 2.7453 3.8822 13.3818 2.7453
## 2    d = n 1.6979 2.8729  3.1595 1.6979
## 3    d > n 2.7868 2.1911 12.5094 2.7868
for(n_uniq in unique(hasil_simulasi$n)) {
  cat(paste("\n--- n =", n_uniq, "---\n"))
  
  data_n <- subset(hasil_simulasi, n == n_uniq)
  data_n[, c("mean_bootstrap", "bias", "se")] <- 
    round(data_n[, c("mean_bootstrap", "bias", "se")], 3)
  
  print(data_n[, c("d", "persentase", "kategori", "mean_bootstrap", "bias", "se")])
}
## 
## --- n = 10 ---
##        d persentase kategori mean_bootstrap   bias    se
## 2.5%   5        50%    d < n        100.243  0.228 6.127
## 2.5%1  8        80%    d < n        103.342  3.327 6.356
## 2.5%2 10       100%    d = n         97.791 -2.223 4.978
## 2.5%3 15       150%    d > n         99.069 -0.946 3.117
## 2.5%4 20       200%    d > n         96.398 -3.617 3.057
## 
## --- n = 15 ---
##        d        persentase kategori mean_bootstrap   bias    se
## 2.5%5  8 53.3333333333333%    d < n        104.202  4.187 4.443
## 2.5%6 11 73.3333333333333%    d < n        107.960  7.946 4.700
## 2.5%7 15              100%    d = n         98.278 -1.736 3.329
## 2.5%8 23 153.333333333333%    d > n        106.978  6.963 3.547
## 2.5%9 30              200%    d > n         94.785 -5.230 2.389
## 
## --- n = 20 ---
##         d persentase kategori mean_bootstrap   bias    se
## 2.5%10 10        50%    d < n        105.944  5.930 5.567
## 2.5%11 15        75%    d < n         95.563 -4.452 2.975
## 2.5%12 20       100%    d = n        100.626  0.612 2.959
## 2.5%13 30       150%    d > n         93.724 -6.291 1.998
## 2.5%14 40       200%    d > n         99.072 -0.943 3.066
## 
## --- n = 30 ---
##         d        persentase kategori mean_bootstrap   bias    se
## 2.5%15 15               50%    d < n         99.631 -0.384 4.014
## 2.5%16 23 76.6666666666667%    d < n        100.727  0.712 3.073
## 2.5%17 30              100%    d = n         98.332 -1.683 2.505
## 2.5%18 45              150%    d > n         99.880 -0.134 2.006
## 2.5%19 60              200%    d > n        101.318  1.304 1.649
## 
## --- n = 50 ---
##          d persentase kategori mean_bootstrap   bias    se
## 2.5%20  25        50%    d < n         98.318 -1.697 3.035
## 2.5%21  38        76%    d < n        103.074  3.059 2.325
## 2.5%22  50       100%    d = n        102.152  2.138 2.099
## 2.5%23  75       150%    d > n        102.431  2.417 1.628
## 2.5%24 100       200%    d > n         98.721 -1.294 1.639
## 
## --- n = 100 ---
##          d persentase kategori mean_bootstrap   bias    se
## 2.5%25  50        50%    d < n        100.971  0.957 2.324
## 2.5%26  75        75%    d < n         99.949 -0.065 1.648
## 2.5%27 100       100%    d = n         98.219 -1.795 1.367
## 2.5%28 150       150%    d > n        101.347  1.332 1.234
## 2.5%29 200       200%    d > n        102.987  2.973 0.962

Perbandingan Berdasarkan Rersentase d/n

# Mengelompokkan berdasarkan interval persentase
hasil_simulasi$kelompok_persen <- cut(
  hasil_simulasi$d / hasil_simulasi$n * 100,
  breaks = c(0, 75, 100, 150, Inf),
  labels = c("< 75%", "75-100%", "100-150%", "> 150%")
)

ringkasan_persen <- aggregate(
  cbind(bias, se, rmse) ~ kelompok_persen,
  data = hasil_simulasi,
  FUN = function(x) round(mean(abs(x)), 4)
)

print(ringkasan_persen)
##   kelompok_persen   bias     se   rmse
## 1           < 75% 2.8718 3.8703 2.8718
## 2         75-100% 1.9206 3.2212 1.9206
## 3        100-150% 2.2239 1.9965 2.2239
## 4          > 150% 3.1889 2.3300 3.1889

Perbandingan Akhir

# Membuat tabel dengan format yang rapi
tabel_lengkap <- hasil_simulasi[, c("n", "d", "persentase", "kategori", 
                                     "mean_bootstrap", "bias", "se", "rmse")]
tabel_lengkap[, 5:8] <- round(tabel_lengkap[, 5:8], 3)

# Mengurutkan berdasarkan n dan d
tabel_lengkap <- tabel_lengkap[order(tabel_lengkap$n, tabel_lengkap$d), ]

print(tabel_lengkap)
##          n   d        persentase kategori mean_bootstrap   bias    se  rmse
## 2.5%    10   5               50%    d < n        100.243  0.228 6.127 0.228
## 2.5%1   10   8               80%    d < n        103.342  3.327 6.356 3.327
## 2.5%2   10  10              100%    d = n         97.791 -2.223 4.978 2.223
## 2.5%3   10  15              150%    d > n         99.069 -0.946 3.117 0.946
## 2.5%4   10  20              200%    d > n         96.398 -3.617 3.057 3.617
## 2.5%5   15   8 53.3333333333333%    d < n        104.202  4.187 4.443 4.187
## 2.5%6   15  11 73.3333333333333%    d < n        107.960  7.946 4.700 7.946
## 2.5%7   15  15              100%    d = n         98.278 -1.736 3.329 1.736
## 2.5%8   15  23 153.333333333333%    d > n        106.978  6.963 3.547 6.963
## 2.5%9   15  30              200%    d > n         94.785 -5.230 2.389 5.230
## 2.5%10  20  10               50%    d < n        105.944  5.930 5.567 5.930
## 2.5%11  20  15               75%    d < n         95.563 -4.452 2.975 4.452
## 2.5%12  20  20              100%    d = n        100.626  0.612 2.959 0.612
## 2.5%13  20  30              150%    d > n         93.724 -6.291 1.998 6.291
## 2.5%14  20  40              200%    d > n         99.072 -0.943 3.066 0.943
## 2.5%15  30  15               50%    d < n         99.631 -0.384 4.014 0.384
## 2.5%16  30  23 76.6666666666667%    d < n        100.727  0.712 3.073 0.712
## 2.5%17  30  30              100%    d = n         98.332 -1.683 2.505 1.683
## 2.5%18  30  45              150%    d > n         99.880 -0.134 2.006 0.134
## 2.5%19  30  60              200%    d > n        101.318  1.304 1.649 1.304
## 2.5%20  50  25               50%    d < n         98.318 -1.697 3.035 1.697
## 2.5%21  50  38               76%    d < n        103.074  3.059 2.325 3.059
## 2.5%22  50  50              100%    d = n        102.152  2.138 2.099 2.138
## 2.5%23  50  75              150%    d > n        102.431  2.417 1.628 2.417
## 2.5%24  50 100              200%    d > n         98.721 -1.294 1.639 1.294
## 2.5%25 100  50               50%    d < n        100.971  0.957 2.324 0.957
## 2.5%26 100  75               75%    d < n         99.949 -0.065 1.648 0.065
## 2.5%27 100 100              100%    d = n         98.219 -1.795 1.367 1.795
## 2.5%28 100 150              150%    d > n        101.347  1.332 1.234 1.332
## 2.5%29 100 200              200%    d > n        102.987  2.973 0.962 2.973

Kesimpulan

cat("Berdasarkan hasil simulasi bootstrap:\n\n")
## Berdasarkan hasil simulasi bootstrap:
cat("1. d < n (bootstrap dengan sampel lebih kecil):\n")
## 1. d < n (bootstrap dengan sampel lebih kecil):
cat("   - Bias:", round(mean(abs(subset(hasil_simulasi, kategori == "d < n")$bias)), 4), "\n")
##    - Bias: 2.7453
cat("   - Standard Error:", round(mean(subset(hasil_simulasi, kategori == "d < n")$se), 4), "\n\n")
##    - Standard Error: 3.8822
cat("2. d = n (bootstrap standard):\n")
## 2. d = n (bootstrap standard):
cat("   - Bias:", round(mean(abs(subset(hasil_simulasi, kategori == "d = n")$bias)), 4), "\n")
##    - Bias: 1.6979
cat("   - Standard Error:", round(mean(subset(hasil_simulasi, kategori == "d = n")$se), 4), "\n\n")
##    - Standard Error: 2.8729
cat("3. d > n (bootstrap dengan sampel lebih besar):\n")
## 3. d > n (bootstrap dengan sampel lebih besar):
cat("   - Bias:", round(mean(abs(subset(hasil_simulasi, kategori == "d > n")$bias)), 4), "\n")
##    - Bias: 2.7868
cat("   - Standard Error:", round(mean(subset(hasil_simulasi, kategori == "d > n")$se), 4), "\n")
##    - Standard Error: 2.1911