No. 3 Bootstrap
Data
set.seed(123)
# Membuat data populasi berdistribusi normal (N=100.000)
populasi <- rnorm(100000, mean = 100, sd = 15)
parameter_populasi <- mean(populasi) # Nilai parameter yang akan diestimasi
# Menentukan konfigurasi sesuai tabel soal
konfigurasi <- data.frame(
n = c(10, 10, 10, 10, 10, # n=10
15, 15, 15, 15, 15, # n=15
20, 20, 20, 20, 20, # n=20
30, 30, 30, 30, 30, # n=30
50, 50, 50, 50, 50, # n=50
100, 100, 100, 100, 100), # n=100
d = c(5, 8, 10, 15, 20, # d untuk n=10
8, 11, 15, 23, 30, # d untuk n=15
10, 15, 20, 30, 40, # d untuk n=20
15, 23, 30, 45, 60, # d untuk n=30
25, 38, 50, 75, 100, # d untuk n=50
50, 75, 100, 150, 200) # d untuk n=100
)
# Menambahkan kolom kategori perbandingan d vs n
konfigurasi$kategori <- ifelse(konfigurasi$d < konfigurasi$n, "d < n",
ifelse(konfigurasi$d == konfigurasi$n, "d = n", "d > n"))
# Menambahkan kolom persentase d/n
konfigurasi$persentase <- paste0(konfigurasi$d/konfigurasi$n * 100, "%")
# Menampilkan konfigurasi
print(konfigurasi)## n d kategori persentase
## 1 10 5 d < n 50%
## 2 10 8 d < n 80%
## 3 10 10 d = n 100%
## 4 10 15 d > n 150%
## 5 10 20 d > n 200%
## 6 15 8 d < n 53.3333333333333%
## 7 15 11 d < n 73.3333333333333%
## 8 15 15 d = n 100%
## 9 15 23 d > n 153.333333333333%
## 10 15 30 d > n 200%
## 11 20 10 d < n 50%
## 12 20 15 d < n 75%
## 13 20 20 d = n 100%
## 14 20 30 d > n 150%
## 15 20 40 d > n 200%
## 16 30 15 d < n 50%
## 17 30 23 d < n 76.6666666666667%
## 18 30 30 d = n 100%
## 19 30 45 d > n 150%
## 20 30 60 d > n 200%
## 21 50 25 d < n 50%
## 22 50 38 d < n 76%
## 23 50 50 d = n 100%
## 24 50 75 d > n 150%
## 25 50 100 d > n 200%
## 26 100 50 d < n 50%
## 27 100 75 d < n 75%
## 28 100 100 d = n 100%
## 29 100 150 d > n 150%
## 30 100 200 d > n 200%
Fungsi Bootstrap
bootstrap_simulasi <- function(data_sampel, d, B = 1000) {
# Fungsi untuk melakukan bootstrap dengan ukuran sampel d
# Input:
# data_sampel: vektor data sampel asli
# d: ukuran sampel bootstrap
# B: jumlah replikasi bootstrap
bootstrap_means <- numeric(B)
for(i in 1:B) {
# Mengambil sampel bootstrap dengan ukuran d (dengan pengembalian)
sampel_bootstrap <- sample(data_sampel, size = d, replace = TRUE)
bootstrap_means[i] <- mean(sampel_bootstrap)
}
# Menghitung statistik
hasil <- list(
mean_bootstrap = mean(bootstrap_means),
se_bootstrap = sd(bootstrap_means),
ci_bootstrap = quantile(bootstrap_means, c(0.025, 0.975))
)
return(hasil)
}Simulasi Bootstrap
B <- 1000 # Jumlah replikasi bootstrap
hasil_simulasi <- data.frame()
# Loop untuk setiap konfigurasi
for(i in 1:nrow(konfigurasi)) {
n_aktual <- konfigurasi$n[i]
d_aktual <- konfigurasi$d[i]
# Mengambil satu sampel acak dari populasi dengan ukuran n
sampel_aktual <- sample(populasi, size = n_aktual, replace = FALSE)
mean_sampel <- mean(sampel_aktual)
# Melakukan bootstrap
hasil_bootstrap <- bootstrap_simulasi(sampel_aktual, d = d_aktual, B = B)
# Menghitung bias
bias <- hasil_bootstrap$mean_bootstrap - parameter_populasi
# Menghitung MSE dan RMSE
mse <- mean((rep(hasil_bootstrap$mean_bootstrap, B) - parameter_populasi)^2)
rmse <- sqrt(mse)
# Menyimpan hasil
hasil_simulasi <- rbind(hasil_simulasi, data.frame(
n = n_aktual,
d = d_aktual,
persentase = konfigurasi$persentase[i],
kategori = konfigurasi$kategori[i],
mean_populasi = parameter_populasi,
mean_sampel = mean_sampel,
mean_bootstrap = hasil_bootstrap$mean_bootstrap,
bias = bias,
se = hasil_bootstrap$se_bootstrap,
ci_lower = hasil_bootstrap$ci_bootstrap[1],
ci_upper = hasil_bootstrap$ci_bootstrap[2],
mse = mse,
rmse = rmse
))
# Menampilkan progress
if(i %% 5 == 0) {
cat(paste("Progress:", i, "dari", nrow(konfigurasi), "konfigurasi selesai\n"))
}
}## Progress: 5 dari 30 konfigurasi selesai
## Progress: 10 dari 30 konfigurasi selesai
## Progress: 15 dari 30 konfigurasi selesai
## Progress: 20 dari 30 konfigurasi selesai
## Progress: 25 dari 30 konfigurasi selesai
## Progress: 30 dari 30 konfigurasi selesai
Hasil Simulasi
hasil_tampilan <- hasil_simulasi
hasil_tampilan[, c("mean_populasi", "mean_sampel", "mean_bootstrap",
"bias", "se", "ci_lower", "ci_upper", "mse", "rmse")] <-
round(hasil_tampilan[, c("mean_populasi", "mean_sampel", "mean_bootstrap",
"bias", "se", "ci_lower", "ci_upper", "mse", "rmse")], 3)
print(hasil_tampilan)## n d persentase kategori mean_populasi mean_sampel
## 2.5% 10 5 50% d < n 100.015 100.318
## 2.5%1 10 8 80% d < n 100.015 103.327
## 2.5%2 10 10 100% d = n 100.015 97.448
## 2.5%3 10 15 150% d > n 100.015 99.080
## 2.5%4 10 20 200% d > n 100.015 96.464
## 2.5%5 15 8 53.3333333333333% d < n 100.015 104.006
## 2.5%6 15 11 73.3333333333333% d < n 100.015 107.964
## 2.5%7 15 15 100% d = n 100.015 98.329
## 2.5%8 15 23 153.333333333333% d > n 100.015 106.917
## 2.5%9 15 30 200% d > n 100.015 94.744
## 2.5%10 20 10 50% d < n 100.015 106.000
## 2.5%11 20 15 75% d < n 100.015 95.607
## 2.5%12 20 20 100% d = n 100.015 100.564
## 2.5%13 20 30 150% d > n 100.015 93.772
## 2.5%14 20 40 200% d > n 100.015 99.071
## 2.5%15 30 15 50% d < n 100.015 99.498
## 2.5%16 30 23 76.6666666666667% d < n 100.015 100.754
## 2.5%17 30 30 100% d = n 100.015 98.380
## 2.5%18 30 45 150% d > n 100.015 99.763
## 2.5%19 30 60 200% d > n 100.015 101.292
## 2.5%20 50 25 50% d < n 100.015 98.223
## 2.5%21 50 38 76% d < n 100.015 103.024
## 2.5%22 50 50 100% d = n 100.015 102.254
## 2.5%23 50 75 150% d > n 100.015 102.363
## 2.5%24 50 100 200% d > n 100.015 98.856
## 2.5%25 100 50 50% d < n 100.015 100.995
## 2.5%26 100 75 75% d < n 100.015 99.893
## 2.5%27 100 100 100% d = n 100.015 98.232
## 2.5%28 100 150 150% d > n 100.015 101.309
## 2.5%29 100 200 200% d > n 100.015 102.961
## mean_bootstrap bias se ci_lower ci_upper mse rmse
## 2.5% 100.243 0.228 6.127 89.494 113.723 0.052 0.228
## 2.5%1 103.342 3.327 6.356 91.419 116.400 11.069 3.327
## 2.5%2 97.791 -2.223 4.978 88.630 107.988 4.943 2.223
## 2.5%3 99.069 -0.946 3.117 93.141 105.459 0.895 0.946
## 2.5%4 96.398 -3.617 3.057 90.668 102.473 13.079 3.617
## 2.5%5 104.202 4.187 4.443 95.939 113.301 17.534 4.187
## 2.5%6 107.960 7.946 4.700 98.864 117.659 63.133 7.946
## 2.5%7 98.278 -1.736 3.329 91.274 104.282 3.014 1.736
## 2.5%8 106.978 6.963 3.547 100.441 114.380 48.482 6.963
## 2.5%9 94.785 -5.230 2.389 89.933 99.148 27.353 5.230
## 2.5%10 105.944 5.930 5.567 95.686 117.141 35.160 5.930
## 2.5%11 95.563 -4.452 2.975 90.249 101.797 19.820 4.452
## 2.5%12 100.626 0.612 2.959 95.102 106.300 0.374 0.612
## 2.5%13 93.724 -6.291 1.998 89.834 97.669 39.571 6.291
## 2.5%14 99.072 -0.943 3.066 93.044 104.885 0.889 0.943
## 2.5%15 99.631 -0.384 4.014 91.999 107.196 0.148 0.384
## 2.5%16 100.727 0.712 3.073 94.697 107.161 0.507 0.712
## 2.5%17 98.332 -1.683 2.505 93.408 103.038 2.833 1.683
## 2.5%18 99.880 -0.134 2.006 95.823 103.722 0.018 0.134
## 2.5%19 101.318 1.304 1.649 98.244 104.638 1.700 1.304
## 2.5%20 98.318 -1.697 3.035 92.466 103.976 2.879 1.697
## 2.5%21 103.074 3.059 2.325 98.481 107.658 9.360 3.059
## 2.5%22 102.152 2.138 2.099 98.100 106.395 4.570 2.138
## 2.5%23 102.431 2.417 1.628 99.277 105.664 5.841 2.417
## 2.5%24 98.721 -1.294 1.639 95.472 101.971 1.674 1.294
## 2.5%25 100.971 0.957 2.324 96.361 105.717 0.915 0.957
## 2.5%26 99.949 -0.065 1.648 96.759 103.191 0.004 0.065
## 2.5%27 98.219 -1.795 1.367 95.469 100.818 3.224 1.795
## 2.5%28 101.347 1.332 1.234 98.854 103.704 1.774 1.332
## 2.5%29 102.987 2.973 0.962 101.094 104.823 8.836 2.973
Perbandingan Hasil Simulasi
analisis_kategori <- aggregate(
cbind(bias, se, mse, rmse) ~ kategori,
data = hasil_simulasi,
FUN = function(x) round(mean(abs(x)), 4)
)
print(analisis_kategori)## kategori bias se mse rmse
## 1 d < n 2.7453 3.8822 13.3818 2.7453
## 2 d = n 1.6979 2.8729 3.1595 1.6979
## 3 d > n 2.7868 2.1911 12.5094 2.7868
for(n_uniq in unique(hasil_simulasi$n)) {
cat(paste("\n--- n =", n_uniq, "---\n"))
data_n <- subset(hasil_simulasi, n == n_uniq)
data_n[, c("mean_bootstrap", "bias", "se")] <-
round(data_n[, c("mean_bootstrap", "bias", "se")], 3)
print(data_n[, c("d", "persentase", "kategori", "mean_bootstrap", "bias", "se")])
}##
## --- n = 10 ---
## d persentase kategori mean_bootstrap bias se
## 2.5% 5 50% d < n 100.243 0.228 6.127
## 2.5%1 8 80% d < n 103.342 3.327 6.356
## 2.5%2 10 100% d = n 97.791 -2.223 4.978
## 2.5%3 15 150% d > n 99.069 -0.946 3.117
## 2.5%4 20 200% d > n 96.398 -3.617 3.057
##
## --- n = 15 ---
## d persentase kategori mean_bootstrap bias se
## 2.5%5 8 53.3333333333333% d < n 104.202 4.187 4.443
## 2.5%6 11 73.3333333333333% d < n 107.960 7.946 4.700
## 2.5%7 15 100% d = n 98.278 -1.736 3.329
## 2.5%8 23 153.333333333333% d > n 106.978 6.963 3.547
## 2.5%9 30 200% d > n 94.785 -5.230 2.389
##
## --- n = 20 ---
## d persentase kategori mean_bootstrap bias se
## 2.5%10 10 50% d < n 105.944 5.930 5.567
## 2.5%11 15 75% d < n 95.563 -4.452 2.975
## 2.5%12 20 100% d = n 100.626 0.612 2.959
## 2.5%13 30 150% d > n 93.724 -6.291 1.998
## 2.5%14 40 200% d > n 99.072 -0.943 3.066
##
## --- n = 30 ---
## d persentase kategori mean_bootstrap bias se
## 2.5%15 15 50% d < n 99.631 -0.384 4.014
## 2.5%16 23 76.6666666666667% d < n 100.727 0.712 3.073
## 2.5%17 30 100% d = n 98.332 -1.683 2.505
## 2.5%18 45 150% d > n 99.880 -0.134 2.006
## 2.5%19 60 200% d > n 101.318 1.304 1.649
##
## --- n = 50 ---
## d persentase kategori mean_bootstrap bias se
## 2.5%20 25 50% d < n 98.318 -1.697 3.035
## 2.5%21 38 76% d < n 103.074 3.059 2.325
## 2.5%22 50 100% d = n 102.152 2.138 2.099
## 2.5%23 75 150% d > n 102.431 2.417 1.628
## 2.5%24 100 200% d > n 98.721 -1.294 1.639
##
## --- n = 100 ---
## d persentase kategori mean_bootstrap bias se
## 2.5%25 50 50% d < n 100.971 0.957 2.324
## 2.5%26 75 75% d < n 99.949 -0.065 1.648
## 2.5%27 100 100% d = n 98.219 -1.795 1.367
## 2.5%28 150 150% d > n 101.347 1.332 1.234
## 2.5%29 200 200% d > n 102.987 2.973 0.962
Perbandingan Berdasarkan Rersentase d/n
# Mengelompokkan berdasarkan interval persentase
hasil_simulasi$kelompok_persen <- cut(
hasil_simulasi$d / hasil_simulasi$n * 100,
breaks = c(0, 75, 100, 150, Inf),
labels = c("< 75%", "75-100%", "100-150%", "> 150%")
)
ringkasan_persen <- aggregate(
cbind(bias, se, rmse) ~ kelompok_persen,
data = hasil_simulasi,
FUN = function(x) round(mean(abs(x)), 4)
)
print(ringkasan_persen)## kelompok_persen bias se rmse
## 1 < 75% 2.8718 3.8703 2.8718
## 2 75-100% 1.9206 3.2212 1.9206
## 3 100-150% 2.2239 1.9965 2.2239
## 4 > 150% 3.1889 2.3300 3.1889
Perbandingan Akhir
# Membuat tabel dengan format yang rapi
tabel_lengkap <- hasil_simulasi[, c("n", "d", "persentase", "kategori",
"mean_bootstrap", "bias", "se", "rmse")]
tabel_lengkap[, 5:8] <- round(tabel_lengkap[, 5:8], 3)
# Mengurutkan berdasarkan n dan d
tabel_lengkap <- tabel_lengkap[order(tabel_lengkap$n, tabel_lengkap$d), ]
print(tabel_lengkap)## n d persentase kategori mean_bootstrap bias se rmse
## 2.5% 10 5 50% d < n 100.243 0.228 6.127 0.228
## 2.5%1 10 8 80% d < n 103.342 3.327 6.356 3.327
## 2.5%2 10 10 100% d = n 97.791 -2.223 4.978 2.223
## 2.5%3 10 15 150% d > n 99.069 -0.946 3.117 0.946
## 2.5%4 10 20 200% d > n 96.398 -3.617 3.057 3.617
## 2.5%5 15 8 53.3333333333333% d < n 104.202 4.187 4.443 4.187
## 2.5%6 15 11 73.3333333333333% d < n 107.960 7.946 4.700 7.946
## 2.5%7 15 15 100% d = n 98.278 -1.736 3.329 1.736
## 2.5%8 15 23 153.333333333333% d > n 106.978 6.963 3.547 6.963
## 2.5%9 15 30 200% d > n 94.785 -5.230 2.389 5.230
## 2.5%10 20 10 50% d < n 105.944 5.930 5.567 5.930
## 2.5%11 20 15 75% d < n 95.563 -4.452 2.975 4.452
## 2.5%12 20 20 100% d = n 100.626 0.612 2.959 0.612
## 2.5%13 20 30 150% d > n 93.724 -6.291 1.998 6.291
## 2.5%14 20 40 200% d > n 99.072 -0.943 3.066 0.943
## 2.5%15 30 15 50% d < n 99.631 -0.384 4.014 0.384
## 2.5%16 30 23 76.6666666666667% d < n 100.727 0.712 3.073 0.712
## 2.5%17 30 30 100% d = n 98.332 -1.683 2.505 1.683
## 2.5%18 30 45 150% d > n 99.880 -0.134 2.006 0.134
## 2.5%19 30 60 200% d > n 101.318 1.304 1.649 1.304
## 2.5%20 50 25 50% d < n 98.318 -1.697 3.035 1.697
## 2.5%21 50 38 76% d < n 103.074 3.059 2.325 3.059
## 2.5%22 50 50 100% d = n 102.152 2.138 2.099 2.138
## 2.5%23 50 75 150% d > n 102.431 2.417 1.628 2.417
## 2.5%24 50 100 200% d > n 98.721 -1.294 1.639 1.294
## 2.5%25 100 50 50% d < n 100.971 0.957 2.324 0.957
## 2.5%26 100 75 75% d < n 99.949 -0.065 1.648 0.065
## 2.5%27 100 100 100% d = n 98.219 -1.795 1.367 1.795
## 2.5%28 100 150 150% d > n 101.347 1.332 1.234 1.332
## 2.5%29 100 200 200% d > n 102.987 2.973 0.962 2.973
Kesimpulan
## Berdasarkan hasil simulasi bootstrap:
## 1. d < n (bootstrap dengan sampel lebih kecil):
## - Bias: 2.7453
## - Standard Error: 3.8822
## 2. d = n (bootstrap standard):
## - Bias: 1.6979
## - Standard Error: 2.8729
## 3. d > n (bootstrap dengan sampel lebih besar):
## - Bias: 2.7868
## - Standard Error: 2.1911