# ============================================================
# Apakah Bootstrap Tetap Andal Ketika Data Tidak Normal? Evaluasi melalui Simulasi Berbagai Distribusi Probabilitas
# ============================================================
rm(list = ls())
set.seed(123)
# Parameter simulasi
ukuran_sampel <- c(20, 50, 100)
B <- 200 # jumlah bootstrap
R <- 50 # jumlah pengulangan simulasi Monte Carlo
alpha <- 0.05
# Fungsi pembangkit data
buat_data <- function(distribusi, n) {
if (distribusi == "Normal") {
return(rnorm(n, mean = 50, sd = 10))
}
if (distribusi == "Eksponensial") {
return(rexp(n, rate = 1/50))
}
if (distribusi == "Gamma") {
return(rgamma(n, shape = 2, scale = 25))
}
if (distribusi == "Chi_Square") {
return(10 * rchisq(n, df = 5))
}
}
# Parameter populasi
parameter_teoritis <- c(
Normal = 50,
Eksponensial = 50,
Gamma = 50,
Chi_Square = 50
)
# Fungsi bootstrap
bootstrap_mean <- function(data, B, alpha) {
n <- length(data)
hasil_boot <- replicate(
B,
mean(sample(data, size = n, replace = TRUE))
)
ci <- quantile(
hasil_boot,
probs = c(alpha / 2, 1 - alpha / 2),
na.rm = TRUE
)
return(data.frame(
mean_sampel = mean(data),
mean_bootstrap = mean(hasil_boot),
bias = mean(hasil_boot) - mean(data),
standard_error = sd(hasil_boot),
ci_lower = as.numeric(ci[1]),
ci_upper = as.numeric(ci[2]),
lebar_ci = as.numeric(ci[2] - ci[1])
))
}
# Daftar distribusi
daftar_distribusi <- c("Normal", "Eksponensial", "Gamma", "Chi_Square")
# Tempat menyimpan hasil akhir
hasil_akhir <- data.frame()
# Proses simulasi
for (dist in daftar_distribusi) {
for (n in ukuran_sampel) {
bias_simpan <- numeric(R)
se_simpan <- numeric(R)
lebar_ci_simpan <- numeric(R)
coverage_simpan <- numeric(R)
for (i in 1:R) {
# Ambil sampel acak, tetapi tidak ditampilkan
sampel_acak <- buat_data(dist, n)
hasil_boot <- bootstrap_mean(sampel_acak, B, alpha)
bias_simpan[i] <- hasil_boot$bias
se_simpan[i] <- hasil_boot$standard_error
lebar_ci_simpan[i] <- hasil_boot$lebar_ci
coverage_simpan[i] <- ifelse(
parameter_teoritis[dist] >= hasil_boot$ci_lower &&
parameter_teoritis[dist] <= hasil_boot$ci_upper,
1, 0
)
}
hasil_akhir <- rbind(
hasil_akhir,
data.frame(
Distribusi = dist,
Ukuran_Sampel = n,
Parameter_Populasi = parameter_teoritis[dist],
Rata_Rata_Bias = mean(bias_simpan, na.rm = TRUE),
Rata_Rata_Standard_Error = mean(se_simpan, na.rm = TRUE),
Rata_Rata_Lebar_CI = mean(lebar_ci_simpan, na.rm = TRUE),
Coverage_Probability = mean(coverage_simpan, na.rm = TRUE)
)
)
}
}
# Membulatkan hasil
hasil_akhir$Parameter_Populasi <- round(hasil_akhir$Parameter_Populasi, 4)
hasil_akhir$Rata_Rata_Bias <- round(hasil_akhir$Rata_Rata_Bias, 4)
hasil_akhir$Rata_Rata_Standard_Error <- round(hasil_akhir$Rata_Rata_Standard_Error, 4)
hasil_akhir$Rata_Rata_Lebar_CI <- round(hasil_akhir$Rata_Rata_Lebar_CI, 4)
hasil_akhir$Coverage_Probability <- round(hasil_akhir$Coverage_Probability, 4)
# Tampilkan tabel hasil
print(hasil_akhir)
## Distribusi Ukuran_Sampel Parameter_Populasi Rata_Rata_Bias
## Normal Normal 20 50 -0.0044
## Normal1 Normal 50 50 -0.0033
## Normal2 Normal 100 50 -0.0019
## Eksponensial Eksponensial 20 50 0.0019
## Eksponensial1 Eksponensial 50 50 -0.0671
## Eksponensial2 Eksponensial 100 50 -0.0504
## Gamma Gamma 20 50 -0.0393
## Gamma1 Gamma 50 50 -0.0336
## Gamma2 Gamma 100 50 0.0008
## Chi_Square Chi_Square 20 50 -0.0249
## Chi_Square1 Chi_Square 50 50 0.0698
## Chi_Square2 Chi_Square 100 50 0.0671
## Rata_Rata_Standard_Error Rata_Rata_Lebar_CI Coverage_Probability
## Normal 2.1446 8.2705 0.98
## Normal1 1.4501 5.5036 0.98
## Normal2 0.9893 3.7826 0.98
## Eksponensial 10.8657 41.8065 0.92
## Eksponensial1 6.8643 26.1482 0.94
## Eksponensial2 4.9775 18.9821 0.92
## Gamma 7.2934 27.8756 0.90
## Gamma1 4.6109 17.7006 0.94
## Gamma2 3.4760 13.3195 0.96
## Chi_Square 6.3278 23.9297 0.88
## Chi_Square1 4.4403 17.0125 0.92
## Chi_Square2 3.2471 12.5205 0.94
# Simpan ke CSV
write.csv(
hasil_akhir,
"hasil_akhir_simulasi_bootstrap.csv",
row.names = FALSE
)
# Cek data grafik
str(hasil_akhir)
## 'data.frame': 12 obs. of 7 variables:
## $ Distribusi : chr "Normal" "Normal" "Normal" "Eksponensial" ...
## $ Ukuran_Sampel : num 20 50 100 20 50 100 20 50 100 20 ...
## $ Parameter_Populasi : num 50 50 50 50 50 50 50 50 50 50 ...
## $ Rata_Rata_Bias : num -0.0044 -0.0033 -0.0019 0.0019 -0.0671 -0.0504 -0.0393 -0.0336 0.0008 -0.0249 ...
## $ Rata_Rata_Standard_Error: num 2.145 1.45 0.989 10.866 6.864 ...
## $ Rata_Rata_Lebar_CI : num 8.27 5.5 3.78 41.81 26.15 ...
## $ Coverage_Probability : num 0.98 0.98 0.98 0.92 0.94 0.92 0.9 0.94 0.96 0.88 ...
summary(hasil_akhir)
## Distribusi Ukuran_Sampel Parameter_Populasi Rata_Rata_Bias
## Length:12 Min. : 20.00 Min. :50 Min. :-0.067100
## Class :character 1st Qu.: 20.00 1st Qu.:50 1st Qu.:-0.035025
## Mode :character Median : 50.00 Median :50 Median :-0.003850
## Mean : 56.67 Mean :50 Mean :-0.007108
## 3rd Qu.:100.00 3rd Qu.:50 3rd Qu.: 0.001075
## Max. :100.00 Max. :50 Max. : 0.069800
## Rata_Rata_Standard_Error Rata_Rata_Lebar_CI Coverage_Probability
## Min. : 0.9893 Min. : 3.783 Min. :0.8800
## 1st Qu.: 2.9715 1st Qu.:11.458 1st Qu.:0.9200
## Median : 4.5256 Median :17.357 Median :0.9400
## Mean : 4.7239 Mean :18.071 Mean :0.9383
## 3rd Qu.: 6.4619 3rd Qu.:24.484 3rd Qu.:0.9650
## Max. :10.8657 Max. :41.806 Max. :0.9800
# Label grafik
label_grafik <- paste(
hasil_akhir$Distribusi,
hasil_akhir$Ukuran_Sampel,
sep = "-"
)
# Grafik Standard Error
barplot(
height = hasil_akhir$Rata_Rata_Standard_Error,
names.arg = label_grafik,
las = 2,
main = "Rata-Rata Standard Error Bootstrap",
ylab = "Standard Error"
)

# Grafik Lebar CI
barplot(
height = hasil_akhir$Rata_Rata_Lebar_CI,
names.arg = label_grafik,
las = 2,
main = "Rata-Rata Lebar Interval Kepercayaan Bootstrap 95%",
ylab = "Lebar CI"
)

# Grafik Bias
barplot(
height = hasil_akhir$Rata_Rata_Bias,
names.arg = label_grafik,
las = 2,
main = "Rata-Rata Bias Estimasi Bootstrap",
ylab = "Bias"
)

# Grafik Coverage Probability
barplot(
height = hasil_akhir$Coverage_Probability,
names.arg = label_grafik,
las = 2,
main = "Coverage Probability Bootstrap",
ylab = "Coverage Probability"
)

hasil_akhir
## Distribusi Ukuran_Sampel Parameter_Populasi Rata_Rata_Bias
## Normal Normal 20 50 -0.0044
## Normal1 Normal 50 50 -0.0033
## Normal2 Normal 100 50 -0.0019
## Eksponensial Eksponensial 20 50 0.0019
## Eksponensial1 Eksponensial 50 50 -0.0671
## Eksponensial2 Eksponensial 100 50 -0.0504
## Gamma Gamma 20 50 -0.0393
## Gamma1 Gamma 50 50 -0.0336
## Gamma2 Gamma 100 50 0.0008
## Chi_Square Chi_Square 20 50 -0.0249
## Chi_Square1 Chi_Square 50 50 0.0698
## Chi_Square2 Chi_Square 100 50 0.0671
## Rata_Rata_Standard_Error Rata_Rata_Lebar_CI Coverage_Probability
## Normal 2.1446 8.2705 0.98
## Normal1 1.4501 5.5036 0.98
## Normal2 0.9893 3.7826 0.98
## Eksponensial 10.8657 41.8065 0.92
## Eksponensial1 6.8643 26.1482 0.94
## Eksponensial2 4.9775 18.9821 0.92
## Gamma 7.2934 27.8756 0.90
## Gamma1 4.6109 17.7006 0.94
## Gamma2 3.4760 13.3195 0.96
## Chi_Square 6.3278 23.9297 0.88
## Chi_Square1 4.4403 17.0125 0.92
## Chi_Square2 3.2471 12.5205 0.94
print(hasil_akhir)
## Distribusi Ukuran_Sampel Parameter_Populasi Rata_Rata_Bias
## Normal Normal 20 50 -0.0044
## Normal1 Normal 50 50 -0.0033
## Normal2 Normal 100 50 -0.0019
## Eksponensial Eksponensial 20 50 0.0019
## Eksponensial1 Eksponensial 50 50 -0.0671
## Eksponensial2 Eksponensial 100 50 -0.0504
## Gamma Gamma 20 50 -0.0393
## Gamma1 Gamma 50 50 -0.0336
## Gamma2 Gamma 100 50 0.0008
## Chi_Square Chi_Square 20 50 -0.0249
## Chi_Square1 Chi_Square 50 50 0.0698
## Chi_Square2 Chi_Square 100 50 0.0671
## Rata_Rata_Standard_Error Rata_Rata_Lebar_CI Coverage_Probability
## Normal 2.1446 8.2705 0.98
## Normal1 1.4501 5.5036 0.98
## Normal2 0.9893 3.7826 0.98
## Eksponensial 10.8657 41.8065 0.92
## Eksponensial1 6.8643 26.1482 0.94
## Eksponensial2 4.9775 18.9821 0.92
## Gamma 7.2934 27.8756 0.90
## Gamma1 4.6109 17.7006 0.94
## Gamma2 3.4760 13.3195 0.96
## Chi_Square 6.3278 23.9297 0.88
## Chi_Square1 4.4403 17.0125 0.92
## Chi_Square2 3.2471 12.5205 0.94