# ============================================================
# Apakah Bootstrap Tetap Andal Ketika Data Tidak Normal? Evaluasi melalui Simulasi Berbagai Distribusi Probabilitas
# ============================================================

rm(list = ls())
set.seed(123)

# Parameter simulasi
ukuran_sampel <- c(20, 50, 100)
B <- 200          # jumlah bootstrap
R <- 50           # jumlah pengulangan simulasi Monte Carlo
alpha <- 0.05

# Fungsi pembangkit data
buat_data <- function(distribusi, n) {
  if (distribusi == "Normal") {
    return(rnorm(n, mean = 50, sd = 10))
  }
  
  if (distribusi == "Eksponensial") {
    return(rexp(n, rate = 1/50))
  }
  
  if (distribusi == "Gamma") {
    return(rgamma(n, shape = 2, scale = 25))
  }
  
  if (distribusi == "Chi_Square") {
    return(10 * rchisq(n, df = 5))
  }
}

# Parameter populasi
parameter_teoritis <- c(
  Normal = 50,
  Eksponensial = 50,
  Gamma = 50,
  Chi_Square = 50
)

# Fungsi bootstrap
bootstrap_mean <- function(data, B, alpha) {
  
  n <- length(data)
  
  hasil_boot <- replicate(
    B,
    mean(sample(data, size = n, replace = TRUE))
  )
  
  ci <- quantile(
    hasil_boot,
    probs = c(alpha / 2, 1 - alpha / 2),
    na.rm = TRUE
  )
  
  return(data.frame(
    mean_sampel = mean(data),
    mean_bootstrap = mean(hasil_boot),
    bias = mean(hasil_boot) - mean(data),
    standard_error = sd(hasil_boot),
    ci_lower = as.numeric(ci[1]),
    ci_upper = as.numeric(ci[2]),
    lebar_ci = as.numeric(ci[2] - ci[1])
  ))
}

# Daftar distribusi
daftar_distribusi <- c("Normal", "Eksponensial", "Gamma", "Chi_Square")

# Tempat menyimpan hasil akhir
hasil_akhir <- data.frame()

# Proses simulasi
for (dist in daftar_distribusi) {
  
  for (n in ukuran_sampel) {
    
    bias_simpan <- numeric(R)
    se_simpan <- numeric(R)
    lebar_ci_simpan <- numeric(R)
    coverage_simpan <- numeric(R)
    
    for (i in 1:R) {
      
      # Ambil sampel acak, tetapi tidak ditampilkan
      sampel_acak <- buat_data(dist, n)
      
      hasil_boot <- bootstrap_mean(sampel_acak, B, alpha)
      
      bias_simpan[i] <- hasil_boot$bias
      se_simpan[i] <- hasil_boot$standard_error
      lebar_ci_simpan[i] <- hasil_boot$lebar_ci
      
      coverage_simpan[i] <- ifelse(
        parameter_teoritis[dist] >= hasil_boot$ci_lower &&
          parameter_teoritis[dist] <= hasil_boot$ci_upper,
        1, 0
      )
    }
    
    hasil_akhir <- rbind(
      hasil_akhir,
      data.frame(
        Distribusi = dist,
        Ukuran_Sampel = n,
        Parameter_Populasi = parameter_teoritis[dist],
        Rata_Rata_Bias = mean(bias_simpan, na.rm = TRUE),
        Rata_Rata_Standard_Error = mean(se_simpan, na.rm = TRUE),
        Rata_Rata_Lebar_CI = mean(lebar_ci_simpan, na.rm = TRUE),
        Coverage_Probability = mean(coverage_simpan, na.rm = TRUE)
      )
    )
  }
}

# Membulatkan hasil
hasil_akhir$Parameter_Populasi <- round(hasil_akhir$Parameter_Populasi, 4)
hasil_akhir$Rata_Rata_Bias <- round(hasil_akhir$Rata_Rata_Bias, 4)
hasil_akhir$Rata_Rata_Standard_Error <- round(hasil_akhir$Rata_Rata_Standard_Error, 4)
hasil_akhir$Rata_Rata_Lebar_CI <- round(hasil_akhir$Rata_Rata_Lebar_CI, 4)
hasil_akhir$Coverage_Probability <- round(hasil_akhir$Coverage_Probability, 4)

# Tampilkan tabel hasil
print(hasil_akhir)
##                 Distribusi Ukuran_Sampel Parameter_Populasi Rata_Rata_Bias
## Normal              Normal            20                 50        -0.0044
## Normal1             Normal            50                 50        -0.0033
## Normal2             Normal           100                 50        -0.0019
## Eksponensial  Eksponensial            20                 50         0.0019
## Eksponensial1 Eksponensial            50                 50        -0.0671
## Eksponensial2 Eksponensial           100                 50        -0.0504
## Gamma                Gamma            20                 50        -0.0393
## Gamma1               Gamma            50                 50        -0.0336
## Gamma2               Gamma           100                 50         0.0008
## Chi_Square      Chi_Square            20                 50        -0.0249
## Chi_Square1     Chi_Square            50                 50         0.0698
## Chi_Square2     Chi_Square           100                 50         0.0671
##               Rata_Rata_Standard_Error Rata_Rata_Lebar_CI Coverage_Probability
## Normal                          2.1446             8.2705                 0.98
## Normal1                         1.4501             5.5036                 0.98
## Normal2                         0.9893             3.7826                 0.98
## Eksponensial                   10.8657            41.8065                 0.92
## Eksponensial1                   6.8643            26.1482                 0.94
## Eksponensial2                   4.9775            18.9821                 0.92
## Gamma                           7.2934            27.8756                 0.90
## Gamma1                          4.6109            17.7006                 0.94
## Gamma2                          3.4760            13.3195                 0.96
## Chi_Square                      6.3278            23.9297                 0.88
## Chi_Square1                     4.4403            17.0125                 0.92
## Chi_Square2                     3.2471            12.5205                 0.94
# Simpan ke CSV
write.csv(
  hasil_akhir,
  "hasil_akhir_simulasi_bootstrap.csv",
  row.names = FALSE
)

# Cek data grafik
str(hasil_akhir)
## 'data.frame':    12 obs. of  7 variables:
##  $ Distribusi              : chr  "Normal" "Normal" "Normal" "Eksponensial" ...
##  $ Ukuran_Sampel           : num  20 50 100 20 50 100 20 50 100 20 ...
##  $ Parameter_Populasi      : num  50 50 50 50 50 50 50 50 50 50 ...
##  $ Rata_Rata_Bias          : num  -0.0044 -0.0033 -0.0019 0.0019 -0.0671 -0.0504 -0.0393 -0.0336 0.0008 -0.0249 ...
##  $ Rata_Rata_Standard_Error: num  2.145 1.45 0.989 10.866 6.864 ...
##  $ Rata_Rata_Lebar_CI      : num  8.27 5.5 3.78 41.81 26.15 ...
##  $ Coverage_Probability    : num  0.98 0.98 0.98 0.92 0.94 0.92 0.9 0.94 0.96 0.88 ...
summary(hasil_akhir)
##   Distribusi        Ukuran_Sampel    Parameter_Populasi Rata_Rata_Bias     
##  Length:12          Min.   : 20.00   Min.   :50         Min.   :-0.067100  
##  Class :character   1st Qu.: 20.00   1st Qu.:50         1st Qu.:-0.035025  
##  Mode  :character   Median : 50.00   Median :50         Median :-0.003850  
##                     Mean   : 56.67   Mean   :50         Mean   :-0.007108  
##                     3rd Qu.:100.00   3rd Qu.:50         3rd Qu.: 0.001075  
##                     Max.   :100.00   Max.   :50         Max.   : 0.069800  
##  Rata_Rata_Standard_Error Rata_Rata_Lebar_CI Coverage_Probability
##  Min.   : 0.9893          Min.   : 3.783     Min.   :0.8800      
##  1st Qu.: 2.9715          1st Qu.:11.458     1st Qu.:0.9200      
##  Median : 4.5256          Median :17.357     Median :0.9400      
##  Mean   : 4.7239          Mean   :18.071     Mean   :0.9383      
##  3rd Qu.: 6.4619          3rd Qu.:24.484     3rd Qu.:0.9650      
##  Max.   :10.8657          Max.   :41.806     Max.   :0.9800
# Label grafik
label_grafik <- paste(
  hasil_akhir$Distribusi,
  hasil_akhir$Ukuran_Sampel,
  sep = "-"
)

# Grafik Standard Error
barplot(
  height = hasil_akhir$Rata_Rata_Standard_Error,
  names.arg = label_grafik,
  las = 2,
  main = "Rata-Rata Standard Error Bootstrap",
  ylab = "Standard Error"
)

# Grafik Lebar CI
barplot(
  height = hasil_akhir$Rata_Rata_Lebar_CI,
  names.arg = label_grafik,
  las = 2,
  main = "Rata-Rata Lebar Interval Kepercayaan Bootstrap 95%",
  ylab = "Lebar CI"
)

# Grafik Bias
barplot(
  height = hasil_akhir$Rata_Rata_Bias,
  names.arg = label_grafik,
  las = 2,
  main = "Rata-Rata Bias Estimasi Bootstrap",
  ylab = "Bias"
)

# Grafik Coverage Probability
barplot(
  height = hasil_akhir$Coverage_Probability,
  names.arg = label_grafik,
  las = 2,
  main = "Coverage Probability Bootstrap",
  ylab = "Coverage Probability"
)

hasil_akhir
##                 Distribusi Ukuran_Sampel Parameter_Populasi Rata_Rata_Bias
## Normal              Normal            20                 50        -0.0044
## Normal1             Normal            50                 50        -0.0033
## Normal2             Normal           100                 50        -0.0019
## Eksponensial  Eksponensial            20                 50         0.0019
## Eksponensial1 Eksponensial            50                 50        -0.0671
## Eksponensial2 Eksponensial           100                 50        -0.0504
## Gamma                Gamma            20                 50        -0.0393
## Gamma1               Gamma            50                 50        -0.0336
## Gamma2               Gamma           100                 50         0.0008
## Chi_Square      Chi_Square            20                 50        -0.0249
## Chi_Square1     Chi_Square            50                 50         0.0698
## Chi_Square2     Chi_Square           100                 50         0.0671
##               Rata_Rata_Standard_Error Rata_Rata_Lebar_CI Coverage_Probability
## Normal                          2.1446             8.2705                 0.98
## Normal1                         1.4501             5.5036                 0.98
## Normal2                         0.9893             3.7826                 0.98
## Eksponensial                   10.8657            41.8065                 0.92
## Eksponensial1                   6.8643            26.1482                 0.94
## Eksponensial2                   4.9775            18.9821                 0.92
## Gamma                           7.2934            27.8756                 0.90
## Gamma1                          4.6109            17.7006                 0.94
## Gamma2                          3.4760            13.3195                 0.96
## Chi_Square                      6.3278            23.9297                 0.88
## Chi_Square1                     4.4403            17.0125                 0.92
## Chi_Square2                     3.2471            12.5205                 0.94
print(hasil_akhir)
##                 Distribusi Ukuran_Sampel Parameter_Populasi Rata_Rata_Bias
## Normal              Normal            20                 50        -0.0044
## Normal1             Normal            50                 50        -0.0033
## Normal2             Normal           100                 50        -0.0019
## Eksponensial  Eksponensial            20                 50         0.0019
## Eksponensial1 Eksponensial            50                 50        -0.0671
## Eksponensial2 Eksponensial           100                 50        -0.0504
## Gamma                Gamma            20                 50        -0.0393
## Gamma1               Gamma            50                 50        -0.0336
## Gamma2               Gamma           100                 50         0.0008
## Chi_Square      Chi_Square            20                 50        -0.0249
## Chi_Square1     Chi_Square            50                 50         0.0698
## Chi_Square2     Chi_Square           100                 50         0.0671
##               Rata_Rata_Standard_Error Rata_Rata_Lebar_CI Coverage_Probability
## Normal                          2.1446             8.2705                 0.98
## Normal1                         1.4501             5.5036                 0.98
## Normal2                         0.9893             3.7826                 0.98
## Eksponensial                   10.8657            41.8065                 0.92
## Eksponensial1                   6.8643            26.1482                 0.94
## Eksponensial2                   4.9775            18.9821                 0.92
## Gamma                           7.2934            27.8756                 0.90
## Gamma1                          4.6109            17.7006                 0.94
## Gamma2                          3.4760            13.3195                 0.96
## Chi_Square                      6.3278            23.9297                 0.88
## Chi_Square1                     4.4403            17.0125                 0.92
## Chi_Square2                     3.2471            12.5205                 0.94