set.seed(123)

n_obs    <- 100
n_rep    <- 100
phi_true <- -0.191  # parameter AR(1)

#Hasil estimasi
est_normal  <- numeric(n_rep)
est_outlier <- numeric(n_rep)
est_missing <- numeric(n_rep)
est_missing_baru <- numeric(n_rep)

Membangkitkan Data

data_gen <- arima.sim(
  n = n_obs,
  list(order = c(1,1,0), ar = phi_true)
)

Estimasi Data Normal

model_normal <- arima(data_gen, order = c(1,1,0))
coef(model_normal)
##        ar1 
## -0.1776056
plot(data_gen, main = "Simulasi Data ARIMA(1,1,0) - BBCA",
     ylab = "Nilai", xlab = "Waktu")

Skenario 1 (Data Pencilan)

data_outlier <- data_gen

# Menambahkan nilai ekstrem (5x standar deviasi)
data_outlier[50] <- data_outlier[50] + (5 * sd(data_gen))

model_outlier <- arima(data_outlier, order = c(1,1,0))
coef(model_outlier)
##        ar1 
## -0.4740628
plot(data_outlier, main = "Simulasi Data Outlier")

Skenario 2 (Missing Value)

data_missing <- data_gen

# Menghapus 10% data secara acak
idx_missing <- sample(1:n_obs, 10)
data_missing[idx_missing] <- NA

# Interpolasi linear
data_fixed <- approx(data_missing, xout = 1:n_obs)$y

model_missing <- arima(data_fixed, order = c(1,1,0))
coef(model_missing)
##         ar1 
## -0.06006856
plot(data_fixed, type = "l",
     main = "Simulasi Data Missing (Interpolasi)",
     ylab = "Nilai", xlab = "Waktu")

library(forecast)
## Warning: package 'forecast' was built under R version 4.4.3
data_missing_baru <- data_gen

n_obs <- length(data_missing_baru)

# Menghapus 10% data secara acak
n_missing <- round(0.1 * n_obs)
idx_missing_baru <- sample(1:n_obs, n_missing)

data_missing_baru[idx_missing_baru] <- NA

# Model langsung tanpa interpolasi
model_missing_baru <- Arima(data_missing_baru, order = c(1,1,0))

coef(model_missing_baru)
##       ar1 
## -0.275261
# Plot data missing
plot(data_missing_baru, type = "l",
     main = "Simulasi Data Missing",
     ylab = "Nilai", xlab = "Waktu")

par(mfrow = c(2,2))

plot(data_gen, type="l", main="Normal")
plot(data_outlier, type="l", main="Outlier")
plot(data_fixed, type="l", main="Missing (Interpolasi)")
plot(data_missing_baru, type="l", main="Missing")

Simulasi Replikasi

for(i in 1:n_rep) {
  
  # Data Normal
  data_gen <- arima.sim(
    n = n_obs,
    list(order = c(1,1,0), ar = phi_true)
  )
  
  model_normal <- arima(data_gen, order = c(1,1,0))
  est_normal[i] <- coef(model_normal)["ar1"]
  
  # Data Outlier
  data_outlier <- data_gen
  data_outlier[50] <- data_outlier[50] + (5 * sd(data_gen))
  
  model_outlier <- arima(data_outlier, order = c(1,1,0))
  est_outlier[i] <- coef(model_outlier)["ar1"]
  
  # Data Missing dengan Interpolasi
  data_missing <- data_gen
  data_missing[sample(1:n_obs, 10)] <- NA
  
  data_fixed <- approx(data_missing, xout = 1:n_obs)$y
  
  model_missing <- arima(data_fixed, order = c(1,1,0))
  est_missing[i] <- coef(model_missing)["ar1"]
  
  # Data Missing Value tanpa Interpolasi
  data_missing <- data_gen
  data_missing[sample(1:n_obs, 10)] <- NA

  model_missing_baru <- Arima(data_missing, order = c(1,1,0))
  
  est_missing_baru[i] <- coef(model_missing_baru)["ar1"]
  
}
data_gen
## Time Series:
## Start = 1 
## End = 102 
## Frequency = 1 
##   [1]  0.00000000 -0.88193660 -1.78919290 -3.50821414 -2.85653910 -2.26345968
##   [7] -1.48434582 -1.03272956 -0.11382655  1.02656733  0.34433159 -0.32207147
##  [13]  0.58943525  2.22217386  2.00578919  2.94045137  3.75307492  3.13527657
##  [19]  3.39846839  3.78544600  4.41115082  3.85640191  2.74338027  3.62648304
##  [25]  1.95070954  1.79813968  2.84018926  2.15793736  2.48699646  1.56967224
##  [31]  2.03124661  2.67668544  3.69979564  4.09232980  3.89433341  3.05359878
##  [37]  2.94339599  3.68362610  5.27896299  5.23355434  4.72163331  3.66975551
##  [43]  3.53439925  5.13932163  3.37283665  2.70431582  3.14553230 -0.03955549
##  [49]  1.95407390  1.31003669  1.19826008  1.78659648  4.42781714  2.92295804
##  [55]  2.54031858  3.17471080  1.47801958  1.76234411  1.93297091  2.14858020
##  [61]  2.16135019  2.85913684  2.34839960  2.68085398  3.56179691  3.97827095
##  [67]  4.11378842  4.18823178  6.58831881  5.86761095  5.76517769  6.02460917
##  [73]  6.94702961  6.76239294  6.83781949  6.05494267  7.59703500  6.89675599
##  [79]  7.34451107  8.45834548  6.59717850  8.30711660  8.53860986  8.24162183
##  [85]  8.93765563  8.22829239  8.35435246  7.51367725  8.15529536  7.76870894
##  [91]  7.61232439  9.04589941 10.25119000  9.32223454 10.36057669 10.65236005
##  [97] 11.33710028 11.83535543 11.14535914 11.27923998 10.59746041 10.42196759
data_outlier
## Time Series:
## Start = 1 
## End = 102 
## Frequency = 1 
##   [1]  0.00000000 -0.88193660 -1.78919290 -3.50821414 -2.85653910 -2.26345968
##   [7] -1.48434582 -1.03272956 -0.11382655  1.02656733  0.34433159 -0.32207147
##  [13]  0.58943525  2.22217386  2.00578919  2.94045137  3.75307492  3.13527657
##  [19]  3.39846839  3.78544600  4.41115082  3.85640191  2.74338027  3.62648304
##  [25]  1.95070954  1.79813968  2.84018926  2.15793736  2.48699646  1.56967224
##  [31]  2.03124661  2.67668544  3.69979564  4.09232980  3.89433341  3.05359878
##  [37]  2.94339599  3.68362610  5.27896299  5.23355434  4.72163331  3.66975551
##  [43]  3.53439925  5.13932163  3.37283665  2.70431582  3.14553230 -0.03955549
##  [49]  1.95407390 18.66430436  1.19826008  1.78659648  4.42781714  2.92295804
##  [55]  2.54031858  3.17471080  1.47801958  1.76234411  1.93297091  2.14858020
##  [61]  2.16135019  2.85913684  2.34839960  2.68085398  3.56179691  3.97827095
##  [67]  4.11378842  4.18823178  6.58831881  5.86761095  5.76517769  6.02460917
##  [73]  6.94702961  6.76239294  6.83781949  6.05494267  7.59703500  6.89675599
##  [79]  7.34451107  8.45834548  6.59717850  8.30711660  8.53860986  8.24162183
##  [85]  8.93765563  8.22829239  8.35435246  7.51367725  8.15529536  7.76870894
##  [91]  7.61232439  9.04589941 10.25119000  9.32223454 10.36057669 10.65236005
##  [97] 11.33710028 11.83535543 11.14535914 11.27923998 10.59746041 10.42196759
data_missing
## Time Series:
## Start = 1 
## End = 102 
## Frequency = 1 
##   [1]  0.00000000 -0.88193660          NA -3.50821414 -2.85653910          NA
##   [7] -1.48434582 -1.03272956 -0.11382655  1.02656733  0.34433159 -0.32207147
##  [13]  0.58943525  2.22217386  2.00578919          NA          NA  3.13527657
##  [19]  3.39846839  3.78544600  4.41115082  3.85640191  2.74338027  3.62648304
##  [25]  1.95070954  1.79813968  2.84018926  2.15793736  2.48699646  1.56967224
##  [31]  2.03124661  2.67668544  3.69979564  4.09232980  3.89433341          NA
##  [37]          NA  3.68362610  5.27896299  5.23355434  4.72163331  3.66975551
##  [43]  3.53439925  5.13932163  3.37283665  2.70431582  3.14553230 -0.03955549
##  [49]          NA  1.31003669  1.19826008  1.78659648  4.42781714  2.92295804
##  [55]  2.54031858  3.17471080  1.47801958  1.76234411  1.93297091  2.14858020
##  [61]  2.16135019  2.85913684  2.34839960  2.68085398  3.56179691  3.97827095
##  [67]  4.11378842  4.18823178  6.58831881  5.86761095  5.76517769  6.02460917
##  [73]  6.94702961  6.76239294  6.83781949  6.05494267  7.59703500  6.89675599
##  [79]          NA  8.45834548  6.59717850  8.30711660  8.53860986  8.24162183
##  [85]  8.93765563  8.22829239  8.35435246  7.51367725  8.15529536  7.76870894
##  [91]  7.61232439  9.04589941 10.25119000  9.32223454          NA          NA
##  [97] 11.33710028 11.83535543 11.14535914 11.27923998 10.59746041 10.42196759
data_missing_baru
## Time Series:
## Start = 1 
## End = 101 
## Frequency = 1 
##   [1]  0.000000000  1.681918503  1.821588275  0.529850114  0.089719251
##   [6] -0.271877725  1.021269095           NA  1.513314178  1.551565435
##  [11]  0.988418310  2.882892548  3.018898447  1.026304164  2.108245573
##  [16]  1.428803356  0.490753114  0.451945795 -0.566646455 -1.100986565
##  [21] -1.623966871 -3.210770944 -2.069904321 -2.134436728 -3.260247976
##  [26] -1.791403106 -1.645488255 -1.968429474 -1.011622040 -0.316238773
##  [31]  0.372524105  0.929610649  1.377124773  1.229737865  0.951926100
##  [36]  0.624517146 -0.007654722 -0.094827174 -1.343573587  1.063892943
##  [41]  1.812028834  0.546026296  0.384947945           NA  0.812278548
##  [46]           NA  0.864767598  0.778780814  0.752333832  2.125987490
##  [51]  1.637848655  3.247553777  1.391347295  2.330496483           NA
##  [56]  2.501519741  2.837888841  2.271318889  2.046326366  1.070724555
##  [61]  0.185273275  0.657923111  1.015856771  1.000495668  1.925697107
##  [66]           NA  2.950223250  0.803183783  2.219006845           NA
##  [71]  0.738483248           NA  1.360796144  0.235374156  0.631633235
##  [76]  0.417056389           NA  0.840156216  0.397613054  1.126515347
##  [81]  0.766808447  1.167294429  2.187640620  2.427935988           NA
##  [86]  3.275934754  4.036451697  4.439589921  4.601322255  3.942525303
##  [91]  5.429007969  4.544830193  6.901041141  7.983615476  7.541143419
##  [96]  6.599234682  6.068732687  6.426942277           NA  5.824475760
## [101]  4.927742305

Fungsi Evaluasi dan Hasil

hitung_evaluasi <- function(est_list, true_val) {
  rata_est <- mean(est_list)
  bias     <- rata_est - true_val
  mse      <- mean((est_list - true_val)^2)
  return(c(rata_est, bias, mse))
}

hasil_normal  <- hitung_evaluasi(est_normal, phi_true)
hasil_outlier <- hitung_evaluasi(est_outlier, phi_true)
hasil_missing <- hitung_evaluasi(est_missing, phi_true)
hasil_missing_baru <- hitung_evaluasi(est_missing_baru, phi_true)

Tabel Ringkasan

tabel_evaluasi <- data.frame(
  Skenario = c("Normal", "Outlier", "Interpolasi", "Missing Values"),
  Rata_Rata_Estimasi = c(hasil_normal[1], hasil_outlier[1], hasil_missing[1], hasil_missing_baru[1]),
  Bias = c(hasil_normal[2], hasil_outlier[2], hasil_missing[2], hasil_missing_baru[2]),
  MSE = c(hasil_normal[3], hasil_outlier[3], hasil_missing[3], hasil_missing_baru[3])
)

print(tabel_evaluasi)
##         Skenario Rata_Rata_Estimasi         Bias         MSE
## 1         Normal         -0.1982989 -0.007298890 0.009412451
## 2        Outlier         -0.4263324 -0.235332367 0.059313651
## 3    Interpolasi         -0.1357730  0.055226950 0.013782576
## 4 Missing Values         -0.1923917 -0.001391667 0.010933616

Visualisasi

barplot(tabel_evaluasi$MSE,
        names.arg = tabel_evaluasi$Skenario,
        main = "Perbandingan MSE antar Skenario",
        ylab = "MSE",
        col = c("#A3B18A", "#E27396", "#AEC6CF", "#F4F1DE"))