set.seed(123)

n_obs    <- 100
n_rep    <- 100
phi_true <- -0.191  # parameter AR(1)

#Hasil estimasi
est_normal  <- numeric(n_rep)
est_outlier <- numeric(n_rep)
est_missing <- numeric(n_rep)
est_missing_baru <- numeric(n_rep)

Membangkitkan Data

data_gen <- arima.sim(
  n = n_obs,
  list(order = c(1,1,0), ar = phi_true)
)

Estimasi Data Normal

model_normal <- arima(data_gen, order = c(1,1,0))
coef(model_normal)
##        ar1 
## -0.1776056
plot(data_gen, main = "Simulasi Data ARIMA(1,1,0) - BBCA",
     ylab = "Nilai", xlab = "Waktu")

Skenario 1 (Data Pencilan)

data_outlier <- data_gen

# Menambahkan nilai ekstrem (5x standar deviasi)
data_outlier[50] <- data_outlier[50] + (5 * sd(data_gen))

model_outlier <- arima(data_outlier, order = c(1,1,0))
coef(model_outlier)
##        ar1 
## -0.4740628
plot(data_outlier, main = "Simulasi Data Outlier")

Skenario 2 (Missing Value)

data_missing <- data_gen

# Menghapus 10% data secara acak
idx_missing <- sample(1:n_obs, 10)
data_missing[idx_missing] <- NA

# Interpolasi linear
data_fixed <- approx(data_missing, xout = 1:n_obs)$y

model_missing <- arima(data_fixed, order = c(1,1,0))
coef(model_missing)
##         ar1 
## -0.06006856
plot(data_fixed, type = "l",
     main = "Simulasi Data Missing (Interpolasi)",
     ylab = "Nilai", xlab = "Waktu")

library(forecast)
## Warning: package 'forecast' was built under R version 4.4.3
data_missing_baru <- data_gen

n_obs <- length(data_missing_baru)

# Menghapus 10% data secara acak
n_missing <- round(0.1 * n_obs)
idx_missing_baru <- sample(1:n_obs, n_missing)

data_missing_baru[idx_missing_baru] <- NA

# Model langsung tanpa interpolasi
model_missing_baru <- Arima(data_missing_baru, order = c(1,1,0))

coef(model_missing_baru)
##       ar1 
## -0.275261
# Plot data missing
plot(data_missing_baru, type = "l",
     main = "Simulasi Data Missing",
     ylab = "Nilai", xlab = "Waktu")

par(mfrow = c(2,2))

plot(data_gen, type="l", main="Normal")
plot(data_outlier, type="l", main="Outlier")
plot(data_fixed, type="l", main="Missing (Interpolasi)")
plot(data_missing_baru, type="l", main="Missing")

Simulasi Replikasi

for(i in 1:n_rep) {
  
  # Data Normal
  data_gen <- arima.sim(
    n = n_obs,
    list(order = c(1,1,0), ar = phi_true)
  )
  
  model_normal <- arima(data_gen, order = c(1,1,0))
  est_normal[i] <- coef(model_normal)["ar1"]
  
  # Data Outlier
  data_outlier <- data_gen
  data_outlier[50] <- data_outlier[50] + (5 * sd(data_gen))
  
  model_outlier <- arima(data_outlier, order = c(1,1,0))
  est_outlier[i] <- coef(model_outlier)["ar1"]
  
  # Data Missing dengan Interpolasi
  data_missing <- data_gen
  data_missing[sample(1:n_obs, 10)] <- NA
  
  data_fixed <- approx(data_missing, xout = 1:n_obs)$y
  
  model_missing <- arima(data_fixed, order = c(1,1,0))
  est_missing[i] <- coef(model_missing)["ar1"]
  
  # Data Missing Value tanpa Interpolasi
  data_missing <- data_gen
  data_missing[sample(1:n_obs, 10)] <- NA

  model_missing_baru <- Arima(data_missing, order = c(1,1,0))
  
  est_missing_baru[i] <- coef(model_missing_baru)["ar1"]
  
}
data_gen
## Time Series:
## Start = 1 
## End = 102 
## Frequency = 1 
##   [1]  0.00000000 -0.88193660 -1.78919290 -3.50821414 -2.85653910 -2.26345968
##   [7] -1.48434582 -1.03272956 -0.11382655  1.02656733  0.34433159 -0.32207147
##  [13]  0.58943525  2.22217386  2.00578919  2.94045137  3.75307492  3.13527657
##  [19]  3.39846839  3.78544600  4.41115082  3.85640191  2.74338027  3.62648304
##  [25]  1.95070954  1.79813968  2.84018926  2.15793736  2.48699646  1.56967224
##  [31]  2.03124661  2.67668544  3.69979564  4.09232980  3.89433341  3.05359878
##  [37]  2.94339599  3.68362610  5.27896299  5.23355434  4.72163331  3.66975551
##  [43]  3.53439925  5.13932163  3.37283665  2.70431582  3.14553230 -0.03955549
##  [49]  1.95407390  1.31003669  1.19826008  1.78659648  4.42781714  2.92295804
##  [55]  2.54031858  3.17471080  1.47801958  1.76234411  1.93297091  2.14858020
##  [61]  2.16135019  2.85913684  2.34839960  2.68085398  3.56179691  3.97827095
##  [67]  4.11378842  4.18823178  6.58831881  5.86761095  5.76517769  6.02460917
##  [73]  6.94702961  6.76239294  6.83781949  6.05494267  7.59703500  6.89675599
##  [79]  7.34451107  8.45834548  6.59717850  8.30711660  8.53860986  8.24162183
##  [85]  8.93765563  8.22829239  8.35435246  7.51367725  8.15529536  7.76870894
##  [91]  7.61232439  9.04589941 10.25119000  9.32223454 10.36057669 10.65236005
##  [97] 11.33710028 11.83535543 11.14535914 11.27923998 10.59746041 10.42196759
data_outlier
## Time Series:
## Start = 1 
## End = 102 
## Frequency = 1 
##   [1]  0.00000000 -0.88193660 -1.78919290 -3.50821414 -2.85653910 -2.26345968
##   [7] -1.48434582 -1.03272956 -0.11382655  1.02656733  0.34433159 -0.32207147
##  [13]  0.58943525  2.22217386  2.00578919  2.94045137  3.75307492  3.13527657
##  [19]  3.39846839  3.78544600  4.41115082  3.85640191  2.74338027  3.62648304
##  [25]  1.95070954  1.79813968  2.84018926  2.15793736  2.48699646  1.56967224
##  [31]  2.03124661  2.67668544  3.69979564  4.09232980  3.89433341  3.05359878
##  [37]  2.94339599  3.68362610  5.27896299  5.23355434  4.72163331  3.66975551
##  [43]  3.53439925  5.13932163  3.37283665  2.70431582  3.14553230 -0.03955549
##  [49]  1.95407390 18.66430436  1.19826008  1.78659648  4.42781714  2.92295804
##  [55]  2.54031858  3.17471080  1.47801958  1.76234411  1.93297091  2.14858020
##  [61]  2.16135019  2.85913684  2.34839960  2.68085398  3.56179691  3.97827095
##  [67]  4.11378842  4.18823178  6.58831881  5.86761095  5.76517769  6.02460917
##  [73]  6.94702961  6.76239294  6.83781949  6.05494267  7.59703500  6.89675599
##  [79]  7.34451107  8.45834548  6.59717850  8.30711660  8.53860986  8.24162183
##  [85]  8.93765563  8.22829239  8.35435246  7.51367725  8.15529536  7.76870894
##  [91]  7.61232439  9.04589941 10.25119000  9.32223454 10.36057669 10.65236005
##  [97] 11.33710028 11.83535543 11.14535914 11.27923998 10.59746041 10.42196759
data_missing
## Time Series:
## Start = 1 
## End = 102 
## Frequency = 1 
##   [1]  0.00000000 -0.88193660          NA -3.50821414 -2.85653910          NA
##   [7] -1.48434582 -1.03272956 -0.11382655  1.02656733  0.34433159 -0.32207147
##  [13]  0.58943525  2.22217386  2.00578919          NA          NA  3.13527657
##  [19]  3.39846839  3.78544600  4.41115082  3.85640191  2.74338027  3.62648304
##  [25]  1.95070954  1.79813968  2.84018926  2.15793736  2.48699646  1.56967224
##  [31]  2.03124661  2.67668544  3.69979564  4.09232980  3.89433341          NA
##  [37]          NA  3.68362610  5.27896299  5.23355434  4.72163331  3.66975551
##  [43]  3.53439925  5.13932163  3.37283665  2.70431582  3.14553230 -0.03955549
##  [49]          NA  1.31003669  1.19826008  1.78659648  4.42781714  2.92295804
##  [55]  2.54031858  3.17471080  1.47801958  1.76234411  1.93297091  2.14858020
##  [61]  2.16135019  2.85913684  2.34839960  2.68085398  3.56179691  3.97827095
##  [67]  4.11378842  4.18823178  6.58831881  5.86761095  5.76517769  6.02460917
##  [73]  6.94702961  6.76239294  6.83781949  6.05494267  7.59703500  6.89675599
##  [79]          NA  8.45834548  6.59717850  8.30711660  8.53860986  8.24162183
##  [85]  8.93765563  8.22829239  8.35435246  7.51367725  8.15529536  7.76870894
##  [91]  7.61232439  9.04589941 10.25119000  9.32223454          NA          NA
##  [97] 11.33710028 11.83535543 11.14535914 11.27923998 10.59746041 10.42196759
data_missing_baru
## Time Series:
## Start = 1 
## End = 101 
## Frequency = 1 
##   [1]  0.000000000  1.681918503  1.821588275  0.529850114  0.089719251
##   [6] -0.271877725  1.021269095           NA  1.513314178  1.551565435
##  [11]  0.988418310  2.882892548  3.018898447  1.026304164  2.108245573
##  [16]  1.428803356  0.490753114  0.451945795 -0.566646455 -1.100986565
##  [21] -1.623966871 -3.210770944 -2.069904321 -2.134436728 -3.260247976
##  [26] -1.791403106 -1.645488255 -1.968429474 -1.011622040 -0.316238773
##  [31]  0.372524105  0.929610649  1.377124773  1.229737865  0.951926100
##  [36]  0.624517146 -0.007654722 -0.094827174 -1.343573587  1.063892943
##  [41]  1.812028834  0.546026296  0.384947945           NA  0.812278548
##  [46]           NA  0.864767598  0.778780814  0.752333832  2.125987490
##  [51]  1.637848655  3.247553777  1.391347295  2.330496483           NA
##  [56]  2.501519741  2.837888841  2.271318889  2.046326366  1.070724555
##  [61]  0.185273275  0.657923111  1.015856771  1.000495668  1.925697107
##  [66]           NA  2.950223250  0.803183783  2.219006845           NA
##  [71]  0.738483248           NA  1.360796144  0.235374156  0.631633235
##  [76]  0.417056389           NA  0.840156216  0.397613054  1.126515347
##  [81]  0.766808447  1.167294429  2.187640620  2.427935988           NA
##  [86]  3.275934754  4.036451697  4.439589921  4.601322255  3.942525303
##  [91]  5.429007969  4.544830193  6.901041141  7.983615476  7.541143419
##  [96]  6.599234682  6.068732687  6.426942277           NA  5.824475760
## [101]  4.927742305

Fungsi Evaluasi dan Hasil

hitung_evaluasi <- function(est_list, true_val) {
  rata_est <- mean(est_list)
  bias     <- rata_est - true_val
  mse      <- mean((est_list - true_val)^2)
  return(c(rata_est, bias, mse))
}

hasil_normal  <- hitung_evaluasi(est_normal, phi_true)
hasil_outlier <- hitung_evaluasi(est_outlier, phi_true)
hasil_missing <- hitung_evaluasi(est_missing, phi_true)
hasil_missing_baru <- hitung_evaluasi(est_missing_baru, phi_true)

Tabel Ringkasan

tabel_evaluasi <- data.frame(
  Skenario = c("Normal", "Outlier", "Interpolasi", "Missing Value"),
  Rata_Rata_Estimasi = c(hasil_normal[1], hasil_outlier[1], hasil_missing[1], hasil_missing_baru[1]),
  Bias = c(hasil_normal[2], hasil_outlier[2], hasil_missing[2], hasil_missing_baru[2]),
  MSE = c(hasil_normal[3], hasil_outlier[3], hasil_missing[3], hasil_missing_baru[3])
)

tabel_evaluasi
##        Skenario Rata_Rata_Estimasi         Bias         MSE
## 1        Normal         -0.1982989 -0.007298890 0.009412451
## 2       Outlier         -0.4263324 -0.235332367 0.059313651
## 3   Interpolasi         -0.1357730  0.055226950 0.013782576
## 4 Missing Value         -0.1923917 -0.001391667 0.010933616

Pada tabel ringkasan terlihat bahwa skenario yang memiliki nilai bias tertinggi adalah Outlier sebesar -0.235332367 dengan nilai MSE terbesar hingga mencapai 0.059313651. Sementara itu, pada skenario Missing Value tanpa dilakukan interpolasi, menghasil Bias dan MSE yang sangat kecil dibandingkan 2 skenario lainnya yaitu sebesar -0.1923917 dan 0.010933616. Selain itu, meskipun nilai Bias pada skenario Missing Value tersebut (-0.0014) tampak lebih kecil dibandingkan kondisi Normal (-0.0072), evaluasi ketangguhan model tetap bergantung pada nilai Mean Squared Error (MSE) di mana nilai MSE pada skenario Missing Value (0.0109) lebih besar dibandingkan kondisi Normal (0.0094). Hal ini menegaskan bahwa hilangnya informasi observasi tetap memperbesar variansi estimasi. Sebaliknya, ketika data tersebut ditangani menggunakan interpolasi, nilai Bias justru meningkat menjadi 0.0552 dengan MSE 0.0137. Hal ini membuktikan bahwa penggunaan interpolasi linier dapat merusak variansi dan struktur autokorelasi asli dari Time Series.

Visualisasi

barplot(tabel_evaluasi$MSE,
        names.arg = tabel_evaluasi$Skenario,
        main = "Perbandingan MSE antar Skenario",
        ylab = "MSE",
        col = c("#A3B18A", "#E27396", "#AEC6CF", "#F4F1DE"))

Grafik menunjukkan bahwa skenario outlier memiliki nilai MSE paling tinggi, yang berarti paling merusak akurasi estimasi model. Sebaliknya, kondisi data normal menghasilkan MSE paling rendah, sehingga estimasi berjalan dengan baik tanpa gangguan. Untuk data hilang, penanganan langsung oleh model memberikan MSE yang lebih kecil dibandingkan interpolasi, yang menunjukkan bahwa metode interpolasi linier cenderung menurunkan akurasi. Secara keseluruhan, hasil ini menegaskan bahwa outlier merupakan faktor paling berpengaruh terhadap kesalahan estimasi, sedangkan penanganan missing value yang tepat dapat menjaga performa model tetap stabil.