set.seed(123)
n_obs <- 100
n_rep <- 100
phi_true <- -0.191 # parameter AR(1)
#Hasil estimasi
est_normal <- numeric(n_rep)
est_outlier <- numeric(n_rep)
est_missing <- numeric(n_rep)
est_missing_baru <- numeric(n_rep)
data_gen <- arima.sim(
n = n_obs,
list(order = c(1,1,0), ar = phi_true)
)
model_normal <- arima(data_gen, order = c(1,1,0))
coef(model_normal)
## ar1
## -0.1776056
plot(data_gen, main = "Simulasi Data ARIMA(1,1,0) - BBCA",
ylab = "Nilai", xlab = "Waktu")
data_outlier <- data_gen
# Menambahkan nilai ekstrem (5x standar deviasi)
data_outlier[50] <- data_outlier[50] + (5 * sd(data_gen))
model_outlier <- arima(data_outlier, order = c(1,1,0))
coef(model_outlier)
## ar1
## -0.4740628
plot(data_outlier, main = "Simulasi Data Outlier")
data_missing <- data_gen
# Menghapus 10% data secara acak
idx_missing <- sample(1:n_obs, 10)
data_missing[idx_missing] <- NA
# Interpolasi linear
data_fixed <- approx(data_missing, xout = 1:n_obs)$y
model_missing <- arima(data_fixed, order = c(1,1,0))
coef(model_missing)
## ar1
## -0.06006856
plot(data_fixed, type = "l",
main = "Simulasi Data Missing (Interpolasi)",
ylab = "Nilai", xlab = "Waktu")
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.3
data_missing_baru <- data_gen
n_obs <- length(data_missing_baru)
# Menghapus 10% data secara acak
n_missing <- round(0.1 * n_obs)
idx_missing_baru <- sample(1:n_obs, n_missing)
data_missing_baru[idx_missing_baru] <- NA
# Model langsung tanpa interpolasi
model_missing_baru <- Arima(data_missing_baru, order = c(1,1,0))
coef(model_missing_baru)
## ar1
## -0.275261
# Plot data missing
plot(data_missing_baru, type = "l",
main = "Simulasi Data Missing",
ylab = "Nilai", xlab = "Waktu")
par(mfrow = c(2,2))
plot(data_gen, type="l", main="Normal")
plot(data_outlier, type="l", main="Outlier")
plot(data_fixed, type="l", main="Missing (Interpolasi)")
plot(data_missing_baru, type="l", main="Missing")
for(i in 1:n_rep) {
# Data Normal
data_gen <- arima.sim(
n = n_obs,
list(order = c(1,1,0), ar = phi_true)
)
model_normal <- arima(data_gen, order = c(1,1,0))
est_normal[i] <- coef(model_normal)["ar1"]
# Data Outlier
data_outlier <- data_gen
data_outlier[50] <- data_outlier[50] + (5 * sd(data_gen))
model_outlier <- arima(data_outlier, order = c(1,1,0))
est_outlier[i] <- coef(model_outlier)["ar1"]
# Data Missing dengan Interpolasi
data_missing <- data_gen
data_missing[sample(1:n_obs, 10)] <- NA
data_fixed <- approx(data_missing, xout = 1:n_obs)$y
model_missing <- arima(data_fixed, order = c(1,1,0))
est_missing[i] <- coef(model_missing)["ar1"]
# Data Missing Value tanpa Interpolasi
data_missing <- data_gen
data_missing[sample(1:n_obs, 10)] <- NA
model_missing_baru <- Arima(data_missing, order = c(1,1,0))
est_missing_baru[i] <- coef(model_missing_baru)["ar1"]
}
data_gen
## Time Series:
## Start = 1
## End = 102
## Frequency = 1
## [1] 0.00000000 -0.88193660 -1.78919290 -3.50821414 -2.85653910 -2.26345968
## [7] -1.48434582 -1.03272956 -0.11382655 1.02656733 0.34433159 -0.32207147
## [13] 0.58943525 2.22217386 2.00578919 2.94045137 3.75307492 3.13527657
## [19] 3.39846839 3.78544600 4.41115082 3.85640191 2.74338027 3.62648304
## [25] 1.95070954 1.79813968 2.84018926 2.15793736 2.48699646 1.56967224
## [31] 2.03124661 2.67668544 3.69979564 4.09232980 3.89433341 3.05359878
## [37] 2.94339599 3.68362610 5.27896299 5.23355434 4.72163331 3.66975551
## [43] 3.53439925 5.13932163 3.37283665 2.70431582 3.14553230 -0.03955549
## [49] 1.95407390 1.31003669 1.19826008 1.78659648 4.42781714 2.92295804
## [55] 2.54031858 3.17471080 1.47801958 1.76234411 1.93297091 2.14858020
## [61] 2.16135019 2.85913684 2.34839960 2.68085398 3.56179691 3.97827095
## [67] 4.11378842 4.18823178 6.58831881 5.86761095 5.76517769 6.02460917
## [73] 6.94702961 6.76239294 6.83781949 6.05494267 7.59703500 6.89675599
## [79] 7.34451107 8.45834548 6.59717850 8.30711660 8.53860986 8.24162183
## [85] 8.93765563 8.22829239 8.35435246 7.51367725 8.15529536 7.76870894
## [91] 7.61232439 9.04589941 10.25119000 9.32223454 10.36057669 10.65236005
## [97] 11.33710028 11.83535543 11.14535914 11.27923998 10.59746041 10.42196759
data_outlier
## Time Series:
## Start = 1
## End = 102
## Frequency = 1
## [1] 0.00000000 -0.88193660 -1.78919290 -3.50821414 -2.85653910 -2.26345968
## [7] -1.48434582 -1.03272956 -0.11382655 1.02656733 0.34433159 -0.32207147
## [13] 0.58943525 2.22217386 2.00578919 2.94045137 3.75307492 3.13527657
## [19] 3.39846839 3.78544600 4.41115082 3.85640191 2.74338027 3.62648304
## [25] 1.95070954 1.79813968 2.84018926 2.15793736 2.48699646 1.56967224
## [31] 2.03124661 2.67668544 3.69979564 4.09232980 3.89433341 3.05359878
## [37] 2.94339599 3.68362610 5.27896299 5.23355434 4.72163331 3.66975551
## [43] 3.53439925 5.13932163 3.37283665 2.70431582 3.14553230 -0.03955549
## [49] 1.95407390 18.66430436 1.19826008 1.78659648 4.42781714 2.92295804
## [55] 2.54031858 3.17471080 1.47801958 1.76234411 1.93297091 2.14858020
## [61] 2.16135019 2.85913684 2.34839960 2.68085398 3.56179691 3.97827095
## [67] 4.11378842 4.18823178 6.58831881 5.86761095 5.76517769 6.02460917
## [73] 6.94702961 6.76239294 6.83781949 6.05494267 7.59703500 6.89675599
## [79] 7.34451107 8.45834548 6.59717850 8.30711660 8.53860986 8.24162183
## [85] 8.93765563 8.22829239 8.35435246 7.51367725 8.15529536 7.76870894
## [91] 7.61232439 9.04589941 10.25119000 9.32223454 10.36057669 10.65236005
## [97] 11.33710028 11.83535543 11.14535914 11.27923998 10.59746041 10.42196759
data_missing
## Time Series:
## Start = 1
## End = 102
## Frequency = 1
## [1] 0.00000000 -0.88193660 NA -3.50821414 -2.85653910 NA
## [7] -1.48434582 -1.03272956 -0.11382655 1.02656733 0.34433159 -0.32207147
## [13] 0.58943525 2.22217386 2.00578919 NA NA 3.13527657
## [19] 3.39846839 3.78544600 4.41115082 3.85640191 2.74338027 3.62648304
## [25] 1.95070954 1.79813968 2.84018926 2.15793736 2.48699646 1.56967224
## [31] 2.03124661 2.67668544 3.69979564 4.09232980 3.89433341 NA
## [37] NA 3.68362610 5.27896299 5.23355434 4.72163331 3.66975551
## [43] 3.53439925 5.13932163 3.37283665 2.70431582 3.14553230 -0.03955549
## [49] NA 1.31003669 1.19826008 1.78659648 4.42781714 2.92295804
## [55] 2.54031858 3.17471080 1.47801958 1.76234411 1.93297091 2.14858020
## [61] 2.16135019 2.85913684 2.34839960 2.68085398 3.56179691 3.97827095
## [67] 4.11378842 4.18823178 6.58831881 5.86761095 5.76517769 6.02460917
## [73] 6.94702961 6.76239294 6.83781949 6.05494267 7.59703500 6.89675599
## [79] NA 8.45834548 6.59717850 8.30711660 8.53860986 8.24162183
## [85] 8.93765563 8.22829239 8.35435246 7.51367725 8.15529536 7.76870894
## [91] 7.61232439 9.04589941 10.25119000 9.32223454 NA NA
## [97] 11.33710028 11.83535543 11.14535914 11.27923998 10.59746041 10.42196759
data_missing_baru
## Time Series:
## Start = 1
## End = 101
## Frequency = 1
## [1] 0.000000000 1.681918503 1.821588275 0.529850114 0.089719251
## [6] -0.271877725 1.021269095 NA 1.513314178 1.551565435
## [11] 0.988418310 2.882892548 3.018898447 1.026304164 2.108245573
## [16] 1.428803356 0.490753114 0.451945795 -0.566646455 -1.100986565
## [21] -1.623966871 -3.210770944 -2.069904321 -2.134436728 -3.260247976
## [26] -1.791403106 -1.645488255 -1.968429474 -1.011622040 -0.316238773
## [31] 0.372524105 0.929610649 1.377124773 1.229737865 0.951926100
## [36] 0.624517146 -0.007654722 -0.094827174 -1.343573587 1.063892943
## [41] 1.812028834 0.546026296 0.384947945 NA 0.812278548
## [46] NA 0.864767598 0.778780814 0.752333832 2.125987490
## [51] 1.637848655 3.247553777 1.391347295 2.330496483 NA
## [56] 2.501519741 2.837888841 2.271318889 2.046326366 1.070724555
## [61] 0.185273275 0.657923111 1.015856771 1.000495668 1.925697107
## [66] NA 2.950223250 0.803183783 2.219006845 NA
## [71] 0.738483248 NA 1.360796144 0.235374156 0.631633235
## [76] 0.417056389 NA 0.840156216 0.397613054 1.126515347
## [81] 0.766808447 1.167294429 2.187640620 2.427935988 NA
## [86] 3.275934754 4.036451697 4.439589921 4.601322255 3.942525303
## [91] 5.429007969 4.544830193 6.901041141 7.983615476 7.541143419
## [96] 6.599234682 6.068732687 6.426942277 NA 5.824475760
## [101] 4.927742305
hitung_evaluasi <- function(est_list, true_val) {
rata_est <- mean(est_list)
bias <- rata_est - true_val
mse <- mean((est_list - true_val)^2)
return(c(rata_est, bias, mse))
}
hasil_normal <- hitung_evaluasi(est_normal, phi_true)
hasil_outlier <- hitung_evaluasi(est_outlier, phi_true)
hasil_missing <- hitung_evaluasi(est_missing, phi_true)
hasil_missing_baru <- hitung_evaluasi(est_missing_baru, phi_true)
tabel_evaluasi <- data.frame(
Skenario = c("Normal", "Outlier", "Interpolasi", "Missing Value"),
Rata_Rata_Estimasi = c(hasil_normal[1], hasil_outlier[1], hasil_missing[1], hasil_missing_baru[1]),
Bias = c(hasil_normal[2], hasil_outlier[2], hasil_missing[2], hasil_missing_baru[2]),
MSE = c(hasil_normal[3], hasil_outlier[3], hasil_missing[3], hasil_missing_baru[3])
)
tabel_evaluasi
## Skenario Rata_Rata_Estimasi Bias MSE
## 1 Normal -0.1982989 -0.007298890 0.009412451
## 2 Outlier -0.4263324 -0.235332367 0.059313651
## 3 Interpolasi -0.1357730 0.055226950 0.013782576
## 4 Missing Value -0.1923917 -0.001391667 0.010933616
Pada tabel ringkasan terlihat bahwa skenario yang memiliki nilai bias tertinggi adalah Outlier sebesar -0.235332367 dengan nilai MSE terbesar hingga mencapai 0.059313651. Sementara itu, pada skenario Missing Value tanpa dilakukan interpolasi, menghasil Bias dan MSE yang sangat kecil dibandingkan 2 skenario lainnya yaitu sebesar -0.1923917 dan 0.010933616. Selain itu, meskipun nilai Bias pada skenario Missing Value tersebut (-0.0014) tampak lebih kecil dibandingkan kondisi Normal (-0.0072), evaluasi ketangguhan model tetap bergantung pada nilai Mean Squared Error (MSE) di mana nilai MSE pada skenario Missing Value (0.0109) lebih besar dibandingkan kondisi Normal (0.0094). Hal ini menegaskan bahwa hilangnya informasi observasi tetap memperbesar variansi estimasi. Sebaliknya, ketika data tersebut ditangani menggunakan interpolasi, nilai Bias justru meningkat menjadi 0.0552 dengan MSE 0.0137. Hal ini membuktikan bahwa penggunaan interpolasi linier dapat merusak variansi dan struktur autokorelasi asli dari Time Series.
barplot(tabel_evaluasi$MSE,
names.arg = tabel_evaluasi$Skenario,
main = "Perbandingan MSE antar Skenario",
ylab = "MSE",
col = c("#A3B18A", "#E27396", "#AEC6CF", "#F4F1DE"))
Grafik menunjukkan bahwa skenario outlier memiliki nilai MSE paling
tinggi, yang berarti paling merusak akurasi estimasi model. Sebaliknya,
kondisi data normal menghasilkan MSE paling rendah, sehingga estimasi
berjalan dengan baik tanpa gangguan. Untuk data hilang, penanganan
langsung oleh model memberikan MSE yang lebih kecil dibandingkan
interpolasi, yang menunjukkan bahwa metode interpolasi linier cenderung
menurunkan akurasi. Secara keseluruhan, hasil ini menegaskan bahwa
outlier merupakan faktor paling berpengaruh terhadap kesalahan estimasi,
sedangkan penanganan missing value yang tepat dapat menjaga performa
model tetap stabil.