A. Package dan Data

Library

library(googlesheets4)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(TTR)
library(imputeTS)
library(tseries)
## 
## Attaching package: 'tseries'
## The following object is masked from 'package:imputeTS':
## 
##     na.remove
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(graphics)
library(TSA)
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble  3.1.8     ✔ purrr   0.3.4
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ readr::spec()   masks TSA::spec()
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
##   method                 from    
##   autoplot.Arima         forecast
##   autoplot.acf           forecast
##   autoplot.ar            forecast
##   autoplot.bats          forecast
##   autoplot.decomposed.ts forecast
##   autoplot.ets           forecast
##   autoplot.forecast      forecast
##   autoplot.stl           forecast
##   autoplot.ts            forecast
##   fitted.ar              forecast
##   fortify.ts             forecast
##   residuals.ar           forecast
library(cowplot)
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
library(graphics)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following object is masked from 'package:imputeTS':
## 
##     na.locf
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(stats)
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(googlesheets4)
library(tseries)
library(forecast)
library(TTR)
library(TSA)
library(imputeTS)
library(ggplot2)
library(lmtest)
library(FinTS)
## 
## Attaching package: 'FinTS'
## 
## The following object is masked from 'package:forecast':
## 
##     Acf

IMPORT DATA

dataemas <- read.csv("C:/Berliana/Materi Kuliah/Semester 5/Metode Peramalan Deret Waktu/Data Emas Fixx.csv")
ts.dataemas <- ts(dataemas)
kableExtra::kable(head(dataemas) ,caption = 'Data Rata-Rata Harga Emas Global  2010-2022')
Data Rata-Rata Harga Emas Global 2010-2022
Waktu USD
2010-01-29 1078.50
2010-02-26 1108.25
2010-03-31 1115.50
2010-04-30 1179.25
2010-05-31 1207.50
2010-06-30 1244.00
str(dataemas)
## 'data.frame':    146 obs. of  2 variables:
##  $ Waktu: chr  "2010-01-29" "2010-02-26" "2010-03-31" "2010-04-30" ...
##  $ USD  : num  1078 1108 1116 1179 1208 ...

Data yang digunakan bersumber dari kaggle.com. Data ini merupakan data harga emas per bulan di dunia selama 12 tahun terakhir dimulai pada periode 29 Januari 2010 hingga 28 Februari 2022 dengan jumlah observasi adalah 146. Data harga emas tersebut menggunakan satuan USD.

B. EKSPLORASI DATA

Spliting Data

Membagi data ke dalam data latih (data training) dan data uji (data testing) sebelum memproses data lebih lanjut. Data training diambil dari pengamatan data ke-1 sampai ke-113 dengan periode 29 Januari 2010 sampai 31 Mei 2019. Sedangkan data testing diambil dari sisa data yaitu dari pengamatan 114 sampai 146 dengan periode dari 28 Juni 2019 sampai 28 Februari 2022.

y <- dataemas$USD
training<-y[1:113]
testing<-y[114:146]

training.ts<-ts(training)
testing.ts<-ts(testing,start=118)
plot(dataemas$USD, col="red",main="Plot Penjualan rata-rata harga emas")
points(dataemas)

plot(training.ts, col="blue",main="Plot Data Training")
points(training.ts)

plot(testing.ts, col="blue",main="Plot data Testing")
points(testing.ts)

p=0.77
freq_train=as.integer(p*nrow(dataemas))
dataemas$Waktu <- as.Date(dataemas$Waktu)
ggplot(dataemas, aes(x=Waktu, y=USD)) +
  geom_line() + theme_minimal()+
  scale_x_date(date_breaks = "12 month", date_labels = "%Y %b %d") +
  labs(title = "Plot Time Series Rata-Rata Harga Emas 2010-2022",
       subtitle = "(Januari 2010 sd Februari 2022)",
       y="USD") +
  geom_vline(aes(xintercept = Waktu[freq_train], 
                 col="Frequency_Train_Data"), lty=2, lwd=.7) +
    theme(plot.title =  element_text(face = "bold", hjust=.5),
        plot.subtitle = element_text(hjust=.5),
        legend.position = "bottom") +
  scale_color_manual(name = "", values = c(Frequency_Train_Data="red"))  + scale_x_date()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

C. Cek kestasioneran data

Secara Eksploratif

Secara eksploratif, kestasioneran data dapat terlihat dari plot deret waktu atau melihat plot acf dari peubah yang akan diuji. Jika pada plot deret waktu menunjukan pola tren naik/turun pada kurun waktu tertentu (tidak stabil), atau, jika plot acf menunjukkan pola tails off slowly, maka data tersebut tidak stasioner.

plot.ts(training.ts, ylab=expression(Y[t]), main = "Plot Time Series Data Penjualan Emas Bulanan")

acf(training.ts)

adf.test(training.ts)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  training.ts
## Dickey-Fuller = -2.3175, Lag order = 4, p-value = 0.4448
## alternative hypothesis: stationary

D. Differencing

y_diff <- diff(training.ts, differences = 1)
y_diff
## Time Series:
## Start = 2 
## End = 113 
## Frequency = 1 
##   [1]   29.75    7.25   63.75   28.25   36.50  -75.00   77.00   61.00   39.75
##  [10]   36.75   22.00  -78.50   84.00   28.00   96.50    1.00  -31.00  123.00
##  [19]  185.00 -193.50  102.00   24.00 -215.00  213.00   26.00 -107.50  -11.25
##  [28]  -93.25   40.50   23.50   26.50  127.50  -57.00    7.00  -68.50    7.25
##  [37]  -76.25    9.75 -129.25  -74.50 -202.50  122.50   80.25  -68.25   -2.50
##  [46]  -71.00  -48.50   46.50   75.50  -34.75   -3.25  -38.00   64.50  -29.75
##  [55]    0.50  -69.25  -52.25   18.50   23.25   54.25  -46.25  -27.00   -6.75
##  [64]   11.15  -20.40  -72.60   36.60  -21.00   28.35  -80.45   -1.90   51.80
##  [73]  123.10    2.10   48.65  -73.55  108.65   21.25  -32.75   13.25  -50.50
##  [82]  -93.90  -32.20   66.90   42.80  -10.75   21.60   -0.25  -23.95   25.30
##  [91]   44.20  -28.65  -12.95   10.05   10.80   54.05  -27.20    6.00  -10.65
## [100]   -7.85  -54.90  -29.50  -18.50  -15.20   27.70    2.60   61.45   44.25
## [109]   -4.10  -23.75  -13.10   13.25

Cek ulang kestasioneran data

adf.test(y_diff)
## Warning in adf.test(y_diff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y_diff
## Dickey-Fuller = -4.9178, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

uji Augmented Dickey-Fuller (ADF) menghasilkan p-value sebesar 0,01 di mana lebih kecil dari 𝛼 = 0,05 sehingga dikatakan tolak H0. Oleh karena itu, dapat diambil keputusan bahwa data telah stasioner.

table(y_diff)
## y_diff
##              -215            -202.5            -193.5           -129.25 
##                 1                 1                 1                 1 
##            -107.5 -93.9000000000001            -93.25 -80.4499999999998 
##                 1                 1                 1                 1 
##             -78.5            -76.25               -75             -74.5 
##                 1                 1                 1                 1 
## -73.5500000000002 -72.5999999999999               -71            -69.25 
##                 1                 1                 1                 1 
##             -68.5            -68.25               -57 -54.8999999999999 
##                 1                 1                 1                 1 
##            -52.25             -50.5             -48.5            -46.25 
##                 1                 1                 1                 1 
##               -38            -34.75            -32.75 -32.1999999999998 
##                 1                 1                 1                 1 
##               -31            -29.75             -29.5 -28.6500000000001 
##                 1                 1                 1                 1 
##             -27.2               -27            -23.95            -23.75 
##                 1                 1                 1                 1 
##               -21 -20.4000000000001             -18.5             -15.2 
##                 1                 1                 1                 1 
## -13.1000000000001 -12.9499999999998            -11.25            -10.75 
##                 1                 1                 1                 1 
## -10.6499999999999 -7.85000000000014             -6.75 -4.09999999999991 
##                 1                 1                 1                 1 
##             -3.25              -2.5 -1.90000000000009             -0.25 
##                 1                 1                 1                 1 
##               0.5                 1  2.09999999999991  2.59999999999991 
##                 1                 1                 1                 1 
##                 6                 7              7.25              9.75 
##                 1                 1                 2                 1 
##             10.05              10.8  11.1500000000001             13.25 
##                 1                 1                 1                 2 
##              18.5             21.25  21.6000000000001                22 
##                 1                 1                 1                 1 
##             23.25              23.5                24              25.3 
##                 1                 1                 1                 1 
##                26              26.5              27.7                28 
##                 1                 1                 1                 1 
##             28.25  28.3499999999999             29.75              36.5 
##                 1                 1                 1                 1 
##  36.5999999999999             36.75             39.75              40.5 
##                 1                 1                 1                 1 
##              42.8              44.2             44.25              46.5 
##                 1                 1                 1                 1 
##  48.6500000000001              51.8             54.05             54.25 
##                 1                 1                 1                 1 
##                61             61.45             63.75              64.5 
##                 1                 1                 1                 1 
##  66.8999999999999              75.5                77             80.25 
##                 1                 1                 1                 1 
##                84              96.5               102            108.65 
##                 1                 1                 1                 1 
##             122.5               123             123.1             127.5 
##                 1                 1                 1                 1 
##               185               213 
##                 1                 1
plot(y_diff, col="blue")
points(y_diff)

E. Identifikasi model Arima

Untuk mengidentifikasi model arima, nilai ACF dan PACF dapat digunakan untuk menentukan nilai q pada nilai MA(q) dan nilai p pada model AR(p). Namun, kedua nilai ini p dan q pada model campuran ARMA(p,q). Oleh karena itu, dikembangkan metode extended autocorrelation function (EACF) untuk mengindentifikasi model campuran ARMA(p,q). Pada tabel EACF, triangle of zeros akan terbentuk, dan nilai pada pojok kiri atas akan bersesuaian dengan ordo ARMA.

ACF

acf(y_diff)

PACF

pacf(y_diff)

EACF

eacf(y_diff)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o o o o o x o o o o x  o  o  o 
## 1 x o o o o x o o o o x  o  o  o 
## 2 x o o o o x o o o o x  o  o  o 
## 3 x x o o o o o o o o x  o  o  o 
## 4 x x o x o o o o o o x  o  o  o 
## 5 o x x x o o o o o o x  o  o  o 
## 6 o o x o x o o o o o x  o  o  o 
## 7 x o x x o o o o o o o  o  o  o

Pemilihan Model Terbaik

arima011 <- Arima(training.ts, order=c(0,1,1), include.drift=TRUE)
arima111 <- Arima(training.ts, order=c(1,1,1), include.drift=TRUE)

Keakuratan Model

akurasi_model = data.frame("Model" = c("ARIMA(0,1,1)","ARIMA(1,1,1)"),
                        "AIC" = c(arima011$aic,  arima111$aic),
                       "BIC" = c(arima011$bic,  arima111$bic))
akurasi_model[order(akurasi_model[,3]),]
##          Model      AIC      BIC
## 1 ARIMA(0,1,1) 1264.731 1272.887
## 2 ARIMA(1,1,1) 1266.591 1277.465

Terdapat dua model tentatif yang terbentuk, yaitu ARIMA(0,1,1) dan ARIMA(1,1,1). Apabila dilakukan perbandingan model berdasarkan nilai AIC, maka ARIMA(1,1,1) memiliki nilai AIC rendah sebesar 1264.591.

Signifikasi Parameter

coeftest(arima011)
## 
## z test of coefficients:
## 
##        Estimate Std. Error z value Pr(>|z|)  
## ma1   -0.179786   0.096116 -1.8705  0.06141 .
## drift  1.876603   5.179953  0.3623  0.71714  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(arima111)
## 
## z test of coefficients:
## 
##       Estimate Std. Error z value Pr(>|z|)
## ar1    0.15106    0.39487  0.3826   0.7020
## ma1   -0.32438    0.37387 -0.8676   0.3856
## drift  1.86939    5.02688  0.3719   0.7100

Perbandingan Ramalan Dua Model Terbaik

ramalan011 <- forecast(arima011, h = 12)
ramalan111 <- forecast(arima111, h = 12)
plot(ramalan011); lines(testing.ts)

plot(ramalan111); lines(testing.ts)

Nilai Akurasi

akurasi011 <- accuracy(ramalan011, testing.ts)
akurasi111 <- accuracy(ramalan111, testing.ts)

akurasi_model = data.frame("Model" = c("ARIMA(0,1,1)", "ARIMA(1,1,1)"),
                        "RMSE" = c(akurasi011[2,2],akurasi111[2,2]),
                       "MAPE" = c(akurasi011[2,5],akurasi111[2,5]))
akurasi_model[order(akurasi_model[,3]),]
##          Model     RMSE     MAPE
## 2 ARIMA(1,1,1) 186.3628 11.94038
## 1 ARIMA(0,1,1) 186.8153 11.97220

F. Overfitting

arima011 <- Arima(training.ts, order=c(0,1,1), include.drift=TRUE)
summary(arima011)
## Series: training.ts 
## ARIMA(0,1,1) with drift 
## 
## Coefficients:
##           ma1   drift
##       -0.1798  1.8766
## s.e.   0.0961  5.1800
## 
## sigma^2 = 4530:  log likelihood = -629.37
## AIC=1264.73   AICc=1264.95   BIC=1272.89
## 
## Training set error measures:
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 0.06272055 66.40733 49.48094 -0.1065918 3.635735 0.9968908
##                    ACF1
## Training set 0.00685804
coeftest(arima011)
## 
## z test of coefficients:
## 
##        Estimate Std. Error z value Pr(>|z|)  
## ma1   -0.179786   0.096116 -1.8705  0.06141 .
## drift  1.876603   5.179953  0.3623  0.71714  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
arima012 <- Arima(training.ts, order=c(0,1,2), include.drift=TRUE)
summary(arima012)
## Series: training.ts 
## ARIMA(0,1,2) with drift 
## 
## Coefficients:
##           ma1      ma2   drift
##       -0.1634  -0.0530  1.8571
## s.e.   0.0976   0.1023  4.9489
## 
## sigma^2 = 4561:  log likelihood = -629.23
## AIC=1266.46   AICc=1266.84   BIC=1277.34
## 
## Training set error measures:
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 0.09601532 66.32728 49.01529 -0.1072558 3.601811 0.9875094
##                      ACF1
## Training set -0.005448149
coeftest(arima012)
## 
## z test of coefficients:
## 
##        Estimate Std. Error z value Pr(>|z|)  
## ma1   -0.163369   0.097604 -1.6738  0.09417 .
## ma2   -0.052962   0.102251 -0.5180  0.60448  
## drift  1.857092   4.948902  0.3753  0.70747  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
akurasi_ovrfit = data.frame("Model" = c("ARIMA(0,1,1)", "ARIMA(0,1,2)"),
                        "AIC" = c(arima011$aic, arima012$aic),
                       "BIC" = c(arima011$bic, arima012$bic))
akurasi_ovrfit[order(akurasi_ovrfit[,3]),]
##          Model      AIC      BIC
## 1 ARIMA(0,1,1) 1264.731 1272.887
## 2 ARIMA(0,1,2) 1266.464 1277.338

G. Uji Diagnostik Model

Eksplorasi Plot

best_model <- arima011
sisaan <- best_model$residuals
par(mfrow=c(2,2))
qqnorm(sisaan)
qqline(sisaan, col = "blue", lwd = 2)
plot(as.numeric(best_model$fitted), as.numeric(sisaan), xlab="Fitted", ylab="Sisaan", 
     ylim=c(-500,500), main="Residuals vs Fitted"); abline(h=0, col="red")
acf(sisaan)
plot(1:length(sisaan),sisaan,type='o', xlab="Order", ylim=c(-500,500),
     main="Residuals vs Order"); abline(h=0, col="red")

Uji Formal

ks.test(sisaan,"pnorm")
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  sisaan
## D = 0.49541, p-value < 2.2e-16
## alternative hypothesis: two-sided
Box.test(sisaan,type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  sisaan
## X-squared = 0.0054571, df = 1, p-value = 0.9411
t.test(sisaan, mu = 0, conf.level=0.95)
## 
##  One Sample t-test
## 
## data:  sisaan
## t = 0.0099955, df = 112, p-value = 0.992
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -12.37019  12.49563
## sample estimates:
##  mean of x 
## 0.06272055
ramalan_best <- forecast(best_model, h = 12)
plot(ramalan_best); lines(training.ts)

plot(ramalan_best); lines(testing.ts)

H. Validasi Model

accuracy(ramalan_best, testing.ts)
##                        ME      RMSE       MAE        MPE      MAPE      MASE
## Training set   0.06272055  66.40733  49.48094 -0.1065918  3.635735 0.9968908
## Test set     179.95189765 186.81528 179.95190 11.9722024 11.972202 3.6254846
##                    ACF1 Theil's U
## Training set 0.00685804        NA
## Test set     0.10614719  3.352309

Terlihat nilai MAPE berada di nilai 11.972%. Dimana apabila nilai MAPE dalam rentang 10% hingga 20%, maka kemampuan model peramalan dikatakan baik.

I. Hasil Peramalan Model ARIMA(0,1,1)

Periode peramalan yang dilakukan dengan jumlah 6 periode ke depan. Dapat dilihat jika berdasarkan hasil peramalan tersebut terdapat kenaikan harga emas dunia untuk 6 periode ke depan.

ts.dataemas <- as.ts(dataemas$USD)
dataemas.real <- Arima(ts.dataemas, order = c(0,1,1), include.drift = TRUE)
ramalanall <- forecast::forecast(ts.dataemas, model=dataemas.real, h= 6)
ramalanall
##     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 147       1897.224 1807.766 1986.682 1760.410 2034.038
## 148       1902.801 1786.616 2018.987 1725.111 2080.491
## 149       1908.378 1770.554 2046.202 1697.595 2119.161
## 150       1913.955 1757.457 2070.453 1674.611 2153.299
## 151       1919.532 1746.361 2092.703 1654.690 2184.374
## 152       1925.109 1736.736 2113.482 1637.017 2213.201
data.ramalanall <- ramalanall$mean
plot(ramalanall, xlab="Time", ylab="Harga Emas Dunia", main="Plot Ramalan Metode ARIMA(0,1,1)")