library(googlesheets4)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(TTR)
library(imputeTS)
library(tseries)
##
## Attaching package: 'tseries'
## The following object is masked from 'package:imputeTS':
##
## na.remove
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(graphics)
library(TSA)
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ purrr 0.3.4
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ readr::spec() masks TSA::spec()
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
## method from
## autoplot.Arima forecast
## autoplot.acf forecast
## autoplot.ar forecast
## autoplot.bats forecast
## autoplot.decomposed.ts forecast
## autoplot.ets forecast
## autoplot.forecast forecast
## autoplot.stl forecast
## autoplot.ts forecast
## fitted.ar forecast
## fortify.ts forecast
## residuals.ar forecast
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(graphics)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following object is masked from 'package:imputeTS':
##
## na.locf
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(stats)
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(googlesheets4)
library(tseries)
library(forecast)
library(TTR)
library(TSA)
library(imputeTS)
library(ggplot2)
library(lmtest)
library(FinTS)
##
## Attaching package: 'FinTS'
##
## The following object is masked from 'package:forecast':
##
## Acf
dataemas <- read.csv("C:/Berliana/Materi Kuliah/Semester 5/Metode Peramalan Deret Waktu/Data Emas Fixx.csv")
ts.dataemas <- ts(dataemas)
kableExtra::kable(head(dataemas) ,caption = 'Data Rata-Rata Harga Emas Global 2010-2022')
| Waktu | USD |
|---|---|
| 2010-01-29 | 1078.50 |
| 2010-02-26 | 1108.25 |
| 2010-03-31 | 1115.50 |
| 2010-04-30 | 1179.25 |
| 2010-05-31 | 1207.50 |
| 2010-06-30 | 1244.00 |
str(dataemas)
## 'data.frame': 146 obs. of 2 variables:
## $ Waktu: chr "2010-01-29" "2010-02-26" "2010-03-31" "2010-04-30" ...
## $ USD : num 1078 1108 1116 1179 1208 ...
Data yang digunakan bersumber dari kaggle.com. Data ini merupakan data harga emas per bulan di dunia selama 12 tahun terakhir dimulai pada periode 29 Januari 2010 hingga 28 Februari 2022 dengan jumlah observasi adalah 146. Data harga emas tersebut menggunakan satuan USD.
Membagi data ke dalam data latih (data training) dan data uji (data testing) sebelum memproses data lebih lanjut. Data training diambil dari pengamatan data ke-1 sampai ke-113 dengan periode 29 Januari 2010 sampai 31 Mei 2019. Sedangkan data testing diambil dari sisa data yaitu dari pengamatan 114 sampai 146 dengan periode dari 28 Juni 2019 sampai 28 Februari 2022.
y <- dataemas$USD
training<-y[1:113]
testing<-y[114:146]
training.ts<-ts(training)
testing.ts<-ts(testing,start=118)
plot(dataemas$USD, col="red",main="Plot Penjualan rata-rata harga emas")
points(dataemas)
plot(training.ts, col="blue",main="Plot Data Training")
points(training.ts)
plot(testing.ts, col="blue",main="Plot data Testing")
points(testing.ts)
p=0.77
freq_train=as.integer(p*nrow(dataemas))
dataemas$Waktu <- as.Date(dataemas$Waktu)
ggplot(dataemas, aes(x=Waktu, y=USD)) +
geom_line() + theme_minimal()+
scale_x_date(date_breaks = "12 month", date_labels = "%Y %b %d") +
labs(title = "Plot Time Series Rata-Rata Harga Emas 2010-2022",
subtitle = "(Januari 2010 sd Februari 2022)",
y="USD") +
geom_vline(aes(xintercept = Waktu[freq_train],
col="Frequency_Train_Data"), lty=2, lwd=.7) +
theme(plot.title = element_text(face = "bold", hjust=.5),
plot.subtitle = element_text(hjust=.5),
legend.position = "bottom") +
scale_color_manual(name = "", values = c(Frequency_Train_Data="red")) + scale_x_date()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
Secara eksploratif, kestasioneran data dapat terlihat dari plot deret waktu atau melihat plot acf dari peubah yang akan diuji. Jika pada plot deret waktu menunjukan pola tren naik/turun pada kurun waktu tertentu (tidak stabil), atau, jika plot acf menunjukkan pola tails off slowly, maka data tersebut tidak stasioner.
plot.ts(training.ts, ylab=expression(Y[t]), main = "Plot Time Series Data Penjualan Emas Bulanan")
acf(training.ts)
adf.test(training.ts)
##
## Augmented Dickey-Fuller Test
##
## data: training.ts
## Dickey-Fuller = -2.3175, Lag order = 4, p-value = 0.4448
## alternative hypothesis: stationary
y_diff <- diff(training.ts, differences = 1)
y_diff
## Time Series:
## Start = 2
## End = 113
## Frequency = 1
## [1] 29.75 7.25 63.75 28.25 36.50 -75.00 77.00 61.00 39.75
## [10] 36.75 22.00 -78.50 84.00 28.00 96.50 1.00 -31.00 123.00
## [19] 185.00 -193.50 102.00 24.00 -215.00 213.00 26.00 -107.50 -11.25
## [28] -93.25 40.50 23.50 26.50 127.50 -57.00 7.00 -68.50 7.25
## [37] -76.25 9.75 -129.25 -74.50 -202.50 122.50 80.25 -68.25 -2.50
## [46] -71.00 -48.50 46.50 75.50 -34.75 -3.25 -38.00 64.50 -29.75
## [55] 0.50 -69.25 -52.25 18.50 23.25 54.25 -46.25 -27.00 -6.75
## [64] 11.15 -20.40 -72.60 36.60 -21.00 28.35 -80.45 -1.90 51.80
## [73] 123.10 2.10 48.65 -73.55 108.65 21.25 -32.75 13.25 -50.50
## [82] -93.90 -32.20 66.90 42.80 -10.75 21.60 -0.25 -23.95 25.30
## [91] 44.20 -28.65 -12.95 10.05 10.80 54.05 -27.20 6.00 -10.65
## [100] -7.85 -54.90 -29.50 -18.50 -15.20 27.70 2.60 61.45 44.25
## [109] -4.10 -23.75 -13.10 13.25
adf.test(y_diff)
## Warning in adf.test(y_diff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: y_diff
## Dickey-Fuller = -4.9178, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
uji Augmented Dickey-Fuller (ADF) menghasilkan p-value sebesar 0,01 di mana lebih kecil dari 𝛼 = 0,05 sehingga dikatakan tolak H0. Oleh karena itu, dapat diambil keputusan bahwa data telah stasioner.
table(y_diff)
## y_diff
## -215 -202.5 -193.5 -129.25
## 1 1 1 1
## -107.5 -93.9000000000001 -93.25 -80.4499999999998
## 1 1 1 1
## -78.5 -76.25 -75 -74.5
## 1 1 1 1
## -73.5500000000002 -72.5999999999999 -71 -69.25
## 1 1 1 1
## -68.5 -68.25 -57 -54.8999999999999
## 1 1 1 1
## -52.25 -50.5 -48.5 -46.25
## 1 1 1 1
## -38 -34.75 -32.75 -32.1999999999998
## 1 1 1 1
## -31 -29.75 -29.5 -28.6500000000001
## 1 1 1 1
## -27.2 -27 -23.95 -23.75
## 1 1 1 1
## -21 -20.4000000000001 -18.5 -15.2
## 1 1 1 1
## -13.1000000000001 -12.9499999999998 -11.25 -10.75
## 1 1 1 1
## -10.6499999999999 -7.85000000000014 -6.75 -4.09999999999991
## 1 1 1 1
## -3.25 -2.5 -1.90000000000009 -0.25
## 1 1 1 1
## 0.5 1 2.09999999999991 2.59999999999991
## 1 1 1 1
## 6 7 7.25 9.75
## 1 1 2 1
## 10.05 10.8 11.1500000000001 13.25
## 1 1 1 2
## 18.5 21.25 21.6000000000001 22
## 1 1 1 1
## 23.25 23.5 24 25.3
## 1 1 1 1
## 26 26.5 27.7 28
## 1 1 1 1
## 28.25 28.3499999999999 29.75 36.5
## 1 1 1 1
## 36.5999999999999 36.75 39.75 40.5
## 1 1 1 1
## 42.8 44.2 44.25 46.5
## 1 1 1 1
## 48.6500000000001 51.8 54.05 54.25
## 1 1 1 1
## 61 61.45 63.75 64.5
## 1 1 1 1
## 66.8999999999999 75.5 77 80.25
## 1 1 1 1
## 84 96.5 102 108.65
## 1 1 1 1
## 122.5 123 123.1 127.5
## 1 1 1 1
## 185 213
## 1 1
plot(y_diff, col="blue")
points(y_diff)
Untuk mengidentifikasi model arima, nilai ACF dan PACF dapat digunakan untuk menentukan nilai q pada nilai MA(q) dan nilai p pada model AR(p). Namun, kedua nilai ini p dan q pada model campuran ARMA(p,q). Oleh karena itu, dikembangkan metode extended autocorrelation function (EACF) untuk mengindentifikasi model campuran ARMA(p,q). Pada tabel EACF, triangle of zeros akan terbentuk, dan nilai pada pojok kiri atas akan bersesuaian dengan ordo ARMA.
acf(y_diff)
pacf(y_diff)
eacf(y_diff)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o o o o o x o o o o x o o o
## 1 x o o o o x o o o o x o o o
## 2 x o o o o x o o o o x o o o
## 3 x x o o o o o o o o x o o o
## 4 x x o x o o o o o o x o o o
## 5 o x x x o o o o o o x o o o
## 6 o o x o x o o o o o x o o o
## 7 x o x x o o o o o o o o o o
arima011 <- Arima(training.ts, order=c(0,1,1), include.drift=TRUE)
arima111 <- Arima(training.ts, order=c(1,1,1), include.drift=TRUE)
akurasi_model = data.frame("Model" = c("ARIMA(0,1,1)","ARIMA(1,1,1)"),
"AIC" = c(arima011$aic, arima111$aic),
"BIC" = c(arima011$bic, arima111$bic))
akurasi_model[order(akurasi_model[,3]),]
## Model AIC BIC
## 1 ARIMA(0,1,1) 1264.731 1272.887
## 2 ARIMA(1,1,1) 1266.591 1277.465
Terdapat dua model tentatif yang terbentuk, yaitu ARIMA(0,1,1) dan ARIMA(1,1,1). Apabila dilakukan perbandingan model berdasarkan nilai AIC, maka ARIMA(1,1,1) memiliki nilai AIC rendah sebesar 1264.591.
coeftest(arima011)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.179786 0.096116 -1.8705 0.06141 .
## drift 1.876603 5.179953 0.3623 0.71714
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(arima111)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.15106 0.39487 0.3826 0.7020
## ma1 -0.32438 0.37387 -0.8676 0.3856
## drift 1.86939 5.02688 0.3719 0.7100
ramalan011 <- forecast(arima011, h = 12)
ramalan111 <- forecast(arima111, h = 12)
plot(ramalan011); lines(testing.ts)
plot(ramalan111); lines(testing.ts)
akurasi011 <- accuracy(ramalan011, testing.ts)
akurasi111 <- accuracy(ramalan111, testing.ts)
akurasi_model = data.frame("Model" = c("ARIMA(0,1,1)", "ARIMA(1,1,1)"),
"RMSE" = c(akurasi011[2,2],akurasi111[2,2]),
"MAPE" = c(akurasi011[2,5],akurasi111[2,5]))
akurasi_model[order(akurasi_model[,3]),]
## Model RMSE MAPE
## 2 ARIMA(1,1,1) 186.3628 11.94038
## 1 ARIMA(0,1,1) 186.8153 11.97220
arima011 <- Arima(training.ts, order=c(0,1,1), include.drift=TRUE)
summary(arima011)
## Series: training.ts
## ARIMA(0,1,1) with drift
##
## Coefficients:
## ma1 drift
## -0.1798 1.8766
## s.e. 0.0961 5.1800
##
## sigma^2 = 4530: log likelihood = -629.37
## AIC=1264.73 AICc=1264.95 BIC=1272.89
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.06272055 66.40733 49.48094 -0.1065918 3.635735 0.9968908
## ACF1
## Training set 0.00685804
coeftest(arima011)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.179786 0.096116 -1.8705 0.06141 .
## drift 1.876603 5.179953 0.3623 0.71714
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
arima012 <- Arima(training.ts, order=c(0,1,2), include.drift=TRUE)
summary(arima012)
## Series: training.ts
## ARIMA(0,1,2) with drift
##
## Coefficients:
## ma1 ma2 drift
## -0.1634 -0.0530 1.8571
## s.e. 0.0976 0.1023 4.9489
##
## sigma^2 = 4561: log likelihood = -629.23
## AIC=1266.46 AICc=1266.84 BIC=1277.34
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.09601532 66.32728 49.01529 -0.1072558 3.601811 0.9875094
## ACF1
## Training set -0.005448149
coeftest(arima012)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.163369 0.097604 -1.6738 0.09417 .
## ma2 -0.052962 0.102251 -0.5180 0.60448
## drift 1.857092 4.948902 0.3753 0.70747
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
akurasi_ovrfit = data.frame("Model" = c("ARIMA(0,1,1)", "ARIMA(0,1,2)"),
"AIC" = c(arima011$aic, arima012$aic),
"BIC" = c(arima011$bic, arima012$bic))
akurasi_ovrfit[order(akurasi_ovrfit[,3]),]
## Model AIC BIC
## 1 ARIMA(0,1,1) 1264.731 1272.887
## 2 ARIMA(0,1,2) 1266.464 1277.338
best_model <- arima011
sisaan <- best_model$residuals
par(mfrow=c(2,2))
qqnorm(sisaan)
qqline(sisaan, col = "blue", lwd = 2)
plot(as.numeric(best_model$fitted), as.numeric(sisaan), xlab="Fitted", ylab="Sisaan",
ylim=c(-500,500), main="Residuals vs Fitted"); abline(h=0, col="red")
acf(sisaan)
plot(1:length(sisaan),sisaan,type='o', xlab="Order", ylim=c(-500,500),
main="Residuals vs Order"); abline(h=0, col="red")
ks.test(sisaan,"pnorm")
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: sisaan
## D = 0.49541, p-value < 2.2e-16
## alternative hypothesis: two-sided
Box.test(sisaan,type = "Ljung-Box")
##
## Box-Ljung test
##
## data: sisaan
## X-squared = 0.0054571, df = 1, p-value = 0.9411
t.test(sisaan, mu = 0, conf.level=0.95)
##
## One Sample t-test
##
## data: sisaan
## t = 0.0099955, df = 112, p-value = 0.992
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -12.37019 12.49563
## sample estimates:
## mean of x
## 0.06272055
ramalan_best <- forecast(best_model, h = 12)
plot(ramalan_best); lines(training.ts)
plot(ramalan_best); lines(testing.ts)
accuracy(ramalan_best, testing.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.06272055 66.40733 49.48094 -0.1065918 3.635735 0.9968908
## Test set 179.95189765 186.81528 179.95190 11.9722024 11.972202 3.6254846
## ACF1 Theil's U
## Training set 0.00685804 NA
## Test set 0.10614719 3.352309
Terlihat nilai MAPE berada di nilai 11.972%. Dimana apabila nilai MAPE dalam rentang 10% hingga 20%, maka kemampuan model peramalan dikatakan baik.
Periode peramalan yang dilakukan dengan jumlah 6 periode ke depan. Dapat dilihat jika berdasarkan hasil peramalan tersebut terdapat kenaikan harga emas dunia untuk 6 periode ke depan.
ts.dataemas <- as.ts(dataemas$USD)
dataemas.real <- Arima(ts.dataemas, order = c(0,1,1), include.drift = TRUE)
ramalanall <- forecast::forecast(ts.dataemas, model=dataemas.real, h= 6)
ramalanall
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 147 1897.224 1807.766 1986.682 1760.410 2034.038
## 148 1902.801 1786.616 2018.987 1725.111 2080.491
## 149 1908.378 1770.554 2046.202 1697.595 2119.161
## 150 1913.955 1757.457 2070.453 1674.611 2153.299
## 151 1919.532 1746.361 2092.703 1654.690 2184.374
## 152 1925.109 1736.736 2113.482 1637.017 2213.201
data.ramalanall <- ramalanall$mean
plot(ramalanall, xlab="Time", ylab="Harga Emas Dunia", main="Plot Ramalan Metode ARIMA(0,1,1)")