Library
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✓ ggplot2 3.3.5 ✓ fma 2.4
## ✓ forecast 8.16 ✓ expsmooth 2.3
##
library(tseries)
library(forecast)
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:fma':
##
## pigs
library(quantmod)
## Loading required package: TTR
library(ggplot2)
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
## method from
## autoplot.Arima forecast
## autoplot.acf forecast
## autoplot.ar forecast
## autoplot.bats forecast
## autoplot.decomposed.ts forecast
## autoplot.ets forecast
## autoplot.forecast forecast
## autoplot.stl forecast
## autoplot.ts forecast
## fitted.ar forecast
## fortify.ts forecast
## residuals.ar forecast
library(faraway)
##
## Attaching package: 'faraway'
## The following object is masked from 'package:GGally':
##
## happy
## The following objects are masked from 'package:fma':
##
## airpass, eggs, ozone, wheat
library(MASS)
##
## Attaching package: 'MASS'
## The following objects are masked from 'package:fma':
##
## cement, housing, petrol
library(FitAR)
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:faraway':
##
## melanoma
## Loading required package: leaps
## Loading required package: ltsa
## Loading required package: bestglm
##
## Attaching package: 'FitAR'
## The following object is masked from 'package:forecast':
##
## BoxCox
library(MTS)
##
## Attaching package: 'MTS'
## The following object is masked from 'package:TTR':
##
## VMA
library(AID)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:FitAR':
##
## Boot
## The following objects are masked from 'package:faraway':
##
## logit, vif
library(seastests)
library(tseries)
library(stats)
library(lmtest)
Import Data
library(readxl)
LPGI<-read_excel("~/Downloads/LPGI.xlsx")
head(LPGI)
## # A tibble: 6 × 7
## Date Open High Low Close `Adj Close` Volume
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-01-02 00:00:00 5400 5400 5400 5400 3887. 0
## 2 2017-01-03 00:00:00 5400 5500 5400 5500 3959. 65100
## 3 2017-01-04 00:00:00 5500 5600 5400 5525 3977. 25700
## 4 2017-01-05 00:00:00 5525 5600 5500 5600 4031. 53100
## 5 2017-01-06 00:00:00 5600 5750 5575 5750 4139. 2500
## 6 2017-01-09 00:00:00 5750 5750 5475 5650 4067. 43400
Cleaning Data
LPGI[is.na(LPGI)] <- 0
LPGI$Date <- as.Date(LPGI$Date, format = "%Y-%m-%d")
summary(LPGI)
## Date Open High Low
## Min. :2017-01-02 Min. :2800 Min. :2900 Min. :2700
## 1st Qu.:2018-03-27 1st Qu.:3502 1st Qu.:3510 1st Qu.:3500
## Median :2019-06-11 Median :3860 Median :3890 Median :3800
## Mean :2019-06-22 Mean :4087 Mean :4111 Mean :4065
## 3rd Qu.:2020-09-16 3rd Qu.:4450 3rd Qu.:4500 3rd Qu.:4448
## Max. :2021-12-30 Max. :5950 Max. :5950 Max. :5950
## Close Adj Close Volume
## Min. :2900 Min. :2291 Min. : 0
## 1st Qu.:3510 1st Qu.:3078 1st Qu.: 0
## Median :3860 Median :3342 Median : 100
## Mean :4093 Mean :3401 Mean : 8321
## 3rd Qu.:4500 3rd Qu.:3671 3rd Qu.: 3375
## Max. :5950 Max. :5175 Max. :1000000
yt<-LPGI$Close
Plot data
plot.ts(yt)

fried(ts(yt, frequency=7))
## Test used: Friedman rank
##
## Test statistic: 2.34
## P-value: 0.8854967
Hasil 0.8854967>0.05 maka Data Harga Saham Closing LPGI tidak
bersifat seasonal
ts_exchangerate<-ts(yt, frequency=7)
d_exchangerate<-decompose(ts_exchangerate,"additive")
plot(d_exchangerate)

Stationary test (yt)
acf(yt, lag.max=60)

adf.test(yt,alternative="stationary")
##
## Augmented Dickey-Fuller Test
##
## data: yt
## Dickey-Fuller = -1.1737, Lag order = 10, p-value = 0.9107
## alternative hypothesis: stationary
Decomposed
library(forecast)
lpgidp<-(ts_exchangerate-d_exchangerate$seasonal)
autoplot(lpgidp)

Stationary test (data decomposed)
plot(lpgidp)

acf(lpgidp, lag.max=50)

pacf(lpgidp, lag.max=50)

adf.test(lpgidp,alternative="stationary")
##
## Augmented Dickey-Fuller Test
##
## data: lpgidp
## Dickey-Fuller = -1.1574, Lag order = 10, p-value = 0.9133
## alternative hypothesis: stationary
Split Data Training dan Testing
n<-length(lpgidp)
n1<-abs(0.70*n)
n2<-n-1
n3<-n1+1
train<-lpgidp[1:n1]
test<-lpgidp[n3:n]
data.ts<-ts(train)
head(data.ts)
## Time Series:
## Start = 1
## End = 6
## Frequency = 1
## [1] 5413.725 5508.370 5520.473 5593.418 5743.299 5643.823
plot.ts(train , main="Train Data Set Time Series")

plot.ts(test , main="Test Data Set Time Series")

Stationary test (data train)
acf(train, lag.max=50)

pacf(train, lag.max=50)

archTest(train)
## Q(m) of squared series(LM test):
## Test statistic: 8260.081 p-value: 0
## Rank-based Test:
## Test statistic: 7600.462 p-value: 0
adf.test(train)
##
## Augmented Dickey-Fuller Test
##
## data: train
## Dickey-Fuller = -2.1236, Lag order = 9, p-value = 0.526
## alternative hypothesis: stationary
Differencing
diffy_box<-diff(y_box, differences=1)
plot.ts(diffy_box)

acf(diffy_box, lag.max=50)

pacf(diffy_box, lag.max=50)

Stationary Test (after differencing)
archTest(diffy_box)
## Q(m) of squared series(LM test):
## Test statistic: 154.5242 p-value: 0
## Rank-based Test:
## Test statistic: 335.869 p-value: 0
adf.test(diffy_box)
## Warning in adf.test(diffy_box): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diffy_box
## Dickey-Fuller = -11.491, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
Hasil ADF test, p value=0,01<0,05. Maka data sudah stasioner
sehingga dapat melakukan proses pemilihan tentative model.
Find the tentative model with differencing process
model1<-arima(y_box,order=c(1,1,0))
summary(model1)
##
## Call:
## arima(x = y_box, order = c(1, 1, 0))
##
## Coefficients:
## ar1
## -0.1748
## s.e. 0.0332
##
## sigma^2 estimated as 11423: log likelihood = -5371.95, aic = 10747.9
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -2.060063 106.8179 42.15738 -0.08247877 0.9939015 1.047724
## ACF1
## Training set -0.01086235
coeftest(model1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.174790 0.033153 -5.2722 1.348e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model2<-arima(y_box,order=c(2,1,0))
summary(model2)
##
## Call:
## arima(x = y_box, order = c(2, 1, 0))
##
## Coefficients:
## ar1 ar2
## -0.1853 -0.0599
## s.e. 0.0336 0.0336
##
## sigma^2 estimated as 11382: log likelihood = -5370.36, aic = 10746.72
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -2.186534 106.6252 42.58087 -0.08710808 1.005876 1.058249
## ACF1
## Training set 0.004999031
coeftest(model2)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.185270 0.033615 -5.5115 3.558e-08 ***
## ar2 -0.059938 0.033591 -1.7844 0.07437 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model3<-arima(y_box,order=c(3,1,0))
summary(model3)
##
## Call:
## arima(x = y_box, order = c(3, 1, 0))
##
## Coefficients:
## ar1 ar2 ar3
## -0.1798 -0.0431 0.0905
## s.e. 0.0335 0.0340 0.0335
##
## sigma^2 estimated as 11288: log likelihood = -5366.72, aic = 10741.45
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -1.984303 106.1849 43.37521 -0.07925854 1.025416 1.07799
## ACF1
## Training set 0.003838781
coeftest(model3)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.179768 0.033538 -5.3602 8.312e-08 ***
## ar2 -0.043097 0.034027 -1.2665 0.20532
## ar3 0.090550 0.033499 2.7031 0.00687 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model4<-arima(y_box,order=c(8,1,0))
summary(model4)
##
## Call:
## arima(x = y_box, order = c(8, 1, 0))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## -0.1881 -0.0509 0.0795 -0.0528 -0.0678 -0.0840 -0.0834 -0.0098
## s.e. 0.0337 0.0341 0.0341 0.0341 0.0341 0.0341 0.0341 0.0336
##
## sigma^2 estimated as 11102: log likelihood = -5359.46, aic = 10736.92
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -2.536977 105.308 45.32421 -0.09799613 1.076866 1.126428
## ACF1
## Training set -0.0009370291
coeftest(model4)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.1881275 0.0336746 -5.5866 2.315e-08 ***
## ar2 -0.0508723 0.0341330 -1.4904 0.13612
## ar3 0.0794937 0.0340601 2.3339 0.01960 *
## ar4 -0.0528374 0.0341047 -1.5493 0.12132
## ar5 -0.0678337 0.0340987 -1.9893 0.04666 *
## ar6 -0.0839572 0.0340560 -2.4653 0.01369 *
## ar7 -0.0834173 0.0341229 -2.4446 0.01450 *
## ar8 -0.0098127 0.0336464 -0.2916 0.77056
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Berdasarkan dari 4 tentative model, hasil AIC terkecil adalah
10736.92. Maka model ARIMA yang dipilih adalah ARIMA(8,1,0).
Check Residual
checkresiduals(model1)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)
## Q* = 26.524, df = 9, p-value = 0.001676
##
## Model df: 1. Total lags used: 10
checkresiduals(model2)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,0)
## Q* = 25.089, df = 8, p-value = 0.001502
##
## Model df: 2. Total lags used: 10
checkresiduals(model3)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,1,0)
## Q* = 15.281, df = 7, p-value = 0.03256
##
## Model df: 3. Total lags used: 10
checkresiduals(model4)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(8,1,0)
## Q* = 2.2708, df = 3, p-value = 0.5181
##
## Model df: 8. Total lags used: 11
acf(model4$residuals)

Plot QQ
boxresult<-LjungBoxTest(model4$residuals,k=2,StartLag=1)
plot(boxresult[,3],main= "Ljung-Box Q Test", ylab= "P-values",xlab= "Lag")

qqnorm(model4$residuals)
qqline(model4$residuals)

ARIMA Forecast
fit3<-arima(y_box, order=c(8,1,0))
forecast<-predict(fit3, n2)
autoplot(forecast(fit3, h=n3, level=c(99.5)))

head(forecast$pred)
## Time Series:
## Start = 884
## End = 889
## Frequency = 1
## [1] 3870.240 3879.599 3887.594 3889.705 3888.269 3886.459
Data Frame (Data Forecast)
data_forecast1 <- forecast$pred
data_forecast1 <- as.data.frame(data_forecast1)
head(data_forecast1)
## x
## 1 3870.240
## 2 3879.599
## 3 3887.594
## 4 3889.705
## 5 3888.269
## 6 3886.459
Data Frame (Data aktual=Data testing)
dataaktual <- as.data.frame(y_test)
head(dataaktual)
## y_test
## 1 3808.370
## 2 3745.473
## 3 3743.418
## 4 3843.299
## 5 3843.823
## 6 3951.893
Akurasi data forecast dan data testing
akurasi.arima1 <- accuracy(data_forecast1$x,dataaktual$`y_test`)
akurasi.arima1
## ME RMSE MAE MPE MAPE
## Test set -223.1506 441.4826 371.6276 -7.131258 10.37647
Hasil akurasi antara data training dan testing mencapai 10.37647.
Karena nilai MAPE berada di antara 10-20% , maka akurasi tersebut
diterima, atau kemampuan model peramalan baik.
Double Exponential Smoothing (Data LPGI)
Double Exponential Smoothing (data training and testing)
data1<-LPGI$Close[-884:-1262] #testing
data2<-LPGI$Close[-1:-883] #training
#readdata
data.ts<-ts(data1)
head(data.ts)
## Time Series:
## Start = 1
## End = 6
## Frequency = 1
## [1] 5400 5500 5525 5600 5750 5650
#plotdata
plot(data.ts,main="Plot semua data")
points(data.ts)

lapfore2 = HoltWinters(data.ts, gamma=FALSE)
lapfore2
## Holt-Winters exponential smoothing with trend and without seasonal component.
##
## Call:
## HoltWinters(x = data.ts, gamma = FALSE)
##
## Smoothing parameters:
## alpha: 0.8365515
## beta : 0.03274018
## gamma: FALSE
##
## Coefficients:
## [,1]
## a 3850.746878
## b 3.429557
lapfore2$SSE
## [1] 10585935
plot(lapfore2)

Hasil smoothing parameters: alpha = 0.8365515 dan beta =
0.03274018
Data Frame (Forecast)
hasil_forecast.sess <- forecast(lapfore2, h = 379)
hasil_forecast.ses <- as.data.frame(hasil_forecast.sess)
head(hasil_forecast.ses)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 884 3854.176 3713.711 3994.642 3639.353 4069.000
## 885 3857.606 3671.979 4043.233 3573.714 4141.498
## 886 3861.036 3637.132 4084.939 3518.605 4203.466
## 887 3864.465 3606.035 4122.895 3469.231 4259.700
## 888 3867.895 3577.297 4158.493 3423.463 4312.326
## 889 3871.324 3550.161 4192.487 3380.148 4362.501
plot(hasil_forecast.sess)

Data Frame (Forecast)
data_forecast2 <- hasil_forecast.ses$`Point Forecast`
data_forecast2 <- as.data.frame(data_forecast2)
head(data_forecast2)
## data_forecast2
## 1 3854.176
## 2 3857.606
## 3 3861.036
## 4 3864.465
## 5 3867.895
## 6 3871.324
Data Frame (Data testing)
dataaktual <- as.data.frame(data2)
head(dataaktual)
## data2
## 1 3800
## 2 3750
## 3 3750
## 4 3850
## 5 3850
## 6 3950
Akurasi Data training dan testing
akurasi.ses <- accuracy(data_forecast2$data_forecast2,dataaktual$`data2`)
akurasi.ses
## ME RMSE MAE MPE MAPE
## Test set -840.0541 911.175 842.2392 -23.61308 23.66383
Hasil akurasi antara data training dan testing mencapai 23.66383.
Karena nilai MAPE berada di antara 20-50%, maka akurasi tersebut
diterima atau kemampuan model peramalan layak.
Berdasarkan hasil akurasi antara metode ARIMA dan Exponential,
dimana hasil MAPE di ARIMA adalah 10.37647% sedangkan double exponential
smoothing adalah 23.66383%. Maka metode yang tepat dan akurat untuk
dilakukan proses forecasting yang baik adalah menggunakan Metode
ARIMA.