Library

library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✓ ggplot2   3.3.5     ✓ fma       2.4  
## ✓ forecast  8.16      ✓ expsmooth 2.3
## 
library(tseries)
library(forecast)
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:fma':
## 
##     pigs
library(quantmod)
## Loading required package: TTR
library(ggplot2)
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
##   method                 from    
##   autoplot.Arima         forecast
##   autoplot.acf           forecast
##   autoplot.ar            forecast
##   autoplot.bats          forecast
##   autoplot.decomposed.ts forecast
##   autoplot.ets           forecast
##   autoplot.forecast      forecast
##   autoplot.stl           forecast
##   autoplot.ts            forecast
##   fitted.ar              forecast
##   fortify.ts             forecast
##   residuals.ar           forecast
library(faraway)
## 
## Attaching package: 'faraway'
## The following object is masked from 'package:GGally':
## 
##     happy
## The following objects are masked from 'package:fma':
## 
##     airpass, eggs, ozone, wheat
library(MASS)
## 
## Attaching package: 'MASS'
## The following objects are masked from 'package:fma':
## 
##     cement, housing, petrol
library(FitAR)
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:faraway':
## 
##     melanoma
## Loading required package: leaps
## Loading required package: ltsa
## Loading required package: bestglm
## 
## Attaching package: 'FitAR'
## The following object is masked from 'package:forecast':
## 
##     BoxCox
library(MTS)
## 
## Attaching package: 'MTS'
## The following object is masked from 'package:TTR':
## 
##     VMA
library(AID)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:FitAR':
## 
##     Boot
## The following objects are masked from 'package:faraway':
## 
##     logit, vif
library(seastests)
library(tseries)
library(stats)
library(lmtest)

Import Data

library(readxl)
LPGI<-read_excel("~/Downloads/LPGI.xlsx")
head(LPGI)
## # A tibble: 6 × 7
##   Date                 Open  High   Low Close `Adj Close` Volume
##   <dttm>              <dbl> <dbl> <dbl> <dbl>       <dbl>  <dbl>
## 1 2017-01-02 00:00:00  5400  5400  5400  5400       3887.      0
## 2 2017-01-03 00:00:00  5400  5500  5400  5500       3959.  65100
## 3 2017-01-04 00:00:00  5500  5600  5400  5525       3977.  25700
## 4 2017-01-05 00:00:00  5525  5600  5500  5600       4031.  53100
## 5 2017-01-06 00:00:00  5600  5750  5575  5750       4139.   2500
## 6 2017-01-09 00:00:00  5750  5750  5475  5650       4067.  43400

Cleaning Data

LPGI[is.na(LPGI)] <- 0
LPGI$Date <- as.Date(LPGI$Date, format = "%Y-%m-%d")
summary(LPGI)
##       Date                 Open           High           Low      
##  Min.   :2017-01-02   Min.   :2800   Min.   :2900   Min.   :2700  
##  1st Qu.:2018-03-27   1st Qu.:3502   1st Qu.:3510   1st Qu.:3500  
##  Median :2019-06-11   Median :3860   Median :3890   Median :3800  
##  Mean   :2019-06-22   Mean   :4087   Mean   :4111   Mean   :4065  
##  3rd Qu.:2020-09-16   3rd Qu.:4450   3rd Qu.:4500   3rd Qu.:4448  
##  Max.   :2021-12-30   Max.   :5950   Max.   :5950   Max.   :5950  
##      Close        Adj Close        Volume       
##  Min.   :2900   Min.   :2291   Min.   :      0  
##  1st Qu.:3510   1st Qu.:3078   1st Qu.:      0  
##  Median :3860   Median :3342   Median :    100  
##  Mean   :4093   Mean   :3401   Mean   :   8321  
##  3rd Qu.:4500   3rd Qu.:3671   3rd Qu.:   3375  
##  Max.   :5950   Max.   :5175   Max.   :1000000
yt<-LPGI$Close

Plot data

plot.ts(yt)

fried(ts(yt, frequency=7))
## Test used:  Friedman rank 
##  
## Test statistic:  2.34 
## P-value:  0.8854967

Hasil 0.8854967>0.05 maka Data Harga Saham Closing LPGI tidak bersifat seasonal

ts_exchangerate<-ts(yt, frequency=7)
d_exchangerate<-decompose(ts_exchangerate,"additive")
plot(d_exchangerate)

Stationary test (yt)

acf(yt, lag.max=60)

adf.test(yt,alternative="stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yt
## Dickey-Fuller = -1.1737, Lag order = 10, p-value = 0.9107
## alternative hypothesis: stationary

Decomposed

library(forecast)
lpgidp<-(ts_exchangerate-d_exchangerate$seasonal)
autoplot(lpgidp)

Stationary test (data decomposed)

plot(lpgidp)

acf(lpgidp, lag.max=50)

pacf(lpgidp, lag.max=50)

adf.test(lpgidp,alternative="stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  lpgidp
## Dickey-Fuller = -1.1574, Lag order = 10, p-value = 0.9133
## alternative hypothesis: stationary

Split Data Training dan Testing

n<-length(lpgidp)
n1<-abs(0.70*n)
n2<-n-1
n3<-n1+1
train<-lpgidp[1:n1]
test<-lpgidp[n3:n]
data.ts<-ts(train)
head(data.ts)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
## [1] 5413.725 5508.370 5520.473 5593.418 5743.299 5643.823
plot.ts(train , main="Train Data Set Time Series")

plot.ts(test , main="Test Data Set Time Series")

Stationary test (data train)

acf(train, lag.max=50)

pacf(train, lag.max=50)

archTest(train)
## Q(m) of squared series(LM test):  
## Test statistic:  8260.081  p-value:  0 
## Rank-based Test:  
## Test statistic:  7600.462  p-value:  0
adf.test(train)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train
## Dickey-Fuller = -2.1236, Lag order = 9, p-value = 0.526
## alternative hypothesis: stationary

Transform Data Train

BoxCox(train)

#Lambda=1
#Do 1/y transformation
y_box<-(train)**1
plot(y_box)

Transform Data Testing

y_test<-(test)**1
plot(y_test)

Stationary Test (after transform)

archTest(y_box)
## Q(m) of squared series(LM test):  
## Test statistic:  8260.081  p-value:  0 
## Rank-based Test:  
## Test statistic:  7600.462  p-value:  0
adf.test(y_box)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y_box
## Dickey-Fuller = -2.1236, Lag order = 9, p-value = 0.526
## alternative hypothesis: stationary
acf(y_box, lag.max=50)

pacf(y_box, lag.max=50)

Hasil ADF test, p value=0,526>0,05. Maka data belum stasioner sehingga harus melakukan proses differencing.

Differencing

diffy_box<-diff(y_box, differences=1)
plot.ts(diffy_box)

acf(diffy_box, lag.max=50)

pacf(diffy_box, lag.max=50)

Stationary Test (after differencing)

archTest(diffy_box)
## Q(m) of squared series(LM test):  
## Test statistic:  154.5242  p-value:  0 
## Rank-based Test:  
## Test statistic:  335.869  p-value:  0
adf.test(diffy_box)
## Warning in adf.test(diffy_box): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diffy_box
## Dickey-Fuller = -11.491, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary

Hasil ADF test, p value=0,01<0,05. Maka data sudah stasioner sehingga dapat melakukan proses pemilihan tentative model.

Find the tentative model with differencing process

model1<-arima(y_box,order=c(1,1,0))
summary(model1)
## 
## Call:
## arima(x = y_box, order = c(1, 1, 0))
## 
## Coefficients:
##           ar1
##       -0.1748
## s.e.   0.0332
## 
## sigma^2 estimated as 11423:  log likelihood = -5371.95,  aic = 10747.9
## 
## Training set error measures:
##                     ME     RMSE      MAE         MPE      MAPE     MASE
## Training set -2.060063 106.8179 42.15738 -0.08247877 0.9939015 1.047724
##                     ACF1
## Training set -0.01086235
coeftest(model1)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value  Pr(>|z|)    
## ar1 -0.174790   0.033153 -5.2722 1.348e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model2<-arima(y_box,order=c(2,1,0))
summary(model2)
## 
## Call:
## arima(x = y_box, order = c(2, 1, 0))
## 
## Coefficients:
##           ar1      ar2
##       -0.1853  -0.0599
## s.e.   0.0336   0.0336
## 
## sigma^2 estimated as 11382:  log likelihood = -5370.36,  aic = 10746.72
## 
## Training set error measures:
##                     ME     RMSE      MAE         MPE     MAPE     MASE
## Training set -2.186534 106.6252 42.58087 -0.08710808 1.005876 1.058249
##                     ACF1
## Training set 0.004999031
coeftest(model2)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value  Pr(>|z|)    
## ar1 -0.185270   0.033615 -5.5115 3.558e-08 ***
## ar2 -0.059938   0.033591 -1.7844   0.07437 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model3<-arima(y_box,order=c(3,1,0))
summary(model3)
## 
## Call:
## arima(x = y_box, order = c(3, 1, 0))
## 
## Coefficients:
##           ar1      ar2     ar3
##       -0.1798  -0.0431  0.0905
## s.e.   0.0335   0.0340  0.0335
## 
## sigma^2 estimated as 11288:  log likelihood = -5366.72,  aic = 10741.45
## 
## Training set error measures:
##                     ME     RMSE      MAE         MPE     MAPE    MASE
## Training set -1.984303 106.1849 43.37521 -0.07925854 1.025416 1.07799
##                     ACF1
## Training set 0.003838781
coeftest(model3)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value  Pr(>|z|)    
## ar1 -0.179768   0.033538 -5.3602 8.312e-08 ***
## ar2 -0.043097   0.034027 -1.2665   0.20532    
## ar3  0.090550   0.033499  2.7031   0.00687 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model4<-arima(y_box,order=c(8,1,0))
summary(model4)
## 
## Call:
## arima(x = y_box, order = c(8, 1, 0))
## 
## Coefficients:
##           ar1      ar2     ar3      ar4      ar5      ar6      ar7      ar8
##       -0.1881  -0.0509  0.0795  -0.0528  -0.0678  -0.0840  -0.0834  -0.0098
## s.e.   0.0337   0.0341  0.0341   0.0341   0.0341   0.0341   0.0341   0.0336
## 
## sigma^2 estimated as 11102:  log likelihood = -5359.46,  aic = 10736.92
## 
## Training set error measures:
##                     ME    RMSE      MAE         MPE     MAPE     MASE
## Training set -2.536977 105.308 45.32421 -0.09799613 1.076866 1.126428
##                       ACF1
## Training set -0.0009370291
coeftest(model4)
## 
## z test of coefficients:
## 
##       Estimate Std. Error z value  Pr(>|z|)    
## ar1 -0.1881275  0.0336746 -5.5866 2.315e-08 ***
## ar2 -0.0508723  0.0341330 -1.4904   0.13612    
## ar3  0.0794937  0.0340601  2.3339   0.01960 *  
## ar4 -0.0528374  0.0341047 -1.5493   0.12132    
## ar5 -0.0678337  0.0340987 -1.9893   0.04666 *  
## ar6 -0.0839572  0.0340560 -2.4653   0.01369 *  
## ar7 -0.0834173  0.0341229 -2.4446   0.01450 *  
## ar8 -0.0098127  0.0336464 -0.2916   0.77056    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Berdasarkan dari 4 tentative model, hasil AIC terkecil adalah 10736.92. Maka model ARIMA yang dipilih adalah ARIMA(8,1,0).

Check Residual

checkresiduals(model1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,0)
## Q* = 26.524, df = 9, p-value = 0.001676
## 
## Model df: 1.   Total lags used: 10
checkresiduals(model2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,0)
## Q* = 25.089, df = 8, p-value = 0.001502
## 
## Model df: 2.   Total lags used: 10
checkresiduals(model3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,1,0)
## Q* = 15.281, df = 7, p-value = 0.03256
## 
## Model df: 3.   Total lags used: 10
checkresiduals(model4)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(8,1,0)
## Q* = 2.2708, df = 3, p-value = 0.5181
## 
## Model df: 8.   Total lags used: 11
acf(model4$residuals)

Residual signifikan adalah di model 4 yaitu sebesar 0.5181. Apabila p-value>0,05 maka residualnya memenuhi asumsi white noise. Kesimpulannya tentative model 4 diterima.

Plot QQ

boxresult<-LjungBoxTest(model4$residuals,k=2,StartLag=1)
plot(boxresult[,3],main= "Ljung-Box Q Test", ylab= "P-values",xlab= "Lag")

qqnorm(model4$residuals)
qqline(model4$residuals)

ARIMA Forecast

fit3<-arima(y_box, order=c(8,1,0))
forecast<-predict(fit3, n2)
autoplot(forecast(fit3, h=n3, level=c(99.5)))

head(forecast$pred)
## Time Series:
## Start = 884 
## End = 889 
## Frequency = 1 
## [1] 3870.240 3879.599 3887.594 3889.705 3888.269 3886.459

Data Frame (Data Forecast)

data_forecast1 <- forecast$pred
data_forecast1 <- as.data.frame(data_forecast1)
head(data_forecast1)
##          x
## 1 3870.240
## 2 3879.599
## 3 3887.594
## 4 3889.705
## 5 3888.269
## 6 3886.459

Data Frame (Data aktual=Data testing)

dataaktual <- as.data.frame(y_test)
head(dataaktual)
##     y_test
## 1 3808.370
## 2 3745.473
## 3 3743.418
## 4 3843.299
## 5 3843.823
## 6 3951.893

Akurasi data forecast dan data testing

akurasi.arima1 <- accuracy(data_forecast1$x,dataaktual$`y_test`)
akurasi.arima1
##                 ME     RMSE      MAE       MPE     MAPE
## Test set -223.1506 441.4826 371.6276 -7.131258 10.37647

Hasil akurasi antara data training dan testing mencapai 10.37647. Karena nilai MAPE berada di antara 10-20% , maka akurasi tersebut diterima, atau kemampuan model peramalan baik.

Double Exponential Smoothing (Data LPGI)

Double Exponential Smoothing (data training and testing)

data1<-LPGI$Close[-884:-1262] #testing
data2<-LPGI$Close[-1:-883] #training

#readdata
data.ts<-ts(data1)
head(data.ts)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
## [1] 5400 5500 5525 5600 5750 5650
#plotdata
plot(data.ts,main="Plot semua data")
points(data.ts)

lapfore2 = HoltWinters(data.ts, gamma=FALSE)
lapfore2
## Holt-Winters exponential smoothing with trend and without seasonal component.
## 
## Call:
## HoltWinters(x = data.ts, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 0.8365515
##  beta : 0.03274018
##  gamma: FALSE
## 
## Coefficients:
##          [,1]
## a 3850.746878
## b    3.429557
lapfore2$SSE
## [1] 10585935
plot(lapfore2)

Hasil smoothing parameters: alpha = 0.8365515 dan beta = 0.03274018

Data Frame (Forecast)

hasil_forecast.sess <- forecast(lapfore2, h = 379)
hasil_forecast.ses <- as.data.frame(hasil_forecast.sess)
head(hasil_forecast.ses)
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
##  884       3854.176 3713.711 3994.642 3639.353 4069.000
##  885       3857.606 3671.979 4043.233 3573.714 4141.498
##  886       3861.036 3637.132 4084.939 3518.605 4203.466
##  887       3864.465 3606.035 4122.895 3469.231 4259.700
##  888       3867.895 3577.297 4158.493 3423.463 4312.326
##  889       3871.324 3550.161 4192.487 3380.148 4362.501
plot(hasil_forecast.sess)

Data Frame (Forecast)

data_forecast2 <- hasil_forecast.ses$`Point Forecast`
data_forecast2 <- as.data.frame(data_forecast2)
head(data_forecast2)
##   data_forecast2
## 1       3854.176
## 2       3857.606
## 3       3861.036
## 4       3864.465
## 5       3867.895
## 6       3871.324

Data Frame (Data testing)

dataaktual <- as.data.frame(data2)
head(dataaktual)
##   data2
## 1  3800
## 2  3750
## 3  3750
## 4  3850
## 5  3850
## 6  3950

Akurasi Data training dan testing

akurasi.ses <- accuracy(data_forecast2$data_forecast2,dataaktual$`data2`)
akurasi.ses
##                 ME    RMSE      MAE       MPE     MAPE
## Test set -840.0541 911.175 842.2392 -23.61308 23.66383

Hasil akurasi antara data training dan testing mencapai 23.66383. Karena nilai MAPE berada di antara 20-50%, maka akurasi tersebut diterima atau kemampuan model peramalan layak.

Berdasarkan hasil akurasi antara metode ARIMA dan Exponential, dimana hasil MAPE di ARIMA adalah 10.37647% sedangkan double exponential smoothing adalah 23.66383%. Maka metode yang tepat dan akurat untuk dilakukan proses forecasting yang baik adalah menggunakan Metode ARIMA.