Libraries

library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✓ ggplot2   3.3.5     ✓ fma       2.4  
## ✓ forecast  8.16      ✓ expsmooth 2.3
## 
library(tseries)
library(forecast)
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:fma':
## 
##     pigs
library(quantmod)
## Loading required package: TTR
library(ggplot2)
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
##   method                 from    
##   autoplot.Arima         forecast
##   autoplot.acf           forecast
##   autoplot.ar            forecast
##   autoplot.bats          forecast
##   autoplot.decomposed.ts forecast
##   autoplot.ets           forecast
##   autoplot.forecast      forecast
##   autoplot.stl           forecast
##   autoplot.ts            forecast
##   fitted.ar              forecast
##   fortify.ts             forecast
##   residuals.ar           forecast
library(faraway)
## 
## Attaching package: 'faraway'
## The following object is masked from 'package:GGally':
## 
##     happy
## The following objects are masked from 'package:fma':
## 
##     airpass, eggs, ozone, wheat
library(MASS)
## 
## Attaching package: 'MASS'
## The following objects are masked from 'package:fma':
## 
##     cement, housing, petrol
library(FitAR)
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:faraway':
## 
##     melanoma
## Loading required package: leaps
## Loading required package: ltsa
## Loading required package: bestglm
## 
## Attaching package: 'FitAR'
## The following object is masked from 'package:forecast':
## 
##     BoxCox
library(MTS)
## 
## Attaching package: 'MTS'
## The following object is masked from 'package:TTR':
## 
##     VMA
library(AID)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:FitAR':
## 
##     Boot
## The following objects are masked from 'package:faraway':
## 
##     logit, vif
library(seastests)
library(tseries)
library(stats)
library(lmtest)

Import Data

library(readxl)
ABDA<-read_excel("~/Downloads/ABDA.xlsx")
head(ABDA)
## # A tibble: 6 × 7
##   Date                 Open  High   Low Price `Adj Close` Volume
##   <dttm>              <dbl> <dbl> <dbl> <dbl>       <dbl>  <dbl>
## 1 2017-01-02 00:00:00  6900  6900  6900  6900       6290.      0
## 2 2017-01-03 00:00:00  6900  6900  6900  6900       6290.      0
## 3 2017-01-04 00:00:00  6900  6900  6900  6900       6290.      0
## 4 2017-01-05 00:00:00  6900  6900  6900  6900       6290.      0
## 5 2017-01-06 00:00:00  6900  6900  6900  6900       6290.      0
## 6 2017-01-09 00:00:00  6900  6900  6900  6900       6290.      0

Cleaning Data

ABDA[is.na(ABDA)] <- 0                                  # replace missing values with 0
ABDA$Date <- as.Date(ABDA$Date, format = "%Y-%m-%d")    # convert POSIXct datetimes to Date
summary(ABDA)
##       Date                 Open           High           Low      
##  Min.   :2017-01-02   Min.   :3360   Min.   :3360   Min.   :3360  
##  1st Qu.:2018-03-27   1st Qu.:6500   1st Qu.:6575   1st Qu.:6500  
##  Median :2019-06-11   Median :6950   Median :6950   Median :6950  
##  Mean   :2019-06-22   Mean   :6717   Mean   :6725   Mean   :6712  
##  3rd Qu.:2020-09-16   3rd Qu.:7000   3rd Qu.:7000   3rd Qu.:7000  
##  Max.   :2021-12-30   Max.   :8025   Max.   :8150   Max.   :7625  
##      Price        Adj Close        Volume       
##  Min.   :3360   Min.   :3142   Min.   :    0.0  
##  1st Qu.:6512   1st Qu.:6198   1st Qu.:    0.0  
##  Median :6950   Median :6546   Median :    0.0  
##  Mean   :6718   Mean   :6395   Mean   :  348.6  
##  3rd Qu.:7000   3rd Qu.:6822   3rd Qu.:    0.0  
##  Max.   :7625   Max.   :7200   Max.   :31300.0
yt<-ABDA$Price
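
Replacing every missing value with 0 is a blunt fix for price data. A gentler alternative, sketched below and not used above, carries the last observed price forward with zoo::na.locf() on a fresh copy of the file.

# Alternative cleaning sketch (not applied above): carry the last observed price forward
ABDA_raw <- read_excel("~/Downloads/ABDA.xlsx")           # fresh read, before the zero-fill
ABDA_raw$Price <- na.locf(ABDA_raw$Price, na.rm = FALSE)  # zoo: last observation carried forward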

Plot data

plot.ts(yt)

fried(ts(yt, frequency=7))   # seastests: Friedman rank test for weekly seasonality
## Test used:  Friedman rank 
##  
## Test statistic:  2.17 
## P-value:  0.9036314

Since the p-value 0.9036314 > 0.05, the ABDA closing stock price data are not seasonal.
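
As an optional cross-check (a sketch, not part of the original output), forecast::nsdiffs() estimates the number of seasonal differences the series needs; a result of 0 would be consistent with the Friedman test above.

# Estimated number of seasonal differences required (0 would mean no seasonal differencing)
nsdiffs(ts(yt, frequency = 7))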

ts_exchangerate<-ts(yt, frequency=7)                    # weekly-frequency time series of the closing price
d_exchangerate<-decompose(ts_exchangerate,"additive")   # additive decomposition
plot(d_exchangerate)

Stationarity test (yt)

acf(yt, lag.max=60)

adf.test(yt,alternative="stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yt
## Dickey-Fuller = -3.7388, Lag order = 10, p-value = 0.02205
## alternative hypothesis: stationary

Seasonally Adjusted Series

library(forecast)
abdadp<-(ts_exchangerate-d_exchangerate$seasonal)   # remove the seasonal component
autoplot(abdadp)

Stationarity test (seasonally adjusted data)

plot(abdadp)

acf(abdadp, lag.max=50)

pacf(abdadp, lag.max=50)

adf.test(abdadp,alternative="stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  abdadp
## Dickey-Fuller = -3.7402, Lag order = 10, p-value = 0.02198
## alternative hypothesis: stationary

Split the Data into Training and Test Sets

n<-length(abdadp)
n1<-floor(0.70*n)   # training size: first 70% of observations
n2<-n-1             # forecast horizon used for the ARIMA prediction below (longer than the 379-observation test set)
n3<-n1+1            # first index of the test set
train<-abdadp[1:n1]
test<-abdadp[n3:n]
data.ts<-ts(train)
head(data.ts)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
## [1] 6902.136 6903.892 6909.251 6909.852 6894.074 6883.879
plot.ts(train , main="Train Data Set Time Series")

plot.ts(test , main="Test Data Set Time Series")
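
An equivalent way to write the 70/30 split (a sketch; it should give the same train/test values as the indexing above) uses head() and tail().

# Equivalent split using head()/tail()
n_train <- floor(0.70 * length(abdadp))
train_alt <- head(abdadp, n_train)                   # first 70% (training)
test_alt <- tail(abdadp, length(abdadp) - n_train)   # remaining 30% (testing)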

Stationarity test (training data)

acf(train, lag.max=50)

pacf(train, lag.max=50)

archTest(train)   # MTS: test for ARCH effects in the training series
## Q(m) of squared series(LM test):  
## Test statistic:  7117.037  p-value:  0 
## Rank-based Test:  
## Test statistic:  6751.015  p-value:  0
adf.test(train)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train
## Dickey-Fuller = -3.423, Lag order = 9, p-value = 0.04971
## alternative hypothesis: stationary

Transform the Training Data

# Lambda = 1, so y^1 = y: no transformation is actually applied to the training data
y_box<-(train)**1
plot(y_box)
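
For reference, a sketch that is not part of the original workflow: forecast::BoxCox.lambda() suggests a Box-Cox parameter from the data, and an estimate near 1 would support keeping the series untransformed, as done here.

# Estimate a Box-Cox lambda for the training data (a value near 1 means no transformation is needed)
BoxCox.lambda(train)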

Transform the Test Data

y_test<-(test)**1   # apply the same (identity) transformation to the test data
plot(y_test)

Stationarity test (after transformation)

archTest(y_box)
## Q(m) of squared series(LM test):  
## Test statistic:  7117.037  p-value:  0 
## Rank-based Test:  
## Test statistic:  6751.015  p-value:  0
adf.test(y_box)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y_box
## Dickey-Fuller = -3.423, Lag order = 9, p-value = 0.04971
## alternative hypothesis: stationary
acf(y_box, lag.max=50)

pacf(y_box, lag.max=50)

The ADF test gives p-value = 0.04971 < 0.05, so the data are already stationary; we can therefore go straight to identifying tentative models without any differencing.
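
Two optional cross-checks (a sketch, not part of the original output): forecast::ndiffs() estimates the number of regular differences needed, and the KPSS test from tseries takes stationarity as its null hypothesis, complementing the ADF test.

# Cross-checks of the "no differencing needed" conclusion
ndiffs(y_box)      # estimated number of first differences required
kpss.test(y_box)   # null hypothesis: the series is (level) stationary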

Identify Tentative Models (without differencing)

model1<-arima(y_box,order=c(1,0,0))
summary(model1)
## 
## Call:
## arima(x = y_box, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.9812  6742.3531
## s.e.  0.0063   239.4058
## 
## sigma^2 estimated as 20086:  log likelihood = -5628.86,  aic = 11263.72
## 
## Training set error measures:
##                      ME     RMSE      MAE         MPE      MAPE    MASE
## Training set -0.9776668 141.7264 36.45566 -0.07603895 0.6141616 1.15221
##                    ACF1
## Training set -0.0185862
coeftest(model1)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value  Pr(>|z|)    
## ar1       9.8115e-01 6.3042e-03 155.635 < 2.2e-16 ***
## intercept 6.7424e+03 2.3941e+02  28.163 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model2<-arima(y_box,order=c(2,0,0))
summary(model2)
## 
## Call:
## arima(x = y_box, order = c(2, 0, 0))
## 
## Coefficients:
##          ar1     ar2  intercept
##       0.9615  0.0200  6754.6711
## s.e.  0.0336  0.0336   244.0599
## 
## sigma^2 estimated as 20079:  log likelihood = -5628.69,  aic = 11265.38
## 
## Training set error measures:
##                     ME     RMSE     MAE         MPE     MAPE     MASE
## Training set -1.239562 141.6993 36.4478 -0.08008879 0.614814 1.151962
##                     ACF1
## Training set 0.002706406
coeftest(model2)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value Pr(>|z|)    
## ar1       9.6150e-01 3.3611e-02 28.6063   <2e-16 ***
## ar2       2.0028e-02 3.3644e-02  0.5953   0.5517    
## intercept 6.7547e+03 2.4406e+02 27.6763   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model3<-arima(y_box,order=c(3,0,0))
summary(model3)
## 
## Call:
## arima(x = y_box, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1     ar2      ar3  intercept
##       0.9632  0.1001  -0.0833  6754.3410
## s.e.  0.0335  0.0465   0.0335   226.1775
## 
## sigma^2 estimated as 19939:  log likelihood = -5625.63,  aic = 11261.26
## 
## Training set error measures:
##                     ME     RMSE    MAE         MPE      MAPE     MASE
## Training set -1.114381 141.2071 37.261 -0.07672498 0.6250777 1.177664
##                      ACF1
## Training set -0.007976677
coeftest(model3)
## 
## z test of coefficients:
## 
##              Estimate  Std. Error z value Pr(>|z|)    
## ar1          0.963151    0.033497 28.7531  < 2e-16 ***
## ar2          0.100116    0.046506  2.1528  0.03134 *  
## ar3         -0.083293    0.033519 -2.4849  0.01296 *  
## intercept 6754.340987  226.177458 29.8630  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model4<-arima(y_box,order=c(4,0,0))
summary(model4)
## 
## Call:
## arima(x = y_box, order = c(4, 0, 0))
## 
## Coefficients:
##          ar1     ar2     ar3      ar4  intercept
##       0.9542  0.1108  0.0211  -0.1083  6704.7625
## s.e.  0.0334  0.0464  0.0463   0.0334   206.2003
## 
## sigma^2 estimated as 19703:  log likelihood = -5620.39,  aic = 11252.79
## 
## Training set error measures:
##                     ME     RMSE     MAE         MPE      MAPE     MASE
## Training set 0.1351413 140.3687 39.1235 -0.05498536 0.6487696 1.236529
##                       ACF1
## Training set -0.0006651154
coeftest(model4)
## 
## z test of coefficients:
## 
##              Estimate  Std. Error z value  Pr(>|z|)    
## ar1          0.954195    0.033412 28.5586 < 2.2e-16 ***
## ar2          0.110839    0.046357  2.3910  0.016803 *  
## ar3          0.021099    0.046327  0.4554  0.648791    
## ar4         -0.108256    0.033419 -3.2394  0.001198 ** 
## intercept 6704.762457  206.200267 32.5158 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Of the four tentative models, the smallest AIC is 11252.79, so the selected model is ARIMA(4,0,0).
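
The same comparison can be tabulated directly, and auto.arima() gives an independent check of the order selection (a sketch; its choice may differ from the manual search above).

# AIC of the four candidate models in one table
AIC(model1, model2, model3, model4)

# Automatic order selection as an independent check (may pick a different model)
auto.arima(y_box, d = 0, seasonal = FALSE)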

Check Residuals

checkresiduals(model1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,0) with non-zero mean
## Q* = 21.86, df = 8, p-value = 0.005182
## 
## Model df: 2.   Total lags used: 10
checkresiduals(model2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,0) with non-zero mean
## Q* = 22.432, df = 7, p-value = 0.00214
## 
## Model df: 3.   Total lags used: 10
checkresiduals(model3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,0,0) with non-zero mean
## Q* = 14.739, df = 6, p-value = 0.02239
## 
## Model df: 4.   Total lags used: 10
checkresiduals(model4)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,0,0) with non-zero mean
## Q* = 4.0091, df = 5, p-value = 0.5481
## 
## Model df: 5.   Total lags used: 10
acf(model4$residuals)

Model 4 is the only model whose residuals pass the Ljung-Box test, with a p-value of 0.5481. Since the p-value is greater than 0.05, the residuals satisfy the white-noise assumption, so tentative model 4 is accepted.
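
The same Ljung-Box statistic can be computed directly with stats::Box.test() (a sketch; checkresiduals() above already reports it), using 10 lags and 5 fitted parameters to match the output shown.

# Ljung-Box test on the ARIMA(4,0,0) residuals: 10 lags, 5 fitted parameters
Box.test(residuals(model4), lag = 10, type = "Ljung-Box", fitdf = 5)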

Ljung-Box p-value Plot and QQ Plot

boxresult<-LjungBoxTest(model4$residuals,k=2,StartLag=1)   # FitAR: Ljung-Box statistics and p-values over a range of lags
plot(boxresult[,3],main= "Ljung-Box Q Test", ylab= "P-values",xlab= "Lag")

qqnorm(model4$residuals)
qqline(model4$residuals)

ARIMA Forecast

fit3<-arima(y_box, order=c(4,0,0))              # refit the selected ARIMA(4,0,0) on the training data
forecast<-predict(fit3, n2)                     # point forecasts; note this name shadows forecast()
autoplot(forecast(fit3, h=n3, level=c(99.5)))   # forecast() still resolves to the function when called

head(forecast$pred)
## Time Series:
## Start = 884 
## End = 889 
## Frequency = 1 
## [1] 5773.120 5795.100 5817.098 5840.402 5863.269 5885.756

Data Frame (Forecast Data)

data_forecast1 <- forecast$pred
data_forecast1 <- as.data.frame(data_forecast1)
head(data_forecast1)
##          x
## 1 5773.120
## 2 5795.100
## 3 5817.098
## 4 5840.402
## 5 5863.269
## 6 5885.756

Data Frame (Actual Data = Test Data)

dataaktual <- as.data.frame(y_test)
head(dataaktual)
##     y_test
## 1 5753.892
## 2 5759.251
## 3 5759.852
## 4 5744.074
## 5 5983.879
## 6 5746.917

Accuracy of the forecasts against the test data

akurasi.arima1 <- accuracy(data_forecast1$x,dataaktual$`y_test`)
akurasi.arima1
##                ME     RMSE      MAE        MPE     MAPE
## Test set 17.47626 449.2002 366.2928 -0.1977255 5.722292

The MAPE of the forecasts against the test data is 5.722292%. Since MAPE < 10%, the accuracy is acceptable and the forecasting model performs very well.
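
For transparency, the MAPE can also be computed by hand from its definition, mean(|actual − forecast| / |actual|) × 100 (a sketch that aligns the first length(y_test) forecasts with the test actuals; accuracy() above does essentially the same).

# MAPE computed directly from its definition over the test period
h <- length(dataaktual$y_test)
mean(abs((dataaktual$y_test - data_forecast1$x[1:h]) / dataaktual$y_test)) * 100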

Double Exponential Smoothing (Data ABDA)

Double Exponential Smoothing (training and test data)

data1<-ABDA$Price[-884:-1262] #training: observations 1-883
data2<-ABDA$Price[-1:-883] #testing: observations 884-1262

# convert the training data to a time series
data.ts<-ts(data1)
head(data.ts)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
## [1] 6900 6900 6900 6900 6900 6900
# plot the training data
plot(data.ts,main="Training Data")
points(data.ts)

lapfore2 = HoltWinters(data.ts, gamma=FALSE)   # Holt's linear method (double exponential smoothing); gamma=FALSE drops the seasonal term
lapfore2
## Holt-Winters exponential smoothing with trend and without seasonal component.
## 
## Call:
## HoltWinters(x = data.ts, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 0.9757213
##  beta : 0
##  gamma: FALSE
## 
## Coefficients:
##   [,1]
## a 5750
## b    0
lapfore2$SSE
## [1] 18001425
plot(lapfore2)

The estimated smoothing parameters are alpha = 0.9757213 and beta = 0.
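
To make the roles of alpha and beta concrete, here is a minimal hand-rolled version of Holt's recursion using the same parameter values (a sketch only; HoltWinters() above does this internally with its own initialization, so the fitted values will differ slightly).

# Minimal double exponential smoothing (Holt) recursion:
#   level_t = alpha*y_t + (1-alpha)*(level_{t-1} + trend_{t-1})
#   trend_t = beta*(level_t - level_{t-1}) + (1-beta)*trend_{t-1}
holt_sketch <- function(y, alpha, beta) {
  level <- y[1]
  trend <- y[2] - y[1]
  fitted <- rep(NA_real_, length(y))
  for (t in 2:length(y)) {
    fitted[t] <- level + trend                       # one-step-ahead forecast
    new_level <- alpha * y[t] + (1 - alpha) * (level + trend)
    trend <- beta * (new_level - level) + (1 - beta) * trend
    level <- new_level
  }
  fitted
}
head(holt_sketch(as.numeric(data.ts), alpha = 0.9757213, beta = 0), 10)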

Data Frame (Forecast)

hasil_forecast.sess <- forecast(lapfore2, h = 379)   # forecast the 379 test-set periods
hasil_forecast.ses <- as.data.frame(hasil_forecast.sess)
head(hasil_forecast.ses)
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
##  884           5750 5566.714 5933.286 5469.688 6030.312
##  885           5750 5493.922 6006.078 5358.362 6141.638
##  886           5750 5437.657 6062.343 5272.312 6227.688
##  887           5750 5390.082 6109.918 5199.554 6300.446
##  888           5750 5348.101 6151.899 5135.348 6364.652
##  889           5750 5310.108 6189.892 5077.243 6422.757
plot(hasil_forecast.sess)

Data Frame (Point Forecasts)

data_forecast2 <- hasil_forecast.ses$`Point Forecast`
data_forecast2 <- as.data.frame(data_forecast2)
head(data_forecast2)
##   data_forecast2
## 1           5750
## 2           5750
## 3           5750
## 4           5750
## 5           5750
## 6           5750

Data Frame (Test Data)

dataaktual <- as.data.frame(data2)
head(dataaktual)
##   data2
## 1  5750
## 2  5750
## 3  5750
## 4  5750
## 5  6000
## 6  5750

Accuracy of the forecasts against the test data

akurasi.ses <- accuracy(data_forecast2$data_forecast2,dataaktual$`data2`)
akurasi.ses
##               ME     RMSE      MAE      MPE    MAPE
## Test set 878.628 991.1165 892.6121 12.80737 13.0583

The MAPE of the forecasts against the test data is 13.0583%. Since MAPE lies between 10% and 20%, the accuracy is acceptable and the forecasting model performs well.

Comparing the two methods, the MAPE of the ARIMA model is 5.722292% while that of double exponential smoothing is 13.0583%. ARIMA is therefore the more appropriate and accurate method for forecasting the ABDA closing price.
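
A small summary table makes the final comparison explicit (values taken from the accuracy output above).

# Side-by-side MAPE comparison of the two methods
mape_comparison <- data.frame(
  Method = c("ARIMA(4,0,0)", "Double Exponential Smoothing"),
  MAPE = c(5.722292, 13.0583)
)
mape_comparison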