Library

library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✓ ggplot2   3.3.5     ✓ fma       2.4  
## ✓ forecast  8.16      ✓ expsmooth 2.3
## 
library(tseries)
library(forecast)
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:fma':
## 
##     pigs
library(quantmod)
## Loading required package: TTR
library(ggplot2)
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
##   method                 from    
##   autoplot.Arima         forecast
##   autoplot.acf           forecast
##   autoplot.ar            forecast
##   autoplot.bats          forecast
##   autoplot.decomposed.ts forecast
##   autoplot.ets           forecast
##   autoplot.forecast      forecast
##   autoplot.stl           forecast
##   autoplot.ts            forecast
##   fitted.ar              forecast
##   fortify.ts             forecast
##   residuals.ar           forecast
library(faraway)
## 
## Attaching package: 'faraway'
## The following object is masked from 'package:GGally':
## 
##     happy
## The following objects are masked from 'package:fma':
## 
##     airpass, eggs, ozone, wheat
library(MASS)
## 
## Attaching package: 'MASS'
## The following objects are masked from 'package:fma':
## 
##     cement, housing, petrol
library(FitAR)
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:faraway':
## 
##     melanoma
## Loading required package: leaps
## Loading required package: ltsa
## Loading required package: bestglm
## 
## Attaching package: 'FitAR'
## The following object is masked from 'package:forecast':
## 
##     BoxCox
library(MTS)
## 
## Attaching package: 'MTS'
## The following object is masked from 'package:TTR':
## 
##     VMA
library(AID)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:FitAR':
## 
##     Boot
## The following objects are masked from 'package:faraway':
## 
##     logit, vif
library(seastests)
library(tseries)
library(stats)
library(lmtest)

Import Data

library(readxl)
AMAG<-read_excel("~/Downloads/AMAGG.xlsx")
head(AMAG)
## # A tibble: 6 × 7
##   Date                 Open  High   Low Close `Adj Close`  Volume
##   <dttm>              <dbl> <dbl> <dbl> <dbl>       <dbl>   <dbl>
## 1 2017-01-02 00:00:00   374   374   374   374        275.       0
## 2 2017-01-03 00:00:00   370   380   370   380        280.  319200
## 3 2017-01-04 00:00:00   380   380   370   370        272.   55200
## 4 2017-01-05 00:00:00   370   372   370   372        274.  292300
## 5 2017-01-06 00:00:00   372   372   372   372        274.    1000
## 6 2017-01-09 00:00:00   372   404   372   390        287. 1246900

Cleaning Data

AMAG[is.na(AMAG)] <- 0
AMAG$Date <- as.Date(AMAG$Date, format = "%Y-%m-%d")
summary(AMAG)
##       Date                 Open            High            Low       
##  Min.   :2017-01-02   Min.   :189.0   Min.   :193.0   Min.   :186.0  
##  1st Qu.:2018-03-27   1st Qu.:288.0   1st Qu.:296.0   1st Qu.:280.0  
##  Median :2019-06-11   Median :312.0   Median :316.0   Median :310.0  
##  Mean   :2019-06-22   Mean   :323.2   Mean   :327.4   Mean   :319.7  
##  3rd Qu.:2020-09-16   3rd Qu.:360.0   3rd Qu.:364.0   3rd Qu.:360.0  
##  Max.   :2021-12-30   Max.   :492.0   Max.   :498.0   Max.   :490.0  
##      Close         Adj Close         Volume        
##  Min.   :189.0   Min.   :162.5   Min.   :       0  
##  1st Qu.:292.0   1st Qu.:227.6   1st Qu.:       0  
##  Median :312.0   Median :250.6   Median :    1200  
##  Mean   :324.1   Mean   :257.6   Mean   :  224725  
##  3rd Qu.:360.0   3rd Qu.:285.4   3rd Qu.:   82425  
##  Max.   :492.0   Max.   :362.2   Max.   :21049300
yt<-AMAG$Close

Plot data

plot.ts(yt)

fried(ts(yt, frequency=7))
## Test used:  Friedman rank 
##  
## Test statistic:  4.09 
## P-value:  0.6639508

Hasil 0.6639508>0.05 maka Data Harga Saham Closing AMAG tidak bersifat seasonal

ts_exchangerate<-ts(yt, frequency=7)
d_exchangerate<-decompose(ts_exchangerate,"additive")
plot(d_exchangerate)

Stationary test (yt)

acf(yt, lag.max=60)

adf.test(yt,alternative="stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yt
## Dickey-Fuller = -1.4873, Lag order = 10, p-value = 0.7953
## alternative hypothesis: stationary

Decomposed

library(forecast)
amagdp<-(ts_exchangerate-d_exchangerate$seasonal)
autoplot(amagdp)

Stationary test (data decomposed)

plot(amagdp)

acf(amagdp, lag.max=50)

pacf(amagdp, lag.max=50)

adf.test(amagdp,alternative="stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  amagdp
## Dickey-Fuller = -1.4861, Lag order = 10, p-value = 0.7959
## alternative hypothesis: stationary

Split Data Training dan Testing

n<-length(amagdp)
n1<-abs(0.70*n)
n2<-n-1
n3<-n1+1
train<-amagdp[1:n1]
test<-amagdp[n3:n]
data.ts<-ts(train)
head(data.ts)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
## [1] 374.1791 380.1520 369.6635 371.5559 372.4424 389.6908
plot.ts(train , main="Train Data Set Time Series")

plot.ts(test , main="Test Data Set Time Series")

Stationary test (data train)

acf(train, lag.max=50)

pacf(train, lag.max=50)

archTest(train)
## Q(m) of squared series(LM test):  
## Test statistic:  7987.24  p-value:  0 
## Rank-based Test:  
## Test statistic:  7741.068  p-value:  0
adf.test(train)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train
## Dickey-Fuller = -3.7201, Lag order = 9, p-value = 0.02299
## alternative hypothesis: stationary

Transform Data Train

BoxCox(train)

BoxCox(test)

#Lambda=1
#Do 1/y transformation
y_box<-(train)**0.07
plot(y_box)

Transform Data Testing

y_test<-(test)**0.07
plot(y_test)

Stationary Test (after transform)

archTest(y_box)
## Q(m) of squared series(LM test):  
## Test statistic:  7955.34  p-value:  0 
## Rank-based Test:  
## Test statistic:  7741.068  p-value:  0
adf.test(y_box)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y_box
## Dickey-Fuller = -3.8973, Lag order = 9, p-value = 0.01413
## alternative hypothesis: stationary
acf(y_box, lag.max=50)

pacf(y_box, lag.max=50)

Hasil ADF test, p value=0.01413<0,05. Maka data sudah stasioner sehingga tidak harus melakukan proses differencing.

Find the tentative model

model1<-arima(y_box,order=c(1,0,0))
summary(model1)
## 
## Call:
## arima(x = y_box, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.9745     1.5045
## s.e.  0.0074     0.0045
## 
## sigma^2 estimated as 1.273e-05:  log likelihood = 3722.01,  aic = -7438.02
## 
## Training set error measures:
##                         ME        RMSE         MAE          MPE      MAPE
## Training set -1.774805e-05 0.003567767 0.001683706 -0.001749801 0.1119269
##                  MASE       ACF1
## Training set 1.082997 -0.2444567
coeftest(model1)
## 
## z test of coefficients:
## 
##            Estimate Std. Error z value  Pr(>|z|)    
## ar1       0.9745190  0.0074005  131.68 < 2.2e-16 ***
## intercept 1.5044871  0.0045231  332.62 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model2<-arima(y_box,order=c(2,0,0))
summary(model2)
## 
## Call:
## arima(x = y_box, order = c(2, 0, 0))
## 
## Coefficients:
##          ar1     ar2  intercept
##       0.7286  0.2522     1.5050
## s.e.  0.0325  0.0326     0.0057
## 
## sigma^2 estimated as 1.192e-05:  log likelihood = 3750.97,  aic = -7493.94
## 
## Training set error measures:
##                         ME       RMSE         MAE          MPE      MAPE
## Training set -3.861826e-05 0.00345239 0.001682807 -0.003103355 0.1118584
##                  MASE       ACF1
## Training set 1.082419 -0.0766121
coeftest(model2)
## 
## z test of coefficients:
## 
##            Estimate Std. Error  z value  Pr(>|z|)    
## ar1       0.7286489  0.0325450  22.3890 < 2.2e-16 ***
## ar2       0.2522431  0.0325741   7.7437 9.659e-15 ***
## intercept 1.5049847  0.0056845 264.7500 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model3<-arima(y_box,order=c(3,0,0))
summary(model3)
## 
## Call:
## arima(x = y_box, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1     ar2     ar3  intercept
##       0.6517  0.0273  0.3079     1.5038
## s.e.  0.0320  0.0389  0.0321     0.0074
## 
## sigma^2 estimated as 1.079e-05:  log likelihood = 3794.79,  aic = -7579.57
## 
## Training set error measures:
##                         ME        RMSE        MAE          MPE      MAPE
## Training set -3.780737e-05 0.003284694 0.00169347 -0.003003262 0.1125605
##                  MASE        ACF1
## Training set 1.089278 -0.02662153
coeftest(model3)
## 
## z test of coefficients:
## 
##            Estimate Std. Error  z value Pr(>|z|)    
## ar1       0.6517167  0.0319901  20.3724   <2e-16 ***
## ar2       0.0272557  0.0388557   0.7015    0.483    
## ar3       0.3078787  0.0320553   9.6046   <2e-16 ***
## intercept 1.5038475  0.0074389 202.1602   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model4<-arima(y_box,order=c(5,0,0))
summary(model4)
## 
## Call:
## arima(x = y_box, order = c(5, 0, 0))
## 
## Coefficients:
##          ar1     ar2     ar3     ar4     ar5  intercept
##       0.6158  0.0010  0.2480  0.0277  0.0971     1.5005
## s.e.  0.0335  0.0394  0.0386  0.0395  0.0336     0.0096
## 
## sigma^2 estimated as 1.06e-05:  log likelihood = 3802.34,  aic = -7590.68
## 
## Training set error measures:
##                       ME        RMSE         MAE          MPE      MAPE
## Training set -1.5503e-05 0.003256492 0.001709605 -0.001511542 0.1136221
##                  MASE         ACF1
## Training set 1.099656 -0.004013348
coeftest(model4)
## 
## z test of coefficients:
## 
##             Estimate Std. Error  z value  Pr(>|z|)    
## ar1       0.61580154 0.03349409  18.3854 < 2.2e-16 ***
## ar2       0.00095708 0.03944203   0.0243  0.980641    
## ar3       0.24799207 0.03862713   6.4202 1.361e-10 ***
## ar4       0.02768404 0.03954250   0.7001  0.483860    
## ar5       0.09713005 0.03356078   2.8942  0.003802 ** 
## intercept 1.50048885 0.00958243 156.5875 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Berdasarkan dari 4 tentative model, hasil AIC terkecil adalah -7590.68. Maka model ARIMA yang dipilih adalah ARIMA(5,0,0).

Check Residual

checkresiduals(model1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,0) with non-zero mean
## Q* = 100.49, df = 8, p-value < 2.2e-16
## 
## Model df: 2.   Total lags used: 10
checkresiduals(model2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,0) with non-zero mean
## Q* = 85.161, df = 7, p-value = 1.221e-15
## 
## Model df: 3.   Total lags used: 10
checkresiduals(model3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,0,0) with non-zero mean
## Q* = 16.727, df = 6, p-value = 0.01034
## 
## Model df: 4.   Total lags used: 10
checkresiduals(model4)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(5,0,0) with non-zero mean
## Q* = 5.4842, df = 4, p-value = 0.2411
## 
## Model df: 6.   Total lags used: 10
acf(model4$residuals)

Residual signifikan adalah di model 4 yaitu sebesar 0.2411. Apabila p-value>0,05 maka residualnya memenuhi asumsi white noise. Kesimpulannya tentative model 4 diterima.

Plot QQ

boxresult<-LjungBoxTest(model4$residuals,k=2,StartLag=1)
plot(boxresult[,3],main= "Ljung-Box Q Test", ylab= "P-values",xlab= "Lag")

qqnorm(model4$residuals)
qqline(model4$residuals)

ARIMA Forecast

fit3<-arima(y_box, order=c(5,0,0))
forecast<-predict(fit3, n2)
autoplot(forecast(fit3, h=n3, level=c(99.5)))

head(forecast$pred)
## Time Series:
## Start = 884 
## End = 889 
## Frequency = 1 
## [1] 1.483625 1.485009 1.484515 1.484708 1.484811 1.484791

Data Frame (Data Forecast)

data_forecast1 <- forecast$pred
data_forecast1 <- as.data.frame(data_forecast1)
head(data_forecast1)
##          x
## 1 1.483625
## 2 1.485009
## 3 1.484515
## 4 1.484708
## 5 1.484811
## 6 1.484791

Data Frame (Data aktual=Data testing)

dataaktual <- as.data.frame(y_test)
head(dataaktual)
##     y_test
## 1 1.481356
## 2 1.479645
## 3 1.483381
## 4 1.479175
## 5 1.478109
## 6 1.475199

Akurasi data forecast dan data testing

akurasi.arima1 <- accuracy(data_forecast1$x,dataaktual$`y_test`)
akurasi.arima1
##                   ME       RMSE        MAE       MPE     MAPE
## Test set -0.01576744 0.02337396 0.01844858 -1.082461 1.260801

Hasil akurasi antara data training dan testing mencapai 1.260801. Karena nilai MAPE berada di antara < 10% , maka akurasi tersebut diterima, atau kemampuan model peramalan sangat baik.

Double Exponential Smoothing (Data AMAG)

Double Exponential Smoothing (data training and testing)

data1<-AMAG$Close[-884:-1262] #testing
data2<-AMAG$Close[-1:-883] #training

#readdata
data.ts<-ts(data1)
head(data.ts)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
## [1] 374 380 370 372 372 390
#plotdata
plot(data.ts,main="Plot semua data")
points(data.ts)

lapfore2 = HoltWinters(data.ts, gamma=FALSE)
lapfore2
## Holt-Winters exponential smoothing with trend and without seasonal component.
## 
## Call:
## HoltWinters(x = data.ts, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 0.5433436
##  beta : 0.0279465
##  gamma: FALSE
## 
## Coefficients:
##           [,1]
## a 282.55967025
## b  -0.05864769
lapfore2$SSE
## [1] 110581.4
plot(lapfore2)

Hasil smoothing parameters: alpha = 0.5433436 dan beta = 0.0279465

Data Frame (Forecast)

hasil_forecast.sess <- forecast(lapfore2, h = 379)
hasil_forecast.ses <- as.data.frame(hasil_forecast.sess)
head(hasil_forecast.ses)
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
##  884       282.5010 268.1468 296.8553 260.5481 304.4540
##  885       282.4424 266.0009 298.8838 257.2974 307.5874
##  886       282.3837 263.9952 300.7723 254.2608 310.5066
##  887       282.3251 262.0866 302.5636 251.3730 313.2772
##  888       282.2664 260.2486 304.2843 248.5931 315.9398
##  889       282.2078 258.4633 305.9523 245.8937 318.5219
plot(hasil_forecast.sess)

Data Frame (Forecast)

data_forecast2 <- hasil_forecast.ses$`Point Forecast`
data_forecast2 <- as.data.frame(data_forecast2)
head(data_forecast2)
##   data_forecast2
## 1       282.5010
## 2       282.4424
## 3       282.3837
## 4       282.3251
## 5       282.2664
## 6       282.2078

Data Frame (Data testing)

dataaktual <- as.data.frame(data2)
head(dataaktual)
##   data2
## 1   274
## 2   270
## 3   280
## 4   268
## 5   266
## 6   258

Akurasi Data training dan testing

akurasi.ses <- accuracy(data_forecast2$data_forecast2,dataaktual$`data2`)
akurasi.ses
##                 ME    RMSE      MAE       MPE     MAPE
## Test set -2.028746 55.7199 49.37305 -4.792076 19.20809

Hasil akurasi antara data training dan testing mencapai 19.20809. Karena nilai MAPE berada di antara 10-20%, maka akurasi tersebut diterima atau kemampuan model peramalan baik

Berdasarkan hasil akurasi antara metode ARIMA dan Exponential, dimana hasil MAPE di ARIMA adalah 1.260801% sedangkan double exponential smoothing adalah 19.20809%. Maka metode yang tepat dan akurat untuk dilakukan proses forecasting yang baik adalah menggunakan Metode ARIMA.