Libraries

library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✓ ggplot2   3.3.5     ✓ fma       2.4  
## ✓ forecast  8.16      ✓ expsmooth 2.3
## 
library(tseries)
library(forecast)
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:fma':
## 
##     pigs
library(quantmod)
## Loading required package: TTR
library(ggplot2)
library(ggfortify)
## Registered S3 methods overwritten by 'ggfortify':
##   method                 from    
##   autoplot.Arima         forecast
##   autoplot.acf           forecast
##   autoplot.ar            forecast
##   autoplot.bats          forecast
##   autoplot.decomposed.ts forecast
##   autoplot.ets           forecast
##   autoplot.forecast      forecast
##   autoplot.stl           forecast
##   autoplot.ts            forecast
##   fitted.ar              forecast
##   fortify.ts             forecast
##   residuals.ar           forecast
library(faraway)
## 
## Attaching package: 'faraway'
## The following object is masked from 'package:GGally':
## 
##     happy
## The following objects are masked from 'package:fma':
## 
##     airpass, eggs, ozone, wheat
library(MASS)
## 
## Attaching package: 'MASS'
## The following objects are masked from 'package:fma':
## 
##     cement, housing, petrol
library(FitAR)
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:faraway':
## 
##     melanoma
## Loading required package: leaps
## Loading required package: ltsa
## Loading required package: bestglm
## 
## Attaching package: 'FitAR'
## The following object is masked from 'package:forecast':
## 
##     BoxCox
library(MTS)
## 
## Attaching package: 'MTS'
## The following object is masked from 'package:TTR':
## 
##     VMA
library(AID)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:FitAR':
## 
##     Boot
## The following objects are masked from 'package:faraway':
## 
##     logit, vif
library(seastests)
library(tseries)
library(stats)
library(lmtest)

Import Data

library(readxl)
ABDA<-read_excel("~/Downloads/ABDA.xlsx")
head(ABDA)
## # A tibble: 6 × 7
##   Date                 Open  High   Low Price `Adj Close` Volume
##   <dttm>              <dbl> <dbl> <dbl> <dbl>       <dbl>  <dbl>
## 1 2017-01-02 00:00:00  6900  6900  6900  6900       6290.      0
## 2 2017-01-03 00:00:00  6900  6900  6900  6900       6290.      0
## 3 2017-01-04 00:00:00  6900  6900  6900  6900       6290.      0
## 4 2017-01-05 00:00:00  6900  6900  6900  6900       6290.      0
## 5 2017-01-06 00:00:00  6900  6900  6900  6900       6290.      0
## 6 2017-01-09 00:00:00  6900  6900  6900  6900       6290.      0

Cleaning Data

ABDA[is.na(ABDA)] <- 0                                  # replace missing values with 0
ABDA$Date <- as.Date(ABDA$Date, format = "%Y-%m-%d")    # convert POSIXct datetimes to Date
summary(ABDA)
##       Date                 Open           High           Low      
##  Min.   :2017-01-02   Min.   :3360   Min.   :3360   Min.   :3360  
##  1st Qu.:2018-03-27   1st Qu.:6500   1st Qu.:6575   1st Qu.:6500  
##  Median :2019-06-11   Median :6950   Median :6950   Median :6950  
##  Mean   :2019-06-22   Mean   :6717   Mean   :6725   Mean   :6712  
##  3rd Qu.:2020-09-16   3rd Qu.:7000   3rd Qu.:7000   3rd Qu.:7000  
##  Max.   :2021-12-30   Max.   :8025   Max.   :8150   Max.   :7625  
##      Price        Adj Close        Volume       
##  Min.   :3360   Min.   :3142   Min.   :    0.0  
##  1st Qu.:6512   1st Qu.:6198   1st Qu.:    0.0  
##  Median :6950   Median :6546   Median :    0.0  
##  Mean   :6718   Mean   :6395   Mean   :  348.6  
##  3rd Qu.:7000   3rd Qu.:6822   3rd Qu.:    0.0  
##  Max.   :7625   Max.   :7200   Max.   :31300.0
yt<-ABDA$Price
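
Replacing every missing value with 0 is a blunt fix for price data. A gentler alternative, sketched below and not used above, carries the last observed price forward with zoo::na.locf() on a fresh copy of the file.

# Alternative cleaning sketch (not applied above): carry the last observed price forward
ABDA_raw <- read_excel("~/Downloads/ABDA.xlsx")           # fresh read, before the zero-fill
ABDA_raw$Price <- na.locf(ABDA_raw$Price, na.rm = FALSE)  # zoo: last observation carried forward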

Plot data

plot.ts(yt)

fried(ts(yt, frequency=7))   # seastests: Friedman rank test for weekly seasonality
## Test used:  Friedman rank 
##  
## Test statistic:  2.17 
## P-value:  0.9036314

Since the p-value 0.9036314 > 0.05, the ABDA closing stock price data are not seasonal.
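
As an optional cross-check (a sketch, not part of the original output), forecast::nsdiffs() estimates the number of seasonal differences the series needs; a result of 0 would be consistent with the Friedman test above.

# Estimated number of seasonal differences required (0 would mean no seasonal differencing)
nsdiffs(ts(yt, frequency = 7))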

ts_exchangerate<-ts(yt, frequency=7)                    # weekly-frequency time series of the closing price
d_exchangerate<-decompose(ts_exchangerate,"additive")   # additive decomposition
plot(d_exchangerate)

Stationarity test (yt)

acf(yt, lag.max=60)

adf.test(yt,alternative="stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yt
## Dickey-Fuller = -3.7388, Lag order = 10, p-value = 0.02205
## alternative hypothesis: stationary

Seasonally Adjusted Series

library(forecast)
abdadp<-(ts_exchangerate-d_exchangerate$seasonal)   # remove the seasonal component
autoplot(abdadp)

Stationarity test (seasonally adjusted data)

plot(abdadp)

acf(abdadp, lag.max=50)

pacf(abdadp, lag.max=50)

adf.test(abdadp,alternative="stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  abdadp
## Dickey-Fuller = -3.7402, Lag order = 10, p-value = 0.02198
## alternative hypothesis: stationary

Split the Data into Training and Test Sets

n<-length(abdadp)
n1<-floor(0.70*n)   # training size: first 70% of observations
n2<-n-1             # forecast horizon used for the ARIMA prediction below (longer than the 379-observation test set)
n3<-n1+1            # first index of the test set
train<-abdadp[1:n1]
test<-abdadp[n3:n]
data.ts<-ts(train)
head(data.ts)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
## [1] 6902.136 6903.892 6909.251 6909.852 6894.074 6883.879
plot.ts(train , main="Train Data Set Time Series")

plot.ts(test , main="Test Data Set Time Series")
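
An equivalent way to write the 70/30 split (a sketch; it should give the same train/test values as the indexing above) uses head() and tail().

# Equivalent split using head()/tail()
n_train <- floor(0.70 * length(abdadp))
train_alt <- head(abdadp, n_train)                   # first 70% (training)
test_alt <- tail(abdadp, length(abdadp) - n_train)   # remaining 30% (testing)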

Stationarity test (training data)

acf(train, lag.max=50)

pacf(train, lag.max=50)

archTest(train)   # MTS: test for ARCH effects in the training series
## Q(m) of squared series(LM test):  
## Test statistic:  7117.037  p-value:  0 
## Rank-based Test:  
## Test statistic:  6751.015  p-value:  0
adf.test(train)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train
## Dickey-Fuller = -3.423, Lag order = 9, p-value = 0.04971
## alternative hypothesis: stationary

Transform the Training Data

# Lambda = 1, so y^1 = y: no transformation is actually applied to the training data
y_box<-(train)**1
plot(y_box)
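
For reference, a sketch that is not part of the original workflow: forecast::BoxCox.lambda() suggests a Box-Cox parameter from the data, and an estimate near 1 would support keeping the series untransformed, as done here.

# Estimate a Box-Cox lambda for the training data (a value near 1 means no transformation is needed)
BoxCox.lambda(train)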

Transform the Test Data

y_test<-(test)**1   # apply the same (identity) transformation to the test data
plot(y_test)

Stationarity test (after transformation)

archTest(y_box)
## Q(m) of squared series(LM test):  
## Test statistic:  7117.037  p-value:  0 
## Rank-based Test:  
## Test statistic:  6751.015  p-value:  0
adf.test(y_box)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y_box
## Dickey-Fuller = -3.423, Lag order = 9, p-value = 0.04971
## alternative hypothesis: stationary
acf(y_box, lag.max=50)

pacf(y_box, lag.max=50)

The ADF test gives p-value = 0.04971 < 0.05, so the data are already stationary; we can therefore go straight to identifying tentative models without any differencing.
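
Two optional cross-checks (a sketch, not part of the original output): forecast::ndiffs() estimates the number of regular differences needed, and the KPSS test from tseries takes stationarity as its null hypothesis, complementing the ADF test.

# Cross-checks of the "no differencing needed" conclusion
ndiffs(y_box)      # estimated number of first differences required
kpss.test(y_box)   # null hypothesis: the series is (level) stationary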

Identify Tentative Models (without differencing)

model1<-arima(y_box,order=c(1,0,0))
summary(model1)
## 
## Call:
## arima(x = y_box, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.9812  6742.3531
## s.e.  0.0063   239.4058
## 
## sigma^2 estimated as 20086:  log likelihood = -5628.86,  aic = 11263.72
## 
## Training set error measures:
##                      ME     RMSE      MAE         MPE      MAPE    MASE
## Training set -0.9776668 141.7264 36.45566 -0.07603895 0.6141616 1.15221
##                    ACF1
## Training set -0.0185862
coeftest(model1)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value  Pr(>|z|)    
## ar1       9.8115e-01 6.3042e-03 155.635 < 2.2e-16 ***
## intercept 6.7424e+03 2.3941e+02  28.163 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model2<-arima(y_box,order=c(2,0,0))
summary(model2)
## 
## Call:
## arima(x = y_box, order = c(2, 0, 0))
## 
## Coefficients:
##          ar1     ar2  intercept
##       0.9615  0.0200  6754.6711
## s.e.  0.0336  0.0336   244.0599
## 
## sigma^2 estimated as 20079:  log likelihood = -5628.69,  aic = 11265.38
## 
## Training set error measures:
##                     ME     RMSE     MAE         MPE     MAPE     MASE
## Training set -1.239562 141.6993 36.4478 -0.08008879 0.614814 1.151962
##                     ACF1
## Training set 0.002706406
coeftest(model2)
## 
## z test of coefficients:
## 
##             Estimate Std. Error z value Pr(>|z|)    
## ar1       9.6150e-01 3.3611e-02 28.6063   <2e-16 ***
## ar2       2.0028e-02 3.3644e-02  0.5953   0.5517    
## intercept 6.7547e+03 2.4406e+02 27.6763   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model3<-arima(y_box,order=c(3,0,0))
summary(model3)
## 
## Call:
## arima(x = y_box, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1     ar2      ar3  intercept
##       0.9632  0.1001  -0.0833  6754.3410
## s.e.  0.0335  0.0465   0.0335   226.1775
## 
## sigma^2 estimated as 19939:  log likelihood = -5625.63,  aic = 11261.26
## 
## Training set error measures:
##                     ME     RMSE    MAE         MPE      MAPE     MASE
## Training set -1.114381 141.2071 37.261 -0.07672498 0.6250777 1.177664
##                      ACF1
## Training set -0.007976677
coeftest(model3)
## 
## z test of coefficients:
## 
##              Estimate  Std. Error z value Pr(>|z|)    
## ar1          0.963151    0.033497 28.7531  < 2e-16 ***
## ar2          0.100116    0.046506  2.1528  0.03134 *  
## ar3         -0.083293    0.033519 -2.4849  0.01296 *  
## intercept 6754.340987  226.177458 29.8630  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model4<-arima(y_box,order=c(4,0,0))
summary(model4)
## 
## Call:
## arima(x = y_box, order = c(4, 0, 0))
## 
## Coefficients:
##          ar1     ar2     ar3      ar4  intercept
##       0.9542  0.1108  0.0211  -0.1083  6704.7625
## s.e.  0.0334  0.0464  0.0463   0.0334   206.2003
## 
## sigma^2 estimated as 19703:  log likelihood = -5620.39,  aic = 11252.79
## 
## Training set error measures:
##                     ME     RMSE     MAE         MPE      MAPE     MASE
## Training set 0.1351413 140.3687 39.1235 -0.05498536 0.6487696 1.236529
##                       ACF1
## Training set -0.0006651154
coeftest(model4)
## 
## z test of coefficients:
## 
##              Estimate  Std. Error z value  Pr(>|z|)    
## ar1          0.954195    0.033412 28.5586 < 2.2e-16 ***
## ar2          0.110839    0.046357  2.3910  0.016803 *  
## ar3          0.021099    0.046327  0.4554  0.648791    
## ar4         -0.108256    0.033419 -3.2394  0.001198 ** 
## intercept 6704.762457  206.200267 32.5158 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Of the four tentative models, the smallest AIC is 11252.79, so the selected model is ARIMA(4,0,0).
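
The same comparison can be tabulated directly, and auto.arima() gives an independent check of the order selection (a sketch; its choice may differ from the manual search above).

# AIC of the four candidate models in one table
AIC(model1, model2, model3, model4)

# Automatic order selection as an independent check (may pick a different model)
auto.arima(y_box, d = 0, seasonal = FALSE)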

Check Residuals

checkresiduals(model1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,0) with non-zero mean
## Q* = 21.86, df = 8, p-value = 0.005182
## 
## Model df: 2.   Total lags used: 10
checkresiduals(model2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,0) with non-zero mean
## Q* = 22.432, df = 7, p-value = 0.00214
## 
## Model df: 3.   Total lags used: 10
checkresiduals(model3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,0,0) with non-zero mean
## Q* = 14.739, df = 6, p-value = 0.02239
## 
## Model df: 4.   Total lags used: 10
checkresiduals(model4)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,0,0) with non-zero mean
## Q* = 4.0091, df = 5, p-value = 0.5481
## 
## Model df: 5.   Total lags used: 10
acf(model4$residuals)

Model 4 is the only model whose residuals pass the Ljung-Box test, with a p-value of 0.5481. Since the p-value is greater than 0.05, the residuals satisfy the white-noise assumption, so tentative model 4 is accepted.
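
The same Ljung-Box statistic can be computed directly with stats::Box.test() (a sketch; checkresiduals() above already reports it), using 10 lags and 5 fitted parameters to match the output shown.

# Ljung-Box test on the ARIMA(4,0,0) residuals: 10 lags, 5 fitted parameters
Box.test(residuals(model4), lag = 10, type = "Ljung-Box", fitdf = 5)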

Ljung-Box p-value Plot and QQ Plot

boxresult<-LjungBoxTest(model4$residuals,k=2,StartLag=1)   # FitAR: Ljung-Box statistics and p-values over a range of lags
plot(boxresult[,3],main= "Ljung-Box Q Test", ylab= "P-values",xlab= "Lag")

qqnorm(model4$residuals)
qqline(model4$residuals)

ARIMA Forecast

fit3<-arima(y_box, order=c(4,0,0))              # refit the selected ARIMA(4,0,0) on the training data
forecast<-predict(fit3, n2)                     # point forecasts; note this name shadows forecast()
autoplot(forecast(fit3, h=n3, level=c(99.5)))   # forecast() still resolves to the function when called

head(forecast$pred)
## Time Series:
## Start = 884 
## End = 889 
## Frequency = 1 
## [1] 5773.120 5795.100 5817.098 5840.402 5863.269 5885.756

Data Frame (Forecast Data)

data_forecast1 <- forecast$pred
data_forecast1 <- as.data.frame(data_forecast1)
head(data_forecast1)
##          x
## 1 5773.120
## 2 5795.100
## 3 5817.098
## 4 5840.402
## 5 5863.269
## 6 5885.756

Data Frame (Actual Data = Test Data)

dataaktual <- as.data.frame(y_test)
head(dataaktual)
##     y_test
## 1 5753.892
## 2 5759.251
## 3 5759.852
## 4 5744.074
## 5 5983.879
## 6 5746.917

Accuracy of the forecasts against the test data

akurasi.arima1 <- accuracy(data_forecast1$x,dataaktual$`y_test`)
akurasi.arima1
##                ME     RMSE      MAE        MPE     MAPE
## Test set 17.47626 449.2002 366.2928 -0.1977255 5.722292

The MAPE of the forecasts against the test data is 5.722292%. Since MAPE < 10%, the accuracy is acceptable and the forecasting model performs very well.
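
For transparency, the MAPE can also be computed by hand from its definition, mean(|actual − forecast| / |actual|) × 100 (a sketch that aligns the first length(y_test) forecasts with the test actuals; accuracy() above does essentially the same).

# MAPE computed directly from its definition over the test period
h <- length(dataaktual$y_test)
mean(abs((dataaktual$y_test - data_forecast1$x[1:h]) / dataaktual$y_test)) * 100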

Double Exponential Smoothing (Data ABDA)

Double Exponential Smoothing (training and test data)

data1<-ABDA$Price[-884:-1262] #training: observations 1-883
data2<-ABDA$Price[-1:-883] #testing: observations 884-1262

# convert the training data to a time series
data.ts<-ts(data1)
head(data.ts)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
## [1] 6900 6900 6900 6900 6900 6900
# plot the training data
plot(data.ts,main="Training Data")
points(data.ts)

lapfore2 = HoltWinters(data.ts, gamma=FALSE)   # Holt's linear method (double exponential smoothing); gamma=FALSE drops the seasonal term
lapfore2
## Holt-Winters exponential smoothing with trend and without seasonal component.
## 
## Call:
## HoltWinters(x = data.ts, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 0.9757213
##  beta : 0
##  gamma: FALSE
## 
## Coefficients:
##   [,1]
## a 5750
## b    0
lapfore2$SSE
## [1] 18001425
plot(lapfore2)

The estimated smoothing parameters are alpha = 0.9757213 and beta = 0.
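
To make the roles of alpha and beta concrete, here is a minimal hand-rolled version of Holt's recursion using the same parameter values (a sketch only; HoltWinters() above does this internally with its own initialization, so the fitted values will differ slightly).

# Minimal double exponential smoothing (Holt) recursion:
#   level_t = alpha*y_t + (1-alpha)*(level_{t-1} + trend_{t-1})
#   trend_t = beta*(level_t - level_{t-1}) + (1-beta)*trend_{t-1}
holt_sketch <- function(y, alpha, beta) {
  level <- y[1]
  trend <- y[2] - y[1]
  fitted <- rep(NA_real_, length(y))
  for (t in 2:length(y)) {
    fitted[t] <- level + trend                       # one-step-ahead forecast
    new_level <- alpha * y[t] + (1 - alpha) * (level + trend)
    trend <- beta * (new_level - level) + (1 - beta) * trend
    level <- new_level
  }
  fitted
}
head(holt_sketch(as.numeric(data.ts), alpha = 0.9757213, beta = 0), 10)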

Data Frame (Forecast)

hasil_forecast.sess <- forecast(lapfore2, h = 379)   # forecast the 379 test-set periods
hasil_forecast.ses <- as.data.frame(hasil_forecast.sess)
head(hasil_forecast.ses)
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
##  884           5750 5566.714 5933.286 5469.688 6030.312
##  885           5750 5493.922 6006.078 5358.362 6141.638
##  886           5750 5437.657 6062.343 5272.312 6227.688
##  887           5750 5390.082 6109.918 5199.554 6300.446
##  888           5750 5348.101 6151.899 5135.348 6364.652
##  889           5750 5310.108 6189.892 5077.243 6422.757
plot(hasil_forecast.sess)

Data Frame (Point Forecasts)

data_forecast2 <- hasil_forecast.ses$`Point Forecast`
data_forecast2 <- as.data.frame(data_forecast2)
head(data_forecast2)
##   data_forecast2
## 1           5750
## 2           5750
## 3           5750
## 4           5750
## 5           5750
## 6           5750

Data Frame (Test Data)

dataaktual <- as.data.frame(data2)
head(dataaktual)
##   data2
## 1  5750
## 2  5750
## 3  5750
## 4  5750
## 5  6000
## 6  5750

Accuracy of the forecasts against the test data

akurasi.ses <- accuracy(data_forecast2$data_forecast2,dataaktual$`data2`)
akurasi.ses
##               ME     RMSE      MAE      MPE    MAPE
## Test set 878.628 991.1165 892.6121 12.80737 13.0583

The MAPE of the forecasts against the test data is 13.0583%. Since MAPE lies between 10% and 20%, the accuracy is acceptable and the forecasting model performs well.

Comparing the two methods, the MAPE of the ARIMA model is 5.722292% while that of double exponential smoothing is 13.0583%. ARIMA is therefore the more appropriate and accurate method for forecasting the ABDA closing price.
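
A small summary table makes the final comparison explicit (values taken from the accuracy output above).

# Side-by-side MAPE comparison of the two methods
mape_comparison <- data.frame(
  Method = c("ARIMA(4,0,0)", "Double Exponential Smoothing"),
  MAPE = c(5.722292, 13.0583)
)
mape_comparison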