Library

library(fpp2)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──

## ✓ ggplot2   3.3.5     ✓ fma       2.4  
## ✓ forecast  8.16      ✓ expsmooth 2.3

##

library(tseries)
library(forecast)
library(xts)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(GGally)

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

## 
## Attaching package: 'GGally'

## The following object is masked from 'package:fma':
## 
##     pigs

library(quantmod)

## Loading required package: TTR

library(ggplot2)
library(ggfortify)

## Registered S3 methods overwritten by 'ggfortify':
##   method                 from    
##   autoplot.Arima         forecast
##   autoplot.acf           forecast
##   autoplot.ar            forecast
##   autoplot.bats          forecast
##   autoplot.decomposed.ts forecast
##   autoplot.ets           forecast
##   autoplot.forecast      forecast
##   autoplot.stl           forecast
##   autoplot.ts            forecast
##   fitted.ar              forecast
##   fortify.ts             forecast
##   residuals.ar           forecast

library(faraway)

## 
## Attaching package: 'faraway'

## The following object is masked from 'package:GGally':
## 
##     happy

## The following objects are masked from 'package:fma':
## 
##     airpass, eggs, ozone, wheat

library(MASS)

## 
## Attaching package: 'MASS'

## The following objects are masked from 'package:fma':
## 
##     cement, housing, petrol

library(FitAR)

## Loading required package: lattice

## 
## Attaching package: 'lattice'

## The following object is masked from 'package:faraway':
## 
##     melanoma

## Loading required package: leaps

## Loading required package: ltsa

## Loading required package: bestglm

## 
## Attaching package: 'FitAR'

## The following object is masked from 'package:forecast':
## 
##     BoxCox

library(MTS)

## 
## Attaching package: 'MTS'

## The following object is masked from 'package:TTR':
## 
##     VMA

library(AID)
library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:FitAR':
## 
##     Boot

## The following objects are masked from 'package:faraway':
## 
##     logit, vif

library(seastests)
library(tseries)
library(stats)
library(lmtest)

Import Data

library(readxl)
LPGI<-read_excel("~/Downloads/LPGI.xlsx")
head(LPGI)

## # A tibble: 6 × 7
##   Date                 Open  High   Low Close `Adj Close` Volume
##   <dttm>              <dbl> <dbl> <dbl> <dbl>       <dbl>  <dbl>
## 1 2017-01-02 00:00:00  5400  5400  5400  5400       3887.      0
## 2 2017-01-03 00:00:00  5400  5500  5400  5500       3959.  65100
## 3 2017-01-04 00:00:00  5500  5600  5400  5525       3977.  25700
## 4 2017-01-05 00:00:00  5525  5600  5500  5600       4031.  53100
## 5 2017-01-06 00:00:00  5600  5750  5575  5750       4139.   2500
## 6 2017-01-09 00:00:00  5750  5750  5475  5650       4067.  43400

Cleaning Data

LPGI[is.na(LPGI)] <- 0
LPGI$Date <- as.Date(LPGI$Date, format = "%Y-%m-%d")
summary(LPGI)

##       Date                 Open           High           Low      
##  Min.   :2017-01-02   Min.   :2800   Min.   :2900   Min.   :2700  
##  1st Qu.:2018-03-27   1st Qu.:3502   1st Qu.:3510   1st Qu.:3500  
##  Median :2019-06-11   Median :3860   Median :3890   Median :3800  
##  Mean   :2019-06-22   Mean   :4087   Mean   :4111   Mean   :4065  
##  3rd Qu.:2020-09-16   3rd Qu.:4450   3rd Qu.:4500   3rd Qu.:4448  
##  Max.   :2021-12-30   Max.   :5950   Max.   :5950   Max.   :5950  
##      Close        Adj Close        Volume       
##  Min.   :2900   Min.   :2291   Min.   :      0  
##  1st Qu.:3510   1st Qu.:3078   1st Qu.:      0  
##  Median :3860   Median :3342   Median :    100  
##  Mean   :4093   Mean   :3401   Mean   :   8321  
##  3rd Qu.:4500   3rd Qu.:3671   3rd Qu.:   3375  
##  Max.   :5950   Max.   :5175   Max.   :1000000

yt<-LPGI$Close

Plot data

plot.ts(yt)

fried(ts(yt, frequency=7))

## Test used:  Friedman rank 
##  
## Test statistic:  2.34 
## P-value:  0.8854967

Hasil 0.8854967>0.05 maka Data Harga Saham Closing LPGI tidak bersifat seasonal

ts_exchangerate<-ts(yt, frequency=7)
d_exchangerate<-decompose(ts_exchangerate,"additive")
plot(d_exchangerate)

Stationary test (yt)

acf(yt, lag.max=60)

adf.test(yt,alternative="stationary")

## 
##  Augmented Dickey-Fuller Test
## 
## data:  yt
## Dickey-Fuller = -1.1737, Lag order = 10, p-value = 0.9107
## alternative hypothesis: stationary

Decomposed

library(forecast)
lpgidp<-(ts_exchangerate-d_exchangerate$seasonal)
autoplot(lpgidp)

Stationary test (data decomposed)

plot(lpgidp)

acf(lpgidp, lag.max=50)

pacf(lpgidp, lag.max=50)

adf.test(lpgidp,alternative="stationary")

## 
##  Augmented Dickey-Fuller Test
## 
## data:  lpgidp
## Dickey-Fuller = -1.1574, Lag order = 10, p-value = 0.9133
## alternative hypothesis: stationary

Split Data Training dan Testing

n<-length(lpgidp)
n1<-abs(0.70*n)
n2<-n-1
n3<-n1+1
train<-lpgidp[1:n1]
test<-lpgidp[n3:n]
data.ts<-ts(train)
head(data.ts)

## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
## [1] 5413.725 5508.370 5520.473 5593.418 5743.299 5643.823

plot.ts(train , main="Train Data Set Time Series")

plot.ts(test , main="Test Data Set Time Series")

Stationary test (data train)

acf(train, lag.max=50)

pacf(train, lag.max=50)

archTest(train)

## Q(m) of squared series(LM test):  
## Test statistic:  8260.081  p-value:  0 
## Rank-based Test:  
## Test statistic:  7600.462  p-value:  0

adf.test(train)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  train
## Dickey-Fuller = -2.1236, Lag order = 9, p-value = 0.526
## alternative hypothesis: stationary

Transform Data Train

BoxCox(train)

#Lambda=1
#Do 1/y transformation
y_box<-(train)**1
plot(y_box)

Transform Data Testing

y_test<-(test)**1
plot(y_test)

Stationary Test (after transform)

archTest(y_box)

## Q(m) of squared series(LM test):  
## Test statistic:  8260.081  p-value:  0 
## Rank-based Test:  
## Test statistic:  7600.462  p-value:  0

adf.test(y_box)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  y_box
## Dickey-Fuller = -2.1236, Lag order = 9, p-value = 0.526
## alternative hypothesis: stationary

acf(y_box, lag.max=50)

pacf(y_box, lag.max=50)

Hasil ADF test, p value=0,526>0,05. Maka data belum stasioner sehingga harus melakukan proses differencing.

Differencing

diffy_box<-diff(y_box, differences=1)
plot.ts(diffy_box)

acf(diffy_box, lag.max=50)

pacf(diffy_box, lag.max=50)

Stationary Test (after differencing)

archTest(diffy_box)

## Q(m) of squared series(LM test):  
## Test statistic:  154.5242  p-value:  0 
## Rank-based Test:  
## Test statistic:  335.869  p-value:  0

adf.test(diffy_box)

## Warning in adf.test(diffy_box): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diffy_box
## Dickey-Fuller = -11.491, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary

Hasil ADF test, p value=0,01<0,05. Maka data sudah stasioner sehingga dapat melakukan proses pemilihan tentative model.

Find the tentative model with differencing process

model1<-arima(y_box,order=c(1,1,0))
summary(model1)

## 
## Call:
## arima(x = y_box, order = c(1, 1, 0))
## 
## Coefficients:
##           ar1
##       -0.1748
## s.e.   0.0332
## 
## sigma^2 estimated as 11423:  log likelihood = -5371.95,  aic = 10747.9
## 
## Training set error measures:
##                     ME     RMSE      MAE         MPE      MAPE     MASE
## Training set -2.060063 106.8179 42.15738 -0.08247877 0.9939015 1.047724
##                     ACF1
## Training set -0.01086235

coeftest(model1)

## 
## z test of coefficients:
## 
##      Estimate Std. Error z value  Pr(>|z|)    
## ar1 -0.174790   0.033153 -5.2722 1.348e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model2<-arima(y_box,order=c(2,1,0))
summary(model2)

## 
## Call:
## arima(x = y_box, order = c(2, 1, 0))
## 
## Coefficients:
##           ar1      ar2
##       -0.1853  -0.0599
## s.e.   0.0336   0.0336
## 
## sigma^2 estimated as 11382:  log likelihood = -5370.36,  aic = 10746.72
## 
## Training set error measures:
##                     ME     RMSE      MAE         MPE     MAPE     MASE
## Training set -2.186534 106.6252 42.58087 -0.08710808 1.005876 1.058249
##                     ACF1
## Training set 0.004999031

coeftest(model2)

## 
## z test of coefficients:
## 
##      Estimate Std. Error z value  Pr(>|z|)    
## ar1 -0.185270   0.033615 -5.5115 3.558e-08 ***
## ar2 -0.059938   0.033591 -1.7844   0.07437 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model3<-arima(y_box,order=c(3,1,0))
summary(model3)

## 
## Call:
## arima(x = y_box, order = c(3, 1, 0))
## 
## Coefficients:
##           ar1      ar2     ar3
##       -0.1798  -0.0431  0.0905
## s.e.   0.0335   0.0340  0.0335
## 
## sigma^2 estimated as 11288:  log likelihood = -5366.72,  aic = 10741.45
## 
## Training set error measures:
##                     ME     RMSE      MAE         MPE     MAPE    MASE
## Training set -1.984303 106.1849 43.37521 -0.07925854 1.025416 1.07799
##                     ACF1
## Training set 0.003838781

coeftest(model3)

## 
## z test of coefficients:
## 
##      Estimate Std. Error z value  Pr(>|z|)    
## ar1 -0.179768   0.033538 -5.3602 8.312e-08 ***
## ar2 -0.043097   0.034027 -1.2665   0.20532    
## ar3  0.090550   0.033499  2.7031   0.00687 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model4<-arima(y_box,order=c(8,1,0))
summary(model4)

## 
## Call:
## arima(x = y_box, order = c(8, 1, 0))
## 
## Coefficients:
##           ar1      ar2     ar3      ar4      ar5      ar6      ar7      ar8
##       -0.1881  -0.0509  0.0795  -0.0528  -0.0678  -0.0840  -0.0834  -0.0098
## s.e.   0.0337   0.0341  0.0341   0.0341   0.0341   0.0341   0.0341   0.0336
## 
## sigma^2 estimated as 11102:  log likelihood = -5359.46,  aic = 10736.92
## 
## Training set error measures:
##                     ME    RMSE      MAE         MPE     MAPE     MASE
## Training set -2.536977 105.308 45.32421 -0.09799613 1.076866 1.126428
##                       ACF1
## Training set -0.0009370291

coeftest(model4)

## 
## z test of coefficients:
## 
##       Estimate Std. Error z value  Pr(>|z|)    
## ar1 -0.1881275  0.0336746 -5.5866 2.315e-08 ***
## ar2 -0.0508723  0.0341330 -1.4904   0.13612    
## ar3  0.0794937  0.0340601  2.3339   0.01960 *  
## ar4 -0.0528374  0.0341047 -1.5493   0.12132    
## ar5 -0.0678337  0.0340987 -1.9893   0.04666 *  
## ar6 -0.0839572  0.0340560 -2.4653   0.01369 *  
## ar7 -0.0834173  0.0341229 -2.4446   0.01450 *  
## ar8 -0.0098127  0.0336464 -0.2916   0.77056    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Berdasarkan dari 4 tentative model, hasil AIC terkecil adalah 10736.92. Maka model ARIMA yang dipilih adalah ARIMA(8,1,0).

Check Residual

checkresiduals(model1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,0)
## Q* = 26.524, df = 9, p-value = 0.001676
## 
## Model df: 1.   Total lags used: 10

checkresiduals(model2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,0)
## Q* = 25.089, df = 8, p-value = 0.001502
## 
## Model df: 2.   Total lags used: 10

checkresiduals(model3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,1,0)
## Q* = 15.281, df = 7, p-value = 0.03256
## 
## Model df: 3.   Total lags used: 10

checkresiduals(model4)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(8,1,0)
## Q* = 2.2708, df = 3, p-value = 0.5181
## 
## Model df: 8.   Total lags used: 11

acf(model4$residuals)

Residual signifikan adalah di model 4 yaitu sebesar 0.5181. Apabila p-value>0,05 maka residualnya memenuhi asumsi white noise. Kesimpulannya tentative model 4 diterima.

Plot QQ

boxresult<-LjungBoxTest(model4$residuals,k=2,StartLag=1)
plot(boxresult[,3],main= "Ljung-Box Q Test", ylab= "P-values",xlab= "Lag")

qqnorm(model4$residuals)
qqline(model4$residuals)

ARIMA Forecast

fit3<-arima(y_box, order=c(8,1,0))
forecast<-predict(fit3, n2)
autoplot(forecast(fit3, h=n3, level=c(99.5)))

head(forecast$pred)

## Time Series:
## Start = 884 
## End = 889 
## Frequency = 1 
## [1] 3870.240 3879.599 3887.594 3889.705 3888.269 3886.459

Data Frame (Data Forecast)

data_forecast1 <- forecast$pred
data_forecast1 <- as.data.frame(data_forecast1)
head(data_forecast1)

##          x
## 1 3870.240
## 2 3879.599
## 3 3887.594
## 4 3889.705
## 5 3888.269
## 6 3886.459

Data Frame (Data aktual=Data testing)

dataaktual <- as.data.frame(y_test)
head(dataaktual)

##     y_test
## 1 3808.370
## 2 3745.473
## 3 3743.418
## 4 3843.299
## 5 3843.823
## 6 3951.893

Akurasi data forecast dan data testing

akurasi.arima1 <- accuracy(data_forecast1$x,dataaktual$`y_test`)
akurasi.arima1

##                 ME     RMSE      MAE       MPE     MAPE
## Test set -223.1506 441.4826 371.6276 -7.131258 10.37647

Hasil akurasi antara data training dan testing mencapai 10.37647. Karena nilai MAPE berada di antara 10-20% , maka akurasi tersebut diterima, atau kemampuan model peramalan baik.

Double Exponential Smoothing (Data LPGI)

Double Exponential Smoothing (data training and testing)

data1<-LPGI$Close[-884:-1262] #testing
data2<-LPGI$Close[-1:-883] #training

#readdata
data.ts<-ts(data1)
head(data.ts)

## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
## [1] 5400 5500 5525 5600 5750 5650

#plotdata
plot(data.ts,main="Plot semua data")
points(data.ts)

lapfore2 = HoltWinters(data.ts, gamma=FALSE)
lapfore2

## Holt-Winters exponential smoothing with trend and without seasonal component.
## 
## Call:
## HoltWinters(x = data.ts, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 0.8365515
##  beta : 0.03274018
##  gamma: FALSE
## 
## Coefficients:
##          [,1]
## a 3850.746878
## b    3.429557

lapfore2$SSE

## [1] 10585935

plot(lapfore2)

Hasil smoothing parameters: alpha = 0.8365515 dan beta = 0.03274018

Data Frame (Forecast)

hasil_forecast.sess <- forecast(lapfore2, h = 379)
hasil_forecast.ses <- as.data.frame(hasil_forecast.sess)
head(hasil_forecast.ses)

##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
##  884       3854.176 3713.711 3994.642 3639.353 4069.000
##  885       3857.606 3671.979 4043.233 3573.714 4141.498
##  886       3861.036 3637.132 4084.939 3518.605 4203.466
##  887       3864.465 3606.035 4122.895 3469.231 4259.700
##  888       3867.895 3577.297 4158.493 3423.463 4312.326
##  889       3871.324 3550.161 4192.487 3380.148 4362.501

plot(hasil_forecast.sess)

Data Frame (Forecast)

data_forecast2 <- hasil_forecast.ses$`Point Forecast`
data_forecast2 <- as.data.frame(data_forecast2)
head(data_forecast2)

##   data_forecast2
## 1       3854.176
## 2       3857.606
## 3       3861.036
## 4       3864.465
## 5       3867.895
## 6       3871.324

Data Frame (Data testing)

dataaktual <- as.data.frame(data2)
head(dataaktual)

##   data2
## 1  3800
## 2  3750
## 3  3750
## 4  3850
## 5  3850
## 6  3950

Akurasi Data training dan testing

akurasi.ses <- accuracy(data_forecast2$data_forecast2,dataaktual$`data2`)
akurasi.ses

##                 ME    RMSE      MAE       MPE     MAPE
## Test set -840.0541 911.175 842.2392 -23.61308 23.66383

Hasil akurasi antara data training dan testing mencapai 23.66383. Karena nilai MAPE berada di antara 20-50%, maka akurasi tersebut diterima atau kemampuan model peramalan layak.

Berdasarkan hasil akurasi antara metode ARIMA dan Exponential, dimana hasil MAPE di ARIMA adalah 10.37647% sedangkan double exponential smoothing adalah 23.66383%. Maka metode yang tepat dan akurat untuk dilakukan proses forecasting yang baik adalah menggunakan Metode ARIMA.

LPGI

Elvina Utama

2022-04-08

Library

Import Data

Cleaning Data

Plot data

Hasil 0.8854967>0.05 maka Data Harga Saham Closing LPGI tidak bersifat seasonal

Stationary test (yt)

Decomposed

Stationary test (data decomposed)

Split Data Training dan Testing

Stationary test (data train)

Transform Data Train

Transform Data Testing

Stationary Test (after transform)

Hasil ADF test, p value=0,526>0,05. Maka data belum stasioner sehingga harus melakukan proses differencing.

Differencing

Stationary Test (after differencing)

Hasil ADF test, p value=0,01<0,05. Maka data sudah stasioner sehingga dapat melakukan proses pemilihan tentative model.

Find the tentative model with differencing process

Berdasarkan dari 4 tentative model, hasil AIC terkecil adalah 10736.92. Maka model ARIMA yang dipilih adalah ARIMA(8,1,0).

Check Residual

Residual signifikan adalah di model 4 yaitu sebesar 0.5181. Apabila p-value>0,05 maka residualnya memenuhi asumsi white noise. Kesimpulannya tentative model 4 diterima.

Plot QQ

ARIMA Forecast

Data Frame (Data Forecast)

Data Frame (Data aktual=Data testing)

Akurasi data forecast dan data testing

Hasil akurasi antara data training dan testing mencapai 10.37647. Karena nilai MAPE berada di antara 10-20% , maka akurasi tersebut diterima, atau kemampuan model peramalan baik.

Double Exponential Smoothing (Data LPGI)

Double Exponential Smoothing (data training and testing)

Hasil smoothing parameters: alpha = 0.8365515 dan beta = 0.03274018

Data Frame (Forecast)

Data Frame (Forecast)

Data Frame (Data testing)

Akurasi Data training dan testing

Hasil akurasi antara data training dan testing mencapai 23.66383. Karena nilai MAPE berada di antara 20-50%, maka akurasi tersebut diterima atau kemampuan model peramalan layak.

Berdasarkan hasil akurasi antara metode ARIMA dan Exponential, dimana hasil MAPE di ARIMA adalah 10.37647% sedangkan double exponential smoothing adalah 23.66383%. Maka metode yang tepat dan akurat untuk dilakukan proses forecasting yang baik adalah menggunakan Metode ARIMA.