Regresi dengan Peubah Lag

Library yang digunakan

library(dLagM) 
## Loading required package: nardl
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: dynlm
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(dynlm) 
library(MLmetrics) 
## 
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:dLagM':
## 
##     MAPE
## The following object is masked from 'package:base':
## 
##     Recall
library(lmtest)
library(car)
## Loading required package: carData
library(readxl)

Data

Data yang digunakan merupakan Data Harga Emas Harian selama 10 bulan pada tahun 2000. Dataset ini bersumber dari kaggle.com dimana peubah yang digunakan adalah peubah Close dan peubah Open. Close sebagai peubah bebas (x) dan Open sebagai peubah tak bebas (y).

Import Data

data <- read_excel("C:/SEMESTER 5/MPDW/gold.xlsx")
knitr::kable(data,align = "c")
Date Close High Low Open Volume Currency
2000-01-04 283.7 289.5 280.0 289.5 21621 USD
2000-01-05 282.1 285.0 281.0 283.7 25448 USD
2000-01-06 282.4 282.8 280.2 281.6 19055 USD
2000-01-07 282.9 284.5 282.0 282.5 11266 USD
2000-01-10 282.7 283.9 281.8 282.4 30603 USD
2000-01-11 284.4 285.3 281.9 282.4 13500 USD
2000-01-12 283.7 285.0 282.5 284.5 17841 USD
2000-01-13 285.1 285.9 282.9 283.5 12171 USD
2000-01-14 284.9 285.6 284.0 285.2 32339 USD
2000-01-18 289.6 289.8 285.9 286.1 28615 USD
2000-01-19 290.3 291.0 287.5 289.6 24097 USD
2000-01-20 289.3 290.7 288.4 290.3 20707 USD
2000-01-21 289.7 290.3 287.4 289.0 25726 USD
2000-01-24 288.1 289.7 286.3 289.7 32140 USD
2000-01-25 286.6 291.8 286.6 288.0 36122 USD
2000-01-26 286.5 287.5 284.7 286.8 19633 USD
2000-01-27 287.1 287.5 285.6 286.2 14493 USD
2000-01-28 286.0 290.7 283.0 289.6 15193 USD
2000-01-31 286.2 287.4 284.9 286.0 13450 USD
2000-02-01 285.4 286.5 285.0 286.0 18559 USD
2000-02-02 287.7 289.2 285.3 285.4 21679 USD
2000-02-03 289.9 290.9 287.0 287.7 111565 USD
2000-02-04 313.0 318.5 289.9 290.2 81002 USD
2000-02-07 304.5 326.9 302.0 318.0 41222 USD
2000-02-08 301.7 308.0 298.5 306.0 43956 USD
2000-02-09 308.6 314.0 301.7 301.7 68353 USD
2000-02-10 318.7 319.0 308.5 308.5 33896 USD
2000-02-11 313.6 320.4 311.0 318.3 26794 USD
2000-02-14 311.0 313.6 308.3 313.6 29195 USD
2000-02-15 304.1 311.5 303.8 310.7 26789 USD
2000-02-16 305.0 306.0 299.7 304.2 56335 USD
2000-02-17 303.8 312.5 300.0 305.3 11809 USD
2000-02-18 307.3 307.8 302.0 303.3 19378 USD
2000-02-22 307.7 309.4 304.2 307.6 25886 USD
2000-02-23 302.4 308.2 302.3 307.7 28337 USD
2000-02-24 300.9 303.5 300.2 302.4 50881 USD
2000-02-25 294.6 301.2 293.5 300.9 12399 USD
2000-02-28 294.2 295.4 293.1 294.5 19656 USD
2000-02-29 294.2 296.5 293.5 293.8 14909 USD
2000-03-01 293.3 296.2 292.8 294.2 28152 USD
2000-03-02 289.7 293.3 289.4 292.9 14362 USD
2000-03-03 290.3 292.2 289.2 289.7 11764 USD
2000-03-06 289.4 291.7 289.2 290.6 29350 USD
2000-03-07 293.7 295.4 289.4 289.5 23098 USD
2000-03-08 290.4 295.0 289.8 293.8 20155 USD
2000-03-09 292.7 293.0 289.6 290.0 33677 USD
2000-03-10 290.0 294.0 289.4 292.7 15867 USD
2000-03-13 291.5 292.9 290.0 290.0 14104 USD
2000-03-14 289.7 292.6 289.2 291.7 10300 USD
2000-03-15 289.6 291.0 289.3 289.6 39830 USD
2000-03-16 287.0 290.4 286.3 289.9 22018 USD
2000-03-17 285.0 287.0 283.6 286.8 17900 USD
2000-03-20 286.5 286.9 284.5 285.0 42694 USD
2000-03-21 290.4 292.0 284.2 286.5 13804 USD
2000-03-22 288.3 291.1 288.1 290.4 36661 USD
2000-03-23 285.4 288.6 283.8 288.4 12053 USD
2000-03-24 285.1 286.1 284.6 285.6 51450 USD
2000-03-27 280.5 285.4 280.2 285.4 22148 USD
2000-03-28 279.5 281.8 279.4 280.8 17775 USD
2000-03-29 275.9 280.2 274.8 279.5 12107 USD
2000-03-30 279.1 280.2 277.9 279.0 29784 USD
2000-03-31 281.4 283.0 277.5 279.6 31833 USD
2000-04-03 280.4 282.0 279.7 281.5 12345 USD
2000-04-04 286.6 291.5 278.4 280.7 44068 USD
2000-04-05 283.8 287.1 283.2 286.7 21139 USD
2000-04-06 282.7 284.3 281.3 283.8 13216 USD
2000-04-07 282.5 283.6 281.9 283.0 7865 USD
2000-04-10 284.0 285.0 281.5 281.8 6784 USD
2000-04-11 283.7 286.0 283.1 284.3 17914 USD
2000-04-12 283.5 285.7 281.5 283.5 19820 USD
2000-04-13 283.2 284.8 282.0 283.5 11969 USD
2000-04-14 284.6 288.5 282.5 283.4 30672 USD
2000-04-17 284.2 287.5 282.8 285.1 17842 USD
2000-04-18 283.0 284.8 282.4 284.0 15410 USD
2000-04-19 282.5 283.9 282.0 283.0 11574 USD
2000-04-20 281.5 283.0 280.5 282.4 13093 USD
2000-04-24 281.2 282.9 280.9 281.5 7764 USD
2000-04-25 279.8 282.0 279.5 281.0 13617 USD
2000-04-26 277.1 280.1 276.4 280.0 27418 USD
2000-04-27 278.4 279.1 276.8 277.3 15077 USD
2000-04-28 274.7 279.0 274.0 278.2 25399 USD
2000-05-01 274.8 276.1 273.6 274.8 16303 USD
2000-05-02 277.0 277.9 274.7 275.1 16038 USD
2000-05-03 279.1 279.4 276.6 277.2 22230 USD
2000-05-04 281.2 284.0 278.1 280.0 34630 USD
2000-05-05 280.1 283.8 278.5 282.0 30215 USD
2000-05-08 277.6 279.8 277.3 279.8 15420 USD
2000-05-09 278.2 278.9 276.7 278.0 18476 USD
2000-05-10 278.7 280.0 277.9 278.4 14291 USD
2000-05-11 276.5 279.3 276.1 278.7 23254 USD
2000-05-12 276.9 277.9 276.5 276.8 10326 USD
2000-05-15 276.4 277.8 274.8 277.2 19334 USD
2000-05-16 276.2 278.0 275.7 276.8 15838 USD
2000-05-17 273.8 276.2 272.7 276.1 18760 USD
2000-05-18 273.7 274.8 272.1 273.8 18760 USD
2000-05-19 274.6 274.9 273.3 274.4 20592 USD
2000-05-22 276.2 277.5 274.0 274.3 30181 USD
2000-05-23 274.7 277.2 273.8 276.2 23831 USD
2000-05-24 273.8 275.0 273.2 275.0 15927 USD
2000-05-25 270.7 275.0 269.5 273.8 56143 USD
2000-05-26 272.2 273.3 270.1 270.7 30192 USD
2000-05-30 275.5 277.2 275.0 275.1 11812 USD
2000-05-31 274.8 275.9 274.5 275.6 12632 USD
2000-06-01 275.4 276.4 274.8 275.4 65969 USD
2000-06-02 284.2 285.9 275.3 275.5 33366 USD
2000-06-05 288.2 289.1 283.2 283.8 39835 USD
2000-06-06 291.8 294.5 286.6 287.0 33066 USD
2000-06-07 290.0 292.2 286.9 292.0 34158 USD
2000-06-08 286.8 290.2 286.3 290.0 18789 USD
2000-06-09 286.4 289.3 286.1 287.2 17145 USD
2000-06-12 289.1 289.2 285.6 286.4 44398 USD
2000-06-13 288.1 296.5 286.8 289.3 30656 USD
2000-06-14 294.2 294.5 288.1 288.7 23971 USD
2000-06-15 292.1 295.3 289.0 294.0 24105 USD
2000-06-16 291.2 294.4 290.6 291.5 14493 USD
2000-06-19 288.0 291.5 287.5 291.5 13545 USD
2000-06-20 288.1 289.7 287.0 288.2 11995 USD
2000-06-21 288.1 289.6 287.5 288.0 29171 USD
2000-06-22 287.2 291.1 286.5 288.2 24722 USD
2000-06-23 284.8 288.0 284.0 287.3 12557 USD
2000-06-26 285.5 286.5 283.6 284.8 14746 USD
2000-06-27 287.5 287.6 284.8 285.8 39725 USD
2000-06-28 294.3 294.8 287.4 287.7 23137 USD
2000-06-29 290.7 294.3 289.4 294.3 16279 USD
2000-06-30 291.5 291.7 289.1 290.5 25522 USD
2000-07-05 285.3 289.7 285.1 289.7 22407 USD
2000-07-06 284.6 287.0 283.4 285.3 15437 USD
2000-07-07 284.6 285.7 282.6 284.5 14496 USD
2000-07-10 284.9 286.4 284.3 284.7 17938 USD
2000-07-11 283.3 285.2 283.0 284.6 22032 USD
2000-07-12 281.4 284.5 280.6 283.3 15388 USD
2000-07-13 280.9 282.8 280.7 281.7 15388 USD
2000-07-14 281.9 282.0 280.6 281.0 23279 USD
2000-07-17 284.2 284.8 281.7 281.7 17138 USD
2000-07-18 283.1 284.2 282.6 283.7 26952 USD
2000-07-19 279.6 282.1 278.0 281.7 19905 USD
2000-07-20 280.4 281.5 278.3 278.7 13556 USD
2000-07-21 280.6 281.7 279.2 281.2 15240 USD
2000-07-24 279.3 280.4 279.2 280.4 26047 USD
2000-07-25 279.3 281.7 278.8 279.2 22570 USD
2000-07-26 279.7 280.5 279.3 280.0 29016 USD
2000-07-27 278.6 280.1 277.7 280.0 26356 USD
2000-07-28 284.2 285.2 283.7 285.0 17568 USD
2000-07-31 283.2 283.5 282.4 283.2 8947 USD
2000-08-01 283.2 284.4 283.2 283.4 16876 USD
2000-08-02 282.8 285.0 282.4 283.0 32643 USD
2000-08-03 279.5 281.9 278.4 281.1 21323 USD
2000-08-04 278.6 280.5 277.8 278.0 6557 USD
2000-08-07 279.1 279.6 278.4 279.0 16787 USD
2000-08-08 278.7 279.6 277.2 279.5 15243 USD
2000-08-09 277.3 278.5 276.5 277.8 12203 USD
2000-08-10 277.6 278.5 276.9 277.2 14939 USD
2000-08-11 280.3 280.5 277.7 278.4 11120 USD
2000-08-14 279.6 281.0 279.1 280.2 8319 USD
2000-08-15 279.6 280.3 278.7 279.9 22411 USD
2000-08-16 282.3 282.8 279.7 279.8 16032 USD
2000-08-17 282.6 283.9 281.6 282.4 11106 USD
2000-08-18 281.7 283.0 280.3 283.0 6072 USD
2000-08-21 280.0 281.7 279.2 281.5 11525 USD
2000-08-22 278.3 280.2 277.9 280.0 21617 USD
2000-08-23 275.7 278.8 275.3 278.7 12140 USD
2000-08-24 277.5 277.8 275.7 275.7 12396 USD
2000-08-25 278.6 279.3 276.9 277.9 6342 USD
2000-08-28 278.5 279.0 277.7 278.9 16951 USD
2000-08-29 277.5 279.5 276.7 278.6 7963 USD
2000-08-30 278.2 278.4 276.9 277.7 34241 USD
2000-08-31 282.1 282.7 278.4 278.5 8764 USD
2000-09-01 280.9 282.1 280.7 281.9 13478 USD
2000-09-05 279.7 281.3 278.9 281.0 17485 USD
2000-09-06 277.9 280.0 277.5 279.5 17260 USD
2000-09-07 277.7 278.6 276.3 278.2 24340 USD
2000-09-08 276.8 278.4 275.1 278.0 13640 USD
2000-09-11 276.8 277.3 275.9 276.8 24455 USD
2000-09-12 276.6 279.5 276.4 277.0 11101 USD
2000-09-13 276.5 277.4 275.9 276.5 15630 USD
2000-09-14 276.0 278.1 275.5 276.7 16426 USD
2000-09-15 275.8 276.9 274.3 275.8 17196 USD
2000-09-18 274.9 278.0 274.7 276.0 12053 USD
2000-09-19 275.4 276.3 274.0 275.3 23559 USD
2000-09-20 272.5 275.8 272.3 275.4 13480 USD
2000-09-21 273.5 274.2 271.7 272.7 30053 USD
2000-09-22 275.1 279.2 273.5 273.9 13097 USD
2000-09-25 277.3 277.4 275.2 275.2 19561 USD
2000-09-26 277.3 278.7 276.8 277.4 42430 USD
2000-09-27 281.6 282.5 277.1 277.4 28259 USD
2000-09-28 278.9 282.2 278.2 281.7 22090 USD
2000-09-29 276.9 279.0 275.5 278.8 11620 USD
2000-10-02 276.3 277.6 275.6 277.3 10992 USD
2000-10-03 274.8 276.2 274.5 276.1 15465 USD
2000-10-04 273.2 275.3 272.8 275.0 16472 USD
2000-10-05 273.4 274.7 272.3 273.5 15880 USD
2000-10-06 272.1 273.9 271.6 273.8 8482 USD
2000-10-09 272.9 273.2 272.0 272.0 23979 USD
2000-10-10 275.1 275.8 271.2 273.3 23630 USD
2000-10-11 273.0 277.5 272.8 275.3 38963 USD
2000-10-12 278.8 279.3 272.8 273.4 31284 USD
2000-10-13 274.8 279.9 273.8 278.9 11687 USD
2000-10-16 273.9 274.8 273.4 274.6 17153 USD
str(data)
## tibble [198 x 7] (S3: tbl_df/tbl/data.frame)
##  $ Date    : chr [1:198] "2000-01-04" "2000-01-05" "2000-01-06" "2000-01-07" ...
##  $ Close   : num [1:198] 284 282 282 283 283 ...
##  $ High    : num [1:198] 290 285 283 284 284 ...
##  $ Low     : num [1:198] 280 281 280 282 282 ...
##  $ Open    : num [1:198] 290 284 282 282 282 ...
##  $ Volume  : num [1:198] 21621 25448 19055 11266 30603 ...
##  $ Currency: chr [1:198] "USD" "USD" "USD" "USD" ...
data1 <- data[,c(2,5)]
data.y<-ts(data$Open)
plot(data.y, main = "Time Series Plot Harga Emas (Harian)", xlab = "Periode", ylab="Open")
points(data.y)

Grafik di atas merupakan time series plot dari data Harga Emas Harian selama 10 bulan pada tahun 2000. Berdasarkan grafik tersebut, dapat dilihat bahwa data harga emas harian tersebut cenderung memiliki pola gabungan.

Split Data

Data dibagi menjadi dua bagian dengan proporsi 150 data sebagai data train dan 48 data sebagai data test.

train<-data1[1:150,]
test<-data1[151:198,]

Data Time Series

train.ts<-ts(train)
test.ts<-ts(test)
data.ts<-ts(data)

Model Koyck

Metode Koyck didasarkan asumsi bahwa semakin jauh jarak lag peubah independen dari periode sekarang maka semakin kecil pengaruh peubah lag terhadap peubah dependen. Koyck mengusulkan suatu metode untuk menduga model dinamis distributed lag dengan mengasumsikan bahwa semua koefisien 𝛽 mempunyai tanda sama. Model Koyck merupakan jenis paling umum dari model infinite distributed lag dan juga dikenal sebagai geometric lag.

model.koyck = dLagM::koyckDlm(x = train$Close, y = train$Open)
summary(model.koyck)
## 
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.0635  -1.1049   0.5058   1.5947  10.7744 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -20.47862   10.97364  -1.866    0.064 .  
## Y.1          -0.08397    0.09700  -0.866    0.388    
## X.t           1.15623    0.10440  11.075   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.81 on 146 degrees of freedom
## Multiple R-Squared: 0.8152,  Adjusted R-squared: 0.8126 
## Wald test: 391.1 on 2 and 146 DF,  p-value: < 2.2e-16 
## 
## Diagnostic tests:
## NULL
## 
##                             alpha     beta         phi
## Geometric coefficients:  -18.8922 1.156233 -0.08397257
AIC(model.koyck)
## [1] 826.4231
BIC(model.koyck)
## [1] 838.4389

Hipotesis:

H0: β_i=0

H1: β_i≠0

Berdasarkan output di atas, hanya nilai p-value Xt < 0.05 yang mengindikasikan bahwa penduka parameter ini signifikan. Dari pengujian hipotesis, Xt berhasil menolak H0 sehingga Xt berpengaruh, sedangkan b0 dan Yt-1 tidak berhasil menolak H0. Menurut model koyck, nilai Close dipengaruhi oleh nilai Open namun tidak dipengaruhi oleh peubah lag-nya.

#ramalan
(fore.koyck <- dLagM::forecast(model = model.koyck, x=test$Close, h=48))
## $forecasts
##  [1] 276.6745 277.2587 280.3314 279.2641 279.3537 282.4680 282.5533 281.5056
##  [9] 279.6280 277.8200 274.9656 277.2865 278.3635 278.1575 277.0185 277.9235
## [17] 282.3568 280.5971 279.3574 277.3803 277.3150 276.2799 276.3668 276.1283
## [25] 276.0327 275.4626 275.2792 274.2540 274.9182 271.5094 272.9518 274.6807
## [33] 277.0792 276.8778 281.8665 278.3258 276.3106 275.7861 274.0958 272.3878
## [41] 272.7625 271.2279 272.2817 274.7370 272.1027 279.0301 273.8234 273.2200
## 
## $call
## forecast.koyckDlm(model = model.koyck, x = test$Close, h = 48)
## 
## attr(,"class")
## [1] "forecast.koyckDlm" "dLagM"
#mape data testing
mape.koyck <- MAPE(fore.koyck$forecasts, test$Open)

#akurasi data training
mape_train <- dLagM::GoF(model.koyck)["MAPE"]

c("MAPE_testing" = mape.koyck, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.006201109
## 
## $MAPE_training.MAPE
## [1] 0.007829506

Output di atas menunjukkan bahwa nilai MAPE testing lebih kecil dari nilai MAPE training. Selisih antara MAPE testing dan MAPE training juga tidak jauh berbeda yang mengindikasikan bahwa model ini tidak mengalami underfitted atau overfitted.

Regression with Distributed Lag

Regression with Distributed Lag (lag=2)

model.dlm = dLagM::dlm(x = train$Close,y = train$Open , q = 2)
summary(model.dlm)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7737 -0.3257 -0.0721  0.1853  6.1234 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.44396    2.41897   0.597  0.55149    
## x.t          0.03007    0.02192   1.372  0.17213    
## x.1          1.02370    0.02949  34.708  < 2e-16 ***
## x.2         -0.05834    0.02194  -2.659  0.00873 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8713 on 144 degrees of freedom
## Multiple R-squared:  0.9905, Adjusted R-squared:  0.9903 
## F-statistic:  4984 on 3 and 144 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##        AIC      BIC
## 1 385.1715 400.1575
AIC(model.dlm)
## [1] 385.1715
BIC(model.dlm)
## [1] 400.1575

Hipotesis:

H0: β_i=0

H1: β_i≠0

Berdasarkan output di atas, hanya nilai p-value X.1 dan X.2 yang mengindikasikan bahwa penduga parameter ini signifikan. Dari pengujian hipotesis, X.1 dan X.2 berhasil menolak H0 sehingga X.1 dan X.2 berpengaruh, sedangkan b0 dan X.t tidak berhasil menolak H0. Menurut model distribusi lag, nilai Open tidak dipengaruhi oleh nilai Close pada saat itu, namun dipengaruhi oleh nilai Close pada satu dan dua periode sebelumnya.

#ramalan
(fore.dlm <- forecast(model = model.dlm, x=test$Close, h=48)) 
## $forecasts
##  [1] 278.8064 277.4056 277.8756 280.6010 279.7269 279.8490 282.6220 282.7445
##  [9] 281.7546 280.0156 278.2963 275.7880 277.8155 278.8335 278.6369 277.6401
## [17] 278.5323 282.4478 280.9557 279.7432 277.9645 277.8377 276.9280 276.9745
## [25] 276.7668 276.6610 276.1490 275.9464 275.0518 275.5289 272.5611 273.8021
## [33] 275.4478 277.6066 277.6076 281.9283 278.8533 276.9454 276.4027 274.8541
## [41] 273.3097 273.5686 272.2502 273.2112 275.3535 273.2498 279.1895 274.7292
## 
## $call
## forecast.dlm(model = model.dlm, x = test$Close, h = 48)
## 
## attr(,"class")
## [1] "forecast.dlm" "dLagM"
#mape data testing
mape.dlm <- MAPE(fore.dlm$forecasts, test$Open)

#akurasi data training
mape_train <- GoF(model.dlm)["MAPE"]

c("MAPE_testing" = mape.dlm, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.0007290361
## 
## $MAPE_training.MAPE
## [1] 0.001551427

Output di atas menunjukkan bahwa nilai MAPE testing lebih kecil dari nilai MAPE training. Selisih antara MAPE testing dan MAPE training juga tidak jauh berbeda yang mengindikasikan bahwa model ini tidak mengalami underfitted atau overfitted.

Regression with Distributed Lag Optimum

#penentuan lag optimum 
finiteDLMauto(formula = Open ~ Close,
              data = data.frame(train), q.min = 1, q.max = 5 ,
              model.type = "dlm", error.type = "AIC", trace = TRUE) ##q max lag maksimum
##   q - k    MASE      AIC      BIC   GMRAE   MBRAE R.Adj.Sq Ljung-Box
## 5     5 0.20165 384.0514 407.8653 0.29467 0.07172  0.99017 0.8277074
## 3     3 0.20366 384.4289 402.3715 0.36627 0.08810  0.99025 0.7733520
## 4     4 0.20316 384.8739 405.7592 0.35046 0.08565  0.99017 0.7729545
## 2     2 0.20224 385.1715 400.1575 0.33825 0.09264  0.99026 0.7653745
## 1     1 0.19138 391.8557 403.8715 0.30979 0.15228  0.98986 0.6320060

5 merupakan nilai lag optimum bagi model.

#model dlm dengan lag optimum
model.dlm2 = dLagM::dlm(x = train$Close,y = train$Open , q = 5) 
summary(model.dlm2)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8547 -0.3238 -0.0729  0.1852  6.0628 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.63944    2.57234   0.637    0.525    
## x.t          0.03277    0.02255   1.454    0.148    
## x.1          1.01952    0.03040  33.532   <2e-16 ***
## x.2         -0.03962    0.02995  -1.323    0.188    
## x.3         -0.01766    0.02996  -0.590    0.556    
## x.4         -0.01786    0.03042  -0.587    0.558    
## x.5          0.01764    0.02264   0.779    0.437    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8824 on 138 degrees of freedom
## Multiple R-squared:  0.9906, Adjusted R-squared:  0.9902 
## F-statistic:  2419 on 6 and 138 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##        AIC      BIC
## 1 384.0514 407.8653
AIC(model.dlm2)
## [1] 384.0514
BIC(model.dlm2)
## [1] 407.8653

Hipotesis:

H0: β_i=0

H1: β_i≠0

Berdasarkan output di atas, hanya nilai p-value X.1 < 0.05 yang mengindikasikan bahwa penduga parameter ini signifikan. Dari pengujian hipotesis, X.1 berhasil menolak H0 sehingga X.1 berpengaruh, sedangkan b0, X.t, X.2, X.3, X.4, dan X.5 tidak berhasil menolak H0. Menurut model distribusi lag (optimum), nilai Open hanya dipengaruhi oleh nilai Close pada satu periode sebelumnya.

#ramalan
(fore.dlm2 <- forecast(model = model.dlm2, x=test$Close, h=48))
## $forecasts
##  [1] 278.8844 277.4318 277.8639 280.6224 279.8144 279.8529 282.5849 282.8144
##  [9] 281.7692 279.9624 278.2696 275.7965 277.8153 278.9084 278.7148 277.6247
## [17] 278.5197 282.4676 281.0541 279.7202 277.8899 277.8393 276.9617 277.0053
## [25] 276.7858 276.6880 276.1633 275.9550 275.0689 275.5299 272.5969 273.7875
## [33] 275.4776 277.7001 277.6566 281.9023 278.9026 276.9137 276.3030 274.9044
## [41] 273.3379 273.5645 272.3019 273.2397 275.3733 273.3478 279.1371 274.8116
## 
## $call
## forecast.dlm(model = model.dlm2, x = test$Close, h = 48)
## 
## attr(,"class")
## [1] "forecast.dlm" "dLagM"
#akurasi testing
mape.dlm2 <- MAPE(fore.dlm2$forecasts, test$Open)

#akurasi data training
mape_train <- GoF(model.dlm2)["MAPE"]

c("MAPE_testing" = mape.dlm2, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.0007382915
## 
## $MAPE_training.MAPE
## [1] 0.001573662

Output di atas menunjukkan bahwa nilai MAPE testing lebih kecil dari nilai MAPE training. Selisih antara MAPE testing dan MAPE training juga tidak jauh berbeda yang mengindikasikan bahwa model ini tidak mengalami underfitted atau overfitted.

Model Autoregressive / Dynamic Regression

Apabila peubah dependen dipengaruhi oleh peubah independen pada waktu sekarang, serta dipengaruhii juga oleh peubah dependen itu sendiri pada satu waktu yang lalu maka model tersebut disebut autoregressive (Gujarati, 2004)

MODEL AUTOREGRESSIVE

#library(dLagM)
model.ardl = ardlDlm(x = train$Close, y = train$Open, p = 1 , q = 1) #p:lag x, q:lag y
#model untuk p=1, q=1: yt=b0+b1yt-1+b2xt+b3xt-1
#model untuk p=2, q=3: yt=b0+b1yt-1+b2yt-2+b3xt+b4xt-1+b5xt-2

summary(model.ardl)
## 
## Time series regression with "ts" data:
## Start = 2, End = 150
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7910 -0.3085 -0.0893  0.1764  6.1289 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.31021    2.41780   0.542   0.5887    
## X.t          0.02994    0.02191   1.367   0.1739    
## X.1          1.01833    0.02931  34.745   <2e-16 ***
## Y.1         -0.05233    0.02158  -2.425   0.0166 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8719 on 145 degrees of freedom
## Multiple R-squared:  0.9904, Adjusted R-squared:  0.9902 
## F-statistic:  4980 on 3 and 145 DF,  p-value: < 2.2e-16
AIC(model.ardl)
## [1] 387.9335
BIC(model.ardl)
## [1] 402.9533
#ramalan
(fore.ardl <- forecast(model = model.ardl, x=test$Close, h=48))
## $forecasts
##  [1] 278.7925 277.4129 277.8714 280.5760 279.7216 279.8471 282.5990 282.7336
##  [9] 281.7591 280.0281 278.3097 275.8059 277.8028 278.8155 278.6307 277.6430
## [17] 278.5243 282.4137 280.9522 279.7528 277.9766 277.8390 276.9297 276.9713
## [25] 276.7624 276.6566 276.1470 275.9430 275.0522 275.5211 272.5734 273.7939
## [33] 275.4252 277.5801 277.5961 281.8932 278.8590 276.9632 276.4065 274.8602
## [41] 273.3178 273.5633 272.2506 273.1998 275.3275 273.2514 279.1465 274.7378
## 
## $call
## forecast.ardlDlm(model = model.ardl, x = test$Close, h = 48)
## 
## attr(,"class")
## [1] "forecast.ardlDlm" "dLagM"
#akurasi testing
mape.ardl <- MAPE(fore.ardl$forecasts, test$Open) #data testing

#akurasi data training
mape_train <- GoF(model.ardl)["MAPE"]

c("MAPE_testing" = mape.ardl, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.0007137695
## 
## $MAPE_training.MAPE
## [1] 0.001525473

Output di atas menunjukkan bahwa nilai MAPE testing lebih kecil dari nilai MAPE training. Selisih antara MAPE testing dan MAPE training juga tidak jauh berbeda yang mengindikasikan bahwa model ini tidak mengalami underfitted atau overfitted.

#penentuan lag optimum
ardlBoundOrders(data = data.frame(train) , formula = Open ~ Close ) #yang digunakan harusnya data train, tetapi karena keterbatasan data jika menggunakan data train menyebabkan error sehingga dicontohkan menggunakan keseluruhan data
## $p
##   Close
## 1    14
## 
## $q
## [1] 4
## 
## $Stat.table
##           q = 1    q = 2    q = 3    q = 4    q = 5    q = 6    q = 7    q = 8
## p = 1  388.3069 388.2546 388.6754 388.2191 387.4607 387.7174 387.7701 387.0566
## p = 2  388.3073 390.2056 390.6190 390.1672 389.3971 389.6751 389.7392 388.9914
## p = 3  389.7309 389.7309 391.7030 391.3806 390.5694 390.8701 390.9067 390.1089
## p = 4  383.4454 384.6310 384.6310 386.1779 385.8579 386.1069 386.0749 385.5465
## p = 5  387.1548 388.3276 383.7000 383.7000 385.6973 386.0385 385.8957 385.1800
## p = 6  388.9442 390.2352 385.8584 385.0006 385.0006 386.6522 386.5192 385.8296
## p = 7  390.0265 390.9053 386.1040 385.1772 385.8541 385.8541 387.5211 386.6316
## p = 8  388.3050 389.4032 385.5454 384.9660 385.7663 386.9417 386.9417 388.2578
## p = 9  387.7174 388.8970 384.5953 383.7989 384.6838 386.0638 387.4533 387.4533
## p = 10 388.3087 389.2967 385.0032 384.4778 385.3830 386.7566 388.3797 389.5831
## p = 11 388.0492 388.9358 384.8746 384.2331 385.1609 386.5669 388.1477 389.2701
## p = 12 388.1037 389.1995 385.0346 384.2143 385.2084 386.4348 388.1271 389.1036
## p = 13 387.6927 388.7839 384.6458 384.0472 385.2003 386.5174 388.2312 389.1129
## p = 14 386.6808 387.4928 383.5575 383.0334 384.2071 385.7101 387.5685 388.8390
## p = 15 388.2170 389.2788 385.3083 384.7243 385.9282 387.2564 388.9155 390.0624
##           q = 9   q = 10   q = 11   q = 12   q = 13   q = 14   q = 15
## p = 1  386.0751 386.1026 386.4390 386.0212 386.4144 386.3861 386.5806
## p = 2  387.9696 388.0249 388.3410 387.9254 388.3229 388.2859 388.4520
## p = 3  389.1851 389.1781 389.5092 389.1907 389.5948 389.5900 389.7851
## p = 4  384.9331 384.9331 385.1743 385.0156 385.4956 385.2440 385.5489
## p = 5  384.8176 384.7260 384.8365 384.5285 385.0361 384.8438 385.1392
## p = 6  385.3865 385.3863 385.5406 385.3995 385.8960 385.8387 386.1978
## p = 7  386.2992 386.1990 386.2074 386.0552 386.5798 386.5962 386.9149
## p = 8  387.9187 387.8333 387.7864 387.7606 388.2807 388.3071 388.5874
## p = 9  389.4457 389.3049 389.1455 389.2024 389.7368 389.8024 390.0921
## p = 10 389.5831 391.3011 391.1449 391.1998 391.7340 391.8010 392.0920
## p = 11 391.2694 391.2694 392.6837 392.6275 393.1568 393.3249 393.5692
## p = 12 391.1032 392.7612 392.7612 394.5633 395.0868 395.2692 395.4878
## p = 13 391.1127 392.7964 394.7913 394.7913 396.6854 396.7787 397.0601
## p = 14 390.8260 392.4214 394.4001 395.8267 395.8267 397.7595 398.1621
## p = 15 392.0601 393.6994 395.6774 397.3207 398.2566 398.2566 400.0559
## 
## $min.Stat
## [1] 383.0334

PEMODELAN DLM dan ARDL dengan library dynlm

#sama dengan model dlm p=1
cons_lm1 <- dynlm(Open ~ Close+L(Close),data = train.ts)

#sama dengan model ardl p=0 q=1
cons_lm2 <- dynlm(Open ~ Close+L(Open),data = train.ts)

#sama dengan ardl p=1 q=1
cons_lm3 <- dynlm(Open ~ Close+L(Close)+L(Open),data = train.ts)

#sama dengan dlm p=2
cons_lm4 <- dynlm(Open ~ Close+L(Close)+L(Close,2),data = train.ts)

#Ringkasan Model
summary(cons_lm1)
## 
## Time series regression with "ts" data:
## Start = 2, End = 150
## 
## Call:
## dynlm(formula = Open ~ Close + L(Close), data = train.ts)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7577 -0.2787 -0.0828  0.1172  6.1023 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.23678    2.41632   0.098    0.922    
## Close        0.02842    0.02226   1.277    0.204    
## L(Close)     0.97122    0.02231  43.534   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8863 on 146 degrees of freedom
## Multiple R-squared:   0.99,  Adjusted R-squared:  0.9899 
## F-statistic:  7225 on 2 and 146 DF,  p-value: < 2.2e-16
summary(cons_lm2)
## 
## Time series regression with "ts" data:
## Start = 2, End = 150
## 
## Call:
## dynlm(formula = Open ~ Close + L(Open), data = train.ts)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.3187  -1.2429  -0.1319   1.1273  19.9969 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.17515    7.35632   0.432    0.667    
## Close        0.54441    0.04914  11.078  < 2e-16 ***
## L(Open)      0.44471    0.04918   9.042 8.69e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.653 on 146 degrees of freedom
## Multiple R-squared:  0.9104, Adjusted R-squared:  0.9091 
## F-statistic: 741.3 on 2 and 146 DF,  p-value: < 2.2e-16
summary(cons_lm3)
## 
## Time series regression with "ts" data:
## Start = 2, End = 150
## 
## Call:
## dynlm(formula = Open ~ Close + L(Close) + L(Open), data = train.ts)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7910 -0.3085 -0.0893  0.1764  6.1289 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.31021    2.41780   0.542   0.5887    
## Close        0.02994    0.02191   1.367   0.1739    
## L(Close)     1.01833    0.02931  34.745   <2e-16 ***
## L(Open)     -0.05233    0.02158  -2.425   0.0166 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8719 on 145 degrees of freedom
## Multiple R-squared:  0.9904, Adjusted R-squared:  0.9902 
## F-statistic:  4980 on 3 and 145 DF,  p-value: < 2.2e-16
summary(cons_lm4)
## 
## Time series regression with "ts" data:
## Start = 3, End = 150
## 
## Call:
## dynlm(formula = Open ~ Close + L(Close) + L(Close, 2), data = train.ts)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7737 -0.3257 -0.0721  0.1853  6.1234 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.44396    2.41897   0.597  0.55149    
## Close        0.03007    0.02192   1.372  0.17213    
## L(Close)     1.02370    0.02949  34.708  < 2e-16 ***
## L(Close, 2) -0.05834    0.02194  -2.659  0.00873 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8713 on 144 degrees of freedom
## Multiple R-squared:  0.9905, Adjusted R-squared:  0.9903 
## F-statistic:  4984 on 3 and 144 DF,  p-value: < 2.2e-16
#SSE
deviance(cons_lm1)
## [1] 114.6956
deviance(cons_lm2)
## [1] 1027.93
deviance(cons_lm3)
## [1] 110.2263
deviance(cons_lm4)
## [1] 109.3198

Uji Diagnostik Model

Uji Non Autokorelasi

#uji model
if(require("lmtest")) encomptest(cons_lm1, cons_lm2)
## Encompassing test
## 
## Model 1: Open ~ Close + L(Close)
## Model 2: Open ~ Close + L(Open)
## Model E: Open ~ Close + L(Close) + L(Open)
##           Res.Df Df         F  Pr(>F)    
## M1 vs. ME    145 -1    5.8792 0.01655 *  
## M2 vs. ME    145 -1 1207.2166 < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Diagnostik
#durbin watson
dwtest(cons_lm1)
## 
##  Durbin-Watson test
## 
## data:  cons_lm1
## DW = 1.9216, p-value = 0.2886
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm2)
## 
##  Durbin-Watson test
## 
## data:  cons_lm2
## DW = 2.9565, p-value = 1
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm3)
## 
##  Durbin-Watson test
## 
## data:  cons_lm3
## DW = 1.8427, p-value = 0.1501
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm4)
## 
##  Durbin-Watson test
## 
## data:  cons_lm4
## DW = 1.948, p-value = 0.347
## alternative hypothesis: true autocorrelation is greater than 0

Uji Heterogenitas

#heterogenitas
bptest(cons_lm1)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm1
## BP = 0.48254, df = 2, p-value = 0.7856
bptest(cons_lm2)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm2
## BP = 32.672, df = 2, p-value = 8.043e-08
bptest(cons_lm3)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm3
## BP = 4.599, df = 3, p-value = 0.2036
bptest(cons_lm4)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm4
## BP = 5.3111, df = 3, p-value = 0.1504

Uji Normalitas

#shapiro wilk
shapiro.test(residuals(cons_lm1))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm1)
## W = 0.60252, p-value < 2.2e-16
shapiro.test(residuals(cons_lm2))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm2)
## W = 0.76871, p-value = 4.709e-14
shapiro.test(residuals(cons_lm3))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm3)
## W = 0.6518, p-value < 2.2e-16
shapiro.test(residuals(cons_lm4))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm4)
## W = 0.66408, p-value < 2.2e-16

Perbandingan Keakuratan Ramalan

PERBANDINGAN

akurasi <- matrix(c(mape.koyck, mape.dlm, mape.dlm2, mape.ardl))
row.names(akurasi)<- c("Koyck","DLM 1","DLM 2","Autoregressive")
colnames(akurasi) <- c("MAPE")
akurasi
##                        MAPE
## Koyck          0.0062011089
## DLM 1          0.0007290361
## DLM 2          0.0007382915
## Autoregressive 0.0007137695

PLOT

par(mfrow=c(1,1))
plot(test$Close, test$Open, type="b", col="black", ylim=c(270.7,318.3))
points(test$Close, fore.koyck$forecasts,col="red")
lines(test$Close, fore.koyck$forecasts,col="red")
points(test$Close, fore.dlm$forecasts,col="blue")
lines(test$Close, fore.dlm$forecasts,col="blue")
points(test$Close, fore.dlm2$forecasts,col="orange")
lines(test$Close, fore.dlm2$forecasts,col="orange")
points(test$Close, fore.ardl$forecasts,col="green")
lines(test$Close, fore.ardl$forecasts,col="green")
legend("topleft",c("aktual", "koyck","DLM 1","DLM 2", "autoregressive"), lty=1, col=c("black","red","blue","orange","green"), cex=0.8)