Model-Kyock.knit

#PACKAGES
library(dLagM)

## Loading required package: nardl

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## Loading required package: dynlm

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(dynlm)
library(MLmetrics)

## 
## Attaching package: 'MLmetrics'

## The following object is masked from 'package:dLagM':
## 
##     MAPE

## The following object is masked from 'package:base':
## 
##     Recall

library(lmtest)
library(car)

## Loading required package: carData

library(readxl)

Dataset yang digunakan pada analisis ini merupakan data iklim yang terjadi di New Delhi pada rentang periode 1 tahun berdasarkan data harian sebanyak 365 hari di tahun 2013. Data dibagi menjadi 2 bagian yaitu data training dan testing dengan perbandingan 80:20. Data training akan digunakan untuk menentukan model yang sesuai berdasarkan keseluruhan dataset dengan jumlah 292 data. Sedangkan data testing akan digunakan untuk menguji dan mengetahui performa model yang telah didapatkan pada tahap sebelumnya dan berjumlah 73 data.

#IMPORT DATA
data<-read_excel("~/Downloads/buat bibit.xlsx", sheet = 2)
str(data)

## tibble [365 × 2] (S3: tbl_df/tbl/data.frame)
##  $ xt: num [1:365] 10 7.4 7.17 8.67 6 ...
##  $ yt: num [1:365] 1016 1018 1019 1017 1016 ...

data

## # A tibble: 365 × 2
##       xt    yt
##    <dbl> <dbl>
##  1 10    1016.
##  2  7.4  1018.
##  3  7.17 1019.
##  4  8.67 1017.
##  5  6    1016.
##  6  7    1018 
##  7  7    1020 
##  8  8.86 1019.
##  9 14    1017 
## 10 11    1016.
## # … with 355 more rows

#SPLIT DATA
train<-data[1:292,]
test<-data[293:365,]

#data time series
train.ts<-ts(train)
test.ts<-ts(test)
data.ts<-ts(data)

Model Koyck adalah suatu metode untuk menduga model dinamis ter-distribusi lag dengan mengasumsikan bahwa semua koefisien 𝛽 mempunyai tanda sama. Model Koyck merupakan jenis paling umum dari model infinite distributed lag dan juga dikenal sebagai geometric lag. Adapun AIC (Akaike’s Information Criteria) dan BIC (Bayesian Information Criteria) digunakan untuk mengukur kebaikan model dimana semakin kecil nilai AIC/BIC yang diperoleh, semakin bagus suatu model.

#MODEL KOYCK
model.koyck = koyckDlm(x = train$xt, y = train$yt)
summary(model.koyck)

## 
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7.50692 -1.10870 -0.03138  0.96298  6.68910 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 125.30041   31.39876   3.991 8.37e-05 ***
## Y.1           0.87833    0.03043  28.865  < 2e-16 ***
## X.t          -0.11084    0.03321  -3.338 0.000955 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.703 on 288 degrees of freedom
## Multiple R-Squared: 0.9486,  Adjusted R-squared: 0.9483 
## Wald test:  2650 on 2 and 288 DF,  p-value: < 2.2e-16 
## 
## Diagnostic tests:
## NULL
## 
##                             alpha       beta       phi
## Geometric coefficients:  1029.812 -0.1108421 0.8783269

AIC(model.koyck)

## [1] 1140.781

BIC(model.koyck)

## [1] 1155.474

fore.koyck <- forecast(model = model.koyck, x=test$xt, h=73)
mape.koyck <- MAPE(fore.koyck$forecasts, test$yt)
#akurasi data training
GoF(model.koyck)

##               n      MAE           MPE        MAPE       sMAPE      MASE
## model.koyck 291 1.277827 -2.897208e-06 0.001270682 0.001270747 0.9570558
##                  MSE     MRAE   GMRAE
## model.koyck 2.871517 50443496 1.15466

Distribution lag model adalah model regresi yang tidak hanya mencakup nilai sekarang tetapi juga nilai masa lalu (lag) dari variabel penjelas (X) sedangkan autoregressive distributed lag adalag model yang mencakup saru atau lebih nilai masa lalu (lag) dari variabel terkait antara variabel penjelasnya. Dalam kasus dataset berikut digunakan lag sebesar 2 agar data menjadi stasioner. Kemudian dicari nilai MAPE (Mean Absolute Percentage Error) untuk mengetahui ukuran akurasi prediksi metode peramalan dalam statistik untuk diperbandingkan dengan nilai error lainnya.

#REGRESSION WITH DISTRIBUTED LAG
model.dlm = dlm(x = train$xt,y = train$yt , q = 2)
summary(model.dlm)

## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5547 -2.4705  0.3057  2.3251  8.0997 
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) 1030.7719     0.7993 1289.599  < 2e-16 ***
## x.t           -0.4599     0.1130   -4.070  6.1e-05 ***
## x.1           -0.2068     0.1331   -1.554   0.1212    
## x.2           -0.2775     0.1115   -2.488   0.0134 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.468 on 286 degrees of freedom
## Multiple R-squared:  0.7865, Adjusted R-squared:  0.7843 
## F-statistic: 351.3 on 3 and 286 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##        AIC      BIC
## 1 1550.219 1568.568

AIC(model.dlm)

## [1] 1550.219

BIC(model.dlm)

## [1] 1568.568

fore.dlm <- forecast(model = model.dlm, x=test$xt, h=73)
mape.dlm <- MAPE(fore.dlm$forecasts, test$yt)
#akurasi data training
GoF(model.dlm)

##             n      MAE           MPE        MAPE       sMAPE    MASE     MSE
## model.dlm 290 2.786971 -1.176615e-05 0.002772449 0.002771991 2.08483 11.8606
##                MRAE    GMRAE
## model.dlm 330379323 2.627131

#penentuan lag optimum 
finiteDLMauto(formula = yt ~ xt,
              data = data.frame(train), q.min = 1, q.max = 4,
              model.type = "dlm", error.type = "AIC", trace = FALSE)

##   q - k    MASE     AIC      BIC   GMRAE    MBRAE R.Adj.Sq Ljung-Box
## 4     4 2.01832 1532.07 1557.711 2.40038 -0.04749  0.78878         0

#model dlm dengan lag optimum
model.dlm2 = dlm(x = train$xt,y = train$yt , q = 4)
summary(model.dlm2)

## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4351 -2.4987  0.2324  2.2651  8.2445 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) 1031.22896    0.80911 1274.518  < 2e-16 ***
## x.t           -0.42310    0.11247   -3.762 0.000205 ***
## x.1           -0.16776    0.13374   -1.254 0.210744    
## x.2           -0.05541    0.13542   -0.409 0.682722    
## x.3           -0.11834    0.13339   -0.887 0.375746    
## x.4           -0.19691    0.11085   -1.776 0.076761 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.411 on 282 degrees of freedom
## Multiple R-squared:  0.7925, Adjusted R-squared:  0.7888 
## F-statistic: 215.3 on 5 and 282 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##       AIC      BIC
## 1 1532.07 1557.711

AIC(model.dlm2)

## [1] 1532.07

BIC(model.dlm2)

## [1] 1557.711

fore.dlm2 <- forecast(model = model.dlm2, x=test$xt, h=73)
mape.dlm2<- MAPE(fore.dlm2$forecasts, test$yt)
#akurasi data training
GoF(model.dlm2)

##              n      MAE           MPE        MAPE       sMAPE     MASE      MSE
## model.dlm2 288 2.701623 -1.130744e-05 0.002687917 0.002687471 2.018318 11.39528
##                 MRAE    GMRAE
## model.dlm2 329229591 2.400377

Model Autoregressive atau Dynamic Regression merupakan model yang apabila peubah dependen dipengaruhi oleh peubah independen pada waktu sekarang, serta dipengaruhi juga oleh peubah dependen itu sendiri pada satu waktu yang lalu. Adapun dari hasil yang diperoleh akan dihasilkan beberapa nilai MAPE dari berbagai model. Hasil perbandingan menunjukan bahwa nilai Autoregressive memiiliki nilai MAPE terkecil sehingga disimpukkan bahwa model Autoregressive merupakan model terbaik.

#MODEL AUTOREGRESSIVE 
#library(dLagM)
model.ardl = ardlDlm(x = train$xt, y = train$yt, p = 1 , q = 1)
summary(model.ardl)

## 
## Time series regression with "ts" data:
## Start = 2, End = 292
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.4686 -1.0666 -0.0284  0.9608  6.3768 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 138.06818   28.83132   4.789 2.69e-06 ***
## X.t          -0.25353    0.05319  -4.767 2.98e-06 ***
## X.1           0.12515    0.05483   2.282   0.0232 *  
## Y.1           0.86610    0.02798  30.957  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.685 on 287 degrees of freedom
## Multiple R-squared:  0.9499, Adjusted R-squared:  0.9493 
## F-statistic:  1812 on 3 and 287 DF,  p-value: < 2.2e-16

AIC(model.ardl)

## [1] 1135.573

BIC(model.ardl)

## [1] 1153.94

fore.ardl <- forecast(model = model.ardl, x=test$xt, h=73)
mape.ardl <- MAPE(fore.ardl$forecasts, test$yt)
#akurasi data training
GoF(model.ardl)

##              n      MAE           MPE        MAPE       sMAPE     MASE      MSE
## model.ardl 291 1.268052 -2.775011e-06 0.001260977 0.001260997 0.949735 2.801268
##                MRAE    GMRAE
## model.ardl 49069328 1.129525

#penentuan lag optimum
ardlBoundOrders(data = data.frame(data) , formula = yt ~ xt )

## $p
##   xt
## 1 15
## 
## $q
## [1] 2
## 
## $Stat.table
##           q = 1    q = 2    q = 3    q = 4    q = 5    q = 6    q = 7    q = 8
## p = 1  1386.991 1367.127 1362.649 1359.837 1358.771 1355.141 1352.691 1351.937
## p = 2  1385.544 1369.022 1364.534 1361.661 1360.589 1356.838 1354.411 1353.663
## p = 3  1367.243 1367.243 1365.820 1362.870 1361.660 1357.896 1355.203 1354.453
## p = 4  1364.986 1363.768 1363.768 1364.859 1363.644 1359.888 1357.202 1356.451
## p = 5  1369.156 1362.350 1363.997 1363.997 1365.642 1361.888 1359.186 1358.435
## p = 6  1370.801 1360.938 1361.706 1363.591 1363.591 1363.561 1360.895 1360.165
## p = 7  1364.159 1355.104 1355.934 1357.838 1359.348 1359.348 1358.420 1357.703
## p = 8  1358.428 1350.638 1351.770 1353.672 1355.452 1357.452 1357.452 1359.452
## p = 9  1360.048 1350.567 1351.567 1353.344 1355.342 1356.333 1356.779 1356.779
## p = 10 1355.248 1344.047 1345.291 1346.858 1348.850 1349.452 1350.064 1351.665
## p = 11 1349.400 1339.228 1340.564 1342.233 1344.191 1345.302 1346.327 1347.618
## p = 12 1340.831 1332.785 1334.164 1335.862 1337.860 1339.406 1340.567 1342.017
## p = 13 1345.159 1335.501 1336.447 1337.993 1339.918 1341.161 1341.725 1343.519
## p = 14 1346.231 1335.495 1336.205 1337.753 1339.665 1340.741 1341.008 1342.816
## p = 15 1342.708 1331.747 1332.892 1334.347 1336.083 1337.069 1336.833 1338.556
##           q = 9   q = 10   q = 11   q = 12   q = 13   q = 14   q = 15
## p = 1  1348.415 1341.911 1339.869 1336.795 1335.974 1335.191 1332.640
## p = 2  1350.315 1343.771 1341.658 1338.758 1337.937 1337.155 1334.572
## p = 3  1351.271 1344.800 1342.934 1339.976 1339.065 1338.274 1335.777
## p = 4  1353.268 1346.799 1344.924 1341.878 1340.971 1340.140 1337.646
## p = 5  1355.213 1348.663 1346.817 1343.847 1342.923 1342.089 1339.511
## p = 6  1356.838 1350.473 1348.573 1345.485 1344.580 1343.675 1341.162
## p = 7  1354.078 1347.647 1345.337 1342.896 1341.888 1341.121 1338.261
## p = 8  1355.872 1349.622 1347.284 1344.833 1343.844 1343.070 1340.224
## p = 9  1357.837 1351.560 1349.256 1346.773 1345.797 1345.013 1342.197
## p = 10 1351.665 1350.680 1348.463 1345.529 1344.616 1343.744 1341.194
## p = 11 1349.237 1349.237 1350.268 1347.277 1346.293 1345.430 1342.751
## p = 12 1344.012 1345.760 1345.760 1347.490 1346.559 1345.815 1343.048
## p = 13 1345.281 1345.315 1345.883 1345.883 1347.818 1347.087 1344.578
## p = 14 1344.468 1344.218 1345.106 1346.650 1346.650 1348.644 1346.196
## p = 15 1339.945 1339.584 1340.330 1341.935 1343.742 1343.742 1345.175
## 
## $min.Stat
## [1] 1331.747

#PEMODELAN DLM dan ARDL dengan library dynlm
#sama dengan model dlm q=1
cons_lm1 <- dynlm(yt ~ xt+L(xt),data = train.ts)
#sama dengan model ardl p=1 q=0
cons_lm2 <- dynlm(yt ~ xt+L(yt),data = train.ts)
#sama dengan ardl p=1 q=1
cons_lm3 <- dynlm(yt ~ xt+L(xt)+L(yt),data = train.ts)
#sama dengan dlm p=2
cons_lm4 <- dynlm(yt ~ xt+L(xt)+L(xt,2),data = train.ts)

#Ringkasan Model
summary(cons_lm1)

## 
## Time series regression with "ts" data:
## Start = 2, End = 292
## 
## Call:
## dynlm(formula = yt ~ xt + L(xt), data = train.ts)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.6006  -2.4309   0.3092   2.4423   8.4211 
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) 1030.5164     0.7966 1293.724  < 2e-16 ***
## xt            -0.5217     0.1091   -4.781 2.79e-06 ***
## L(xt)         -0.4128     0.1081   -3.817 0.000165 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.505 on 288 degrees of freedom
## Multiple R-squared:  0.7825, Adjusted R-squared:  0.7809 
## F-statistic: 517.9 on 2 and 288 DF,  p-value: < 2.2e-16

summary(cons_lm2)

## 
## Time series regression with "ts" data:
## Start = 2, End = 292
## 
## Call:
## dynlm(formula = yt ~ xt + L(yt), data = train.ts)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.4315 -1.0833 -0.0186  0.9380  6.4200 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 158.99503   27.53366   5.775 2.00e-08 ***
## xt           -0.15025    0.02816  -5.337 1.92e-07 ***
## L(yt)         0.84586    0.02673  31.646  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.698 on 288 degrees of freedom
## Multiple R-squared:  0.949,  Adjusted R-squared:  0.9486 
## F-statistic:  2677 on 2 and 288 DF,  p-value: < 2.2e-16

summary(cons_lm3)

## 
## Time series regression with "ts" data:
## Start = 2, End = 292
## 
## Call:
## dynlm(formula = yt ~ xt + L(xt) + L(yt), data = train.ts)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.4686 -1.0666 -0.0284  0.9608  6.3768 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 138.06818   28.83132   4.789 2.69e-06 ***
## xt           -0.25353    0.05319  -4.767 2.98e-06 ***
## L(xt)         0.12515    0.05483   2.282   0.0232 *  
## L(yt)         0.86610    0.02798  30.957  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.685 on 287 degrees of freedom
## Multiple R-squared:  0.9499, Adjusted R-squared:  0.9493 
## F-statistic:  1812 on 3 and 287 DF,  p-value: < 2.2e-16

summary(cons_lm4)

## 
## Time series regression with "ts" data:
## Start = 3, End = 292
## 
## Call:
## dynlm(formula = yt ~ xt + L(xt) + L(xt, 2), data = train.ts)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5547 -2.4705  0.3057  2.3251  8.0997 
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) 1030.7719     0.7993 1289.599  < 2e-16 ***
## xt            -0.4599     0.1130   -4.070  6.1e-05 ***
## L(xt)         -0.2068     0.1331   -1.554   0.1212    
## L(xt, 2)      -0.2775     0.1115   -2.488   0.0134 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.468 on 286 degrees of freedom
## Multiple R-squared:  0.7865, Adjusted R-squared:  0.7843 
## F-statistic: 351.3 on 3 and 286 DF,  p-value: < 2.2e-16

#SSE
deviance(cons_lm1)

## [1] 3537.114

deviance(cons_lm2)

## [1] 829.9657

deviance(cons_lm3)

## [1] 815.169

deviance(cons_lm4)

## [1] 3439.574

#uji model
if(require("lmtest")) encomptest(cons_lm1, cons_lm2)

## Encompassing test
## 
## Model 1: yt ~ xt + L(xt)
## Model 2: yt ~ xt + L(yt)
## Model E: yt ~ xt + L(xt) + L(yt)
##           Res.Df Df        F Pr(>F)    
## M1 vs. ME    287 -1 958.3266 <2e-16 ***
## M2 vs. ME    287 -1   5.2095 0.0232 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Diagnostik
#durbin watson
dwtest(cons_lm1)

## 
##  Durbin-Watson test
## 
## data:  cons_lm1
## DW = 0.28827, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

dwtest(cons_lm2)

## 
##  Durbin-Watson test
## 
## data:  cons_lm2
## DW = 1.8614, p-value = 0.09871
## alternative hypothesis: true autocorrelation is greater than 0

dwtest(cons_lm3)

## 
##  Durbin-Watson test
## 
## data:  cons_lm3
## DW = 1.9066, p-value = 0.1862
## alternative hypothesis: true autocorrelation is greater than 0

dwtest(cons_lm4)

## 
##  Durbin-Watson test
## 
## data:  cons_lm4
## DW = 0.28712, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

#heterogenitas
bptest(cons_lm1)

## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm1
## BP = 0.32262, df = 2, p-value = 0.851

bptest(cons_lm2)

## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm2
## BP = 4.5563, df = 2, p-value = 0.1025

bptest(cons_lm3)

## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm3
## BP = 7.3445, df = 3, p-value = 0.06169

bptest(cons_lm4)

## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm4
## BP = 1.3406, df = 3, p-value = 0.7195

#shapiro wilk
shapiro.test(residuals(cons_lm1))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm1)
## W = 0.99324, p-value = 0.2138

shapiro.test(residuals(cons_lm2))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm2)
## W = 0.97851, p-value = 0.0002307

shapiro.test(residuals(cons_lm3))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm3)
## W = 0.97829, p-value = 0.0002106

shapiro.test(residuals(cons_lm4))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm4)
## W = 0.99147, p-value = 0.09164

#PERBANDINGAN
akurasi <- matrix(c(mape.koyck, mape.dlm, mape.dlm2, mape.ardl))
row.names(akurasi)<- c("Koyck","DLM 1","DLM 2","Autoregressive")
colnames(akurasi) <- c("MAPE")
akurasi

##                       MAPE
## Koyck          0.002672700
## DLM 1          0.002387160
## DLM 2          0.002432927
## Autoregressive 0.002348180

Berikut merupakan plot dataset beserta Regresi Peubah Lag-nya

#PLOT
par(mfrow=c(1,1))
plot(test$xt, test$yt, type="b", col="black")
points(test$xt, fore.koyck$forecasts,col="red")
lines(test$xt, fore.koyck$forecasts,col="red")
points(test$xt, fore.dlm$forecasts,col="blue")
lines(test$xt, fore.dlm$forecasts,col="blue")
points(test$xt, fore.dlm2$forecasts,col="orange")
lines(test$xt, fore.dlm2$forecasts,col="orange")
points(test$xt, fore.ardl$forecasts,col="green")
lines(test$xt, fore.ardl$forecasts,col="green")
legend("topleft",c("aktual", "koyck","DLM 1","DLM 2", "autoregressive"), lty=1, col=c("black","red","blue","orange","green"), cex=0.8)