library(dLagM)
## Warning: package 'dLagM' was built under R version 4.4.3
## Loading required package: nardl
## Warning: package 'nardl' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Loading required package: dynlm
## Warning: package 'dynlm' was built under R version 4.4.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(dynlm)
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.4.3
##
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:dLagM':
##
## MAPE
## The following object is masked from 'package:base':
##
## Recall
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.2
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
#library read file csv
library(readr)
## Warning: package 'readr' was built under R version 4.4.2
data <- read_csv("C:/Users/LENOVO/Downloads/NewDelhi_Air_quality.csv")
## New names:
## Rows: 72 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): datetime dbl (9): ...1, AQI, CO, no2, o3, pm10, pm25, so2, ts dttm (2):
## timestamp_local, timestamp_utc
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
data
## # A tibble: 72 × 12
## ...1 AQI CO datetime no2 o3 pm10 pm25 so2 timestamp_local
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dttm>
## 1 0 30.2 199. 2022-10… 0.0469 55.8 10.5 5.64 0.387 2022-10-21 23:00:00
## 2 1 28.2 198. 2022-10… 0.0465 54.9 10.7 4.62 0.410 2022-10-22 00:00:00
## 3 2 26.6 199. 2022-10… 0.0469 54.6 11.2 3.52 0.402 2022-10-22 01:00:00
## 4 3 25 202. 2022-10… 0.0482 55.1 11.1 2.23 0.376 2022-10-22 02:00:00
## 5 4 26 205. 2022-10… 0.0489 55.8 10.4 1.98 0.339 2022-10-22 03:00:00
## 6 5 26 209. 2022-10… 0.0502 56.5 9.40 1.79 0.313 2022-10-22 04:00:00
## 7 6 26 209. 2022-10… 0.0502 55.8 8.48 1.75 0.313 2022-10-22 05:00:00
## 8 7 24 205. 2022-10… 0.0502 52.9 7.59 1.86 0.343 2022-10-22 06:00:00
## 9 8 23 200. 2022-10… 0.0469 50.1 7.64 2.21 0.395 2022-10-22 07:00:00
## 10 9 24 197. 2022-10… 0.0442 51.5 8.49 2.48 0.428 2022-10-22 08:00:00
## # ℹ 62 more rows
## # ℹ 2 more variables: timestamp_utc <dttm>, ts <dbl>
#eksplorasi data
str(data)
## spc_tbl_ [72 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ ...1 : num [1:72] 0 1 2 3 4 5 6 7 8 9 ...
## $ AQI : num [1:72] 30.2 28.2 26.6 25 26 26 26 24 23 24 ...
## $ CO : num [1:72] 199 198 199 202 205 ...
## $ datetime : chr [1:72] "2022-10-21:18" "2022-10-21:19" "2022-10-21:20" "2022-10-21:21" ...
## $ no2 : num [1:72] 0.0469 0.0465 0.0469 0.0482 0.0489 ...
## $ o3 : num [1:72] 55.8 54.9 54.6 55.1 55.8 ...
## $ pm10 : num [1:72] 10.5 10.7 11.2 11.1 10.4 ...
## $ pm25 : num [1:72] 5.64 4.62 3.52 2.23 1.98 ...
## $ so2 : num [1:72] 0.387 0.41 0.402 0.376 0.339 ...
## $ timestamp_local: POSIXct[1:72], format: "2022-10-21 23:00:00" "2022-10-22 00:00:00" ...
## $ timestamp_utc : POSIXct[1:72], format: "2022-10-21 18:00:00" "2022-10-21 19:00:00" ...
## $ ts : num [1:72] 1.67e+09 1.67e+09 1.67e+09 1.67e+09 1.67e+09 ...
## - attr(*, "spec")=
## .. cols(
## .. ...1 = col_double(),
## .. AQI = col_double(),
## .. CO = col_double(),
## .. datetime = col_character(),
## .. no2 = col_double(),
## .. o3 = col_double(),
## .. pm10 = col_double(),
## .. pm25 = col_double(),
## .. so2 = col_double(),
## .. timestamp_local = col_datetime(format = ""),
## .. timestamp_utc = col_datetime(format = ""),
## .. ts = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
summary(data)
## ...1 AQI CO datetime
## Min. : 0.00 Min. :19.00 Min. :191.9 Length:72
## 1st Qu.:17.75 1st Qu.:25.00 1st Qu.:197.4 Class :character
## Median :35.50 Median :27.00 Median :200.3 Mode :character
## Mean :35.50 Mean :26.18 Mean :200.1
## 3rd Qu.:53.25 3rd Qu.:28.00 3rd Qu.:203.6
## Max. :71.00 Max. :32.00 Max. :208.6
## no2 o3 pm10 pm25
## Min. :0.01556 Min. :41.48 Min. : 6.687 Min. :1.633
## 1st Qu.:0.03799 1st Qu.:53.64 1st Qu.: 7.865 1st Qu.:1.829
## Median :0.04552 Median :57.22 Median : 8.914 Median :2.202
## Mean :0.04200 Mean :56.57 Mean : 9.115 Mean :2.295
## 3rd Qu.:0.05020 3rd Qu.:60.08 3rd Qu.:10.275 3rd Qu.:2.522
## Max. :0.06091 Max. :68.66 Max. :12.846 Max. :5.637
## so2 timestamp_local timestamp_utc
## Min. :0.2831 Min. :2022-10-21 23:00:00 Min. :2022-10-21 18:00:00
## 1st Qu.:0.3204 1st Qu.:2022-10-22 16:45:00 1st Qu.:2022-10-22 11:45:00
## Median :0.3725 Median :2022-10-23 10:30:00 Median :2022-10-23 05:30:00
## Mean :0.3634 Mean :2022-10-23 10:30:00 Mean :2022-10-23 05:30:00
## 3rd Qu.:0.3958 3rd Qu.:2022-10-24 04:15:00 3rd Qu.:2022-10-23 23:15:00
## Max. :0.4545 Max. :2022-10-24 22:00:00 Max. :2022-10-24 17:00:00
## ts
## Min. :1.666e+09
## 1st Qu.:1.666e+09
## Median :1.667e+09
## Mean :1.667e+09
## 3rd Qu.:1.667e+09
## Max. :1.667e+09
#Membuat matriks korelasi
cor_matrix <- cor(data[, c("AQI", "CO", "no2", "o3", "pm10", "pm25", "so2")], use = "complete.obs")
cor_matrix
## AQI CO no2 o3 pm10 pm25
## AQI 1.00000000 0.79763320 0.7376547 0.97359901 0.08257568 -0.3466552
## CO 0.79763320 1.00000000 0.6520228 0.82919809 -0.09598578 -0.5829151
## no2 0.73765472 0.65202279 1.0000000 0.73204188 0.25652368 -0.3588091
## o3 0.97359901 0.82919809 0.7320419 1.00000000 0.03381079 -0.5219779
## pm10 0.08257568 -0.09598578 0.2565237 0.03381079 1.00000000 0.2684827
## pm25 -0.34665524 -0.58291511 -0.3588091 -0.52197786 0.26848271 1.0000000
## so2 -0.74247813 -0.79872002 -0.5859130 -0.78562485 -0.04779446 0.6743428
## so2
## AQI -0.74247813
## CO -0.79872002
## no2 -0.58591304
## o3 -0.78562485
## pm10 -0.04779446
## pm25 0.67434283
## so2 1.00000000
#Visualisasi matriks korelasi
corrplot(cor_matrix, method = "circle", type = "upper", tl.col = "black", tl.srt = 45)
##Interpretasi matriks korelasi Dari matriks korelasi diatas, dapat dilihat bahwa terdapat korelasi yang cukup kuat antara beberapa variabel. Misalnya, AQI memiliki korelasi positif yang cukup kuat dengan CO (0.798) dan O3 (0.974), yang menunjukkan bahwa peningkatan konsentrasi CO dan O3 cenderung diikuti oleh peningkatan AQI. Sehingga variabel yang dapat digunakan sebagai variabel untuk menjadi regresi lag adalah CO dan O3 karena memiliki korelasi yang cukup kuat dengan AQI.
#menghapus data selain AQI dan o3
data <- data[, c("AQI", "o3")]
colnames(data) <- c("Yt", "Xt")
data
## # A tibble: 72 × 2
## Yt Xt
## <dbl> <dbl>
## 1 30.2 55.8
## 2 28.2 54.9
## 3 26.6 54.6
## 4 25 55.1
## 5 26 55.8
## 6 26 56.5
## 7 26 55.8
## 8 24 52.9
## 9 23 50.1
## 10 24 51.5
## # ℹ 62 more rows
##Pembagian Data
#Split Data
train <- data[1:58,]
test <- data[59:72,]
#data time series
train.ts<-ts(train)
test.ts<-ts(test)
data.ts<-ts(data)
##Model koyck
#MODEL KOYCK
model.koyck <- koyckDlm(x = train$Xt, y = train$Yt)
summary(model.koyck)
##
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.112282 -0.366467 -0.002877 0.258311 1.106529
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.26260 0.93950 0.280 0.781
## Y.1 0.42943 0.08641 4.969 7.14e-06 ***
## X.t 0.25823 0.04259 6.064 1.35e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5239 on 54 degrees of freedom
## Multiple R-Squared: 0.9449, Adjusted R-squared: 0.9429
## Wald test: 417.1 on 2 and 54 DF, p-value: < 2.2e-16
##
## Diagnostic tests:
## NULL
##
## alpha beta phi
## Geometric coefficients: 0.460243 0.2582336 0.4294305
AIC(model.koyck)
## [1] 92.98106
BIC(model.koyck)
## [1] 101.1533
##Interpretasi Model Koyck Dari hasil model Koyck di atas, dapat dilihat bahwa koefisien regresi untuk variabel Xt (O3) adalah positif dan signifikan, yang menunjukkan bahwa peningkatan konsentrasi O3 cenderung diikuti oleh peningkatan AQI. Koefisien autokorelasi (rho) juga positif, menunjukkan adanya efek lag pada hubungan antara O3 dan AQI. Nilai AIC dan BIC yang relatif rendah menunjukkan bahwa model ini memiliki kecocokan yang baik dengan data pelatihan.
fore.koyck <- forecast(model = model.koyck, x=test$Xt, h=14)
fore.koyck
## $forecasts
## [1] 22.51461 21.65971 20.92318 20.05278 19.58666 19.84824 20.51469 20.80088
## [9] 20.92378 21.43831 22.95219 24.71052 26.20441 27.21533
##
## $call
## forecast.koyckDlm(model = model.koyck, x = test$Xt, h = 14)
##
## attr(,"class")
## [1] "forecast.koyckDlm" "dLagM"
mape.koyck <- MAPE(fore.koyck$forecasts, test$Yt)
mape.koyck #data test
## [1] 0.02909543
#akurasi data training
GoF(model.koyck)
## n MAE MPE MAPE sMAPE MASE MSE
## model.koyck 57 0.39765 -0.0005611615 0.01494044 0.01491151 0.5986129 0.260024
## MRAE GMRAE
## model.koyck 545222624 5097.033
##Regresi dengan Distributed Lag
model.dlm <- dlm(x = train$Xt,y = train$Yt , q = 2)
summary(model.dlm)
##
## Call:
## lm(formula = model.formula, data = design)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48041 -0.34171 -0.01844 0.25829 1.41892
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.170522 0.676995 -0.252 0.802
## x.t 0.487583 0.052341 9.315 1.13e-12 ***
## x.1 -0.017540 0.098118 -0.179 0.859
## x.2 -0.005899 0.055262 -0.107 0.915
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3496 on 52 degrees of freedom
## Multiple R-squared: 0.9763, Adjusted R-squared: 0.9749
## F-statistic: 713.6 on 3 and 52 DF, p-value: < 2.2e-16
##
## AIC and BIC values for the model:
## AIC BIC
## 1 47.04815 57.1749
AIC(model.dlm)
## [1] 47.04815
BIC(model.dlm)
## [1] 57.1749
Dari hasil model Distributed Lag di atas, dapat dilihat bahwa koefisien regresi untuk variabel Xt (O3) pada lag 0, lag 1, dan lag 2 semuanya positif dan signifikan, serta nilai P-Value dari intercept dan \(x_{t-1}<0.05\) yang intercept dan \(x_{t-1}\) berpengaruh signifikan terhadap AQI. Nilai AIC dan BIC yang relatif rendah menunjukkan bahwa model ini memiliki kecocokan yang baik dengan data pelatihan.
Peramalan dan Akurasi
fore.dlm <- forecast(model = model.dlm, x=test$Xt, h=14)
fore.dlm
## $forecasts
## [1] 21.98407 20.83473 20.19803 19.19165 19.06335 19.95415 20.97114 20.92295
## [9] 20.91029 21.78216 24.19202 26.18613 27.47631 28.09830
##
## $call
## forecast.dlm(model = model.dlm, x = test$Xt, h = 14)
##
## attr(,"class")
## [1] "forecast.dlm" "dLagM"
mape.dlm <- MAPE(fore.dlm$forecasts, test$Yt)
mape.dlm
## [1] 0.006409836
#akurasi data training
GoF(model.dlm)
## n MAE MPE MAPE sMAPE MASE MSE
## model.dlm 56 0.2659115 -0.000153498 0.009883436 0.009895147 0.4108184 0.1134602
## MRAE GMRAE
## model.dlm 1020290187 5489.74
###Lag Optimum
#penentuan lag optimum
finiteDLMauto(formula = Yt ~ Xt,
data = data.frame(train), q.min = 1, q.max = 20,
model.type = "dlm", error.type = "AIC", trace = TRUE)
## q - k MASE AIC BIC GMRAE MBRAE R.Adj.Sq Ljung-Box
## 20 20 0.19768 7.37876 45.04324 5794.456 0.49242 0.99072 0.004907684
## 19 19 0.22518 11.63823 48.23659 8872.570 0.51006 0.98952 0.008978678
## 14 14 0.29922 12.98760 43.31883 16384.653 0.51592 0.98825 0.254876195
## 18 18 0.25240 13.28125 48.74771 12656.558 0.52254 0.98891 0.001782351
## 15 15 0.28853 15.37352 47.07512 12079.709 0.50452 0.98781 0.246870375
## 16 16 0.28898 18.08564 51.10137 10365.391 0.49271 0.98718 0.266860265
## 17 17 0.27844 18.59912 52.87057 11228.549 0.51302 0.98713 0.059660051
## 10 10 0.33647 24.10757 48.43318 10779.390 0.48008 0.98393 0.411222122
## 12 12 0.34265 24.68572 52.11534 19408.016 0.51678 0.98404 0.370606458
## 11 11 0.33724 26.25501 52.15708 13097.266 0.49613 0.98321 0.454299817
## 5 5 0.36542 27.02479 42.78712 6410.480 0.48192 0.98352 0.189196553
## 13 13 0.36574 27.07270 55.97930 25489.522 0.52237 0.98360 0.412694470
## 3 3 0.38312 27.10177 39.14577 5879.350 0.48128 0.98290 0.111471296
## 4 4 0.38276 27.96420 41.88709 7635.896 0.48443 0.98283 0.173995415
## 6 6 0.36302 29.64290 47.20409 4742.320 0.47166 0.98307 0.197590728
## 9 9 0.36482 30.09649 52.79833 8867.244 0.48588 0.98222 0.763000495
## 7 7 0.37392 30.57737 49.89563 5848.238 0.47755 0.98314 0.277645440
## 8 8 0.38196 31.33341 52.36567 8039.153 0.49778 0.98265 0.417315437
## 2 2 0.41082 47.04815 57.17490 5489.740 0.47425 0.97492 0.087865341
## 1 1 0.48746 91.76306 99.93527 5751.269 0.38586 0.94411 0.162422786
Berdasarkan hasil di atas, lag optimum yang dipilih adalah lag ke-20 dengan nilai AIC terendah yaitu 7.378. Hal ini menunjukkan bahwa model Distributed Lag dengan lag ke-20 memiliki kecocokan terbaik dengan data pelatihan dibandingkan dengan model dengan lag lainnya dalam rentang yang diuji (1 hingga 20). Selanjutnya akan dilakukan permodelan untuk lag ke 20
#model dlm dengan lag optimum
model.dlm2 <- dlm(x = train$Xt,y = train$Yt , q = 20)
summary(model.dlm2)
##
## Call:
## lm(formula = model.formula, data = design)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30810 -0.09114 0.00787 0.09984 0.32321
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.63320 1.73395 -0.942 0.36025
## x.t 0.45147 0.05794 7.792 7.79e-07 ***
## x.1 0.13360 0.12926 1.034 0.31668
## x.2 -0.09471 0.14036 -0.675 0.50948
## x.3 -0.10610 0.14071 -0.754 0.46179
## x.4 0.02698 0.14027 0.192 0.84988
## x.5 0.11985 0.14221 0.843 0.41179
## x.6 -0.10800 0.14088 -0.767 0.45446
## x.7 0.24917 0.14003 1.779 0.09418 .
## x.8 -0.49515 0.13971 -3.544 0.00270 **
## x.9 0.42529 0.14468 2.940 0.00962 **
## x.10 0.09799 0.14124 0.694 0.49775
## x.11 -0.42495 0.14292 -2.973 0.00897 **
## x.12 0.30033 0.12577 2.388 0.02963 *
## x.13 -0.24379 0.11608 -2.100 0.05192 .
## x.14 0.21499 0.11829 1.818 0.08791 .
## x.15 -0.08719 0.11930 -0.731 0.47544
## x.16 0.03590 0.12092 0.297 0.77039
## x.17 -0.06223 0.11973 -0.520 0.61033
## x.18 0.17404 0.11688 1.489 0.15590
## x.19 -0.15534 0.09706 -1.600 0.12908
## x.20 0.03578 0.04547 0.787 0.44279
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2243 on 16 degrees of freedom
## Multiple R-squared: 0.996, Adjusted R-squared: 0.9907
## F-statistic: 189 on 21 and 16 DF, p-value: 2.141e-15
##
## AIC and BIC values for the model:
## AIC BIC
## 1 7.378756 45.04324
AIC(model.dlm2)
## [1] 7.378756
BIC(model.dlm2)
## [1] 45.04324
Dari hasil model Distributed Lag dengan lag optimum (lag ke-20) di atas, dapat dilihat bahwa koefisien regresi untuk variabel Xt (O3) pada lag 0 hingga lag 20 semuanya positif dan signifikan, serta nilai P-Value dari intercept \(x_{t-1}<0.05\) dan \(x_{t-1}\) berpengaruh signifikan terhadap AQI. Nilai AIC dan BIC yang relatif rendah menunjukkan bahwa model ini memiliki kecocokan yang baik dengan data pelatihan.
Peramalan dan Akurasi
#peramalan dan akurasi
fore.dlm2 <- forecast(model = model.dlm2, x=test$Xt, h=14)
#akurasi data test
mape.dlm2<- MAPE(fore.dlm2$forecasts, test$Yt)
mape.dlm2
## [1] 0.02298732
#akurasi data training
GoF(model.dlm2)
## n MAE MPE MAPE sMAPE MASE
## model.dlm2 38 0.1175398 -2.864864e-05 0.00431907 0.004318774 0.1976806
## MSE MRAE GMRAE
## model.dlm2 0.02119001 538582405 5794.456
Model diatas merupakan model yang sangat baik karena nilai MAPE kuranng dari 10%.
model.ardl <- ardlDlm(x = train$Xt, y = train$Yt, p = 1 , q = 1)
summary(model.ardl)
##
## Time series regression with "ts" data:
## Start = 2, End = 58
##
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0233 -0.2713 0.1217 0.2984 0.8689
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.24530 0.66489 -0.369 0.714
## X.t 0.48660 0.03236 15.036 < 2e-16 ***
## X.1 -0.23184 0.04540 -5.106 4.56e-06 ***
## Y.1 0.45632 0.06613 6.901 6.51e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3797 on 53 degrees of freedom
## Multiple R-squared: 0.9716, Adjusted R-squared: 0.97
## F-statistic: 604.7 on 3 and 53 DF, p-value: < 2.2e-16
AIC(model.ardl)
## [1] 57.2222
BIC(model.ardl)
## [1] 67.43746
model.ardl <- ardlDlm(formula = Yt ~ Xt,
data = train,p = 1 , q = 1)
summary(model.ardl)
##
## Time series regression with "ts" data:
## Start = 2, End = 58
##
## Call:
## dynlm(formula = as.formula(model.text), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0233 -0.2713 0.1217 0.2984 0.8689
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.24530 0.66489 -0.369 0.714
## Xt.t 0.48660 0.03236 15.036 < 2e-16 ***
## Xt.1 -0.23184 0.04540 -5.106 4.56e-06 ***
## Yt.1 0.45632 0.06613 6.901 6.51e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3797 on 53 degrees of freedom
## Multiple R-squared: 0.9716, Adjusted R-squared: 0.97
## F-statistic: 604.7 on 3 and 53 DF, p-value: < 2.2e-16
AIC(model.ardl)
## [1] 57.2222
BIC(model.ardl)
## [1] 67.43746
Hasil di atas menunjukkan bahwa selain peubah \(x_{t-1}\), hasil uji t menunjukkan nilai-p pada peubah \(\ge0.05\) Hal ini menunjukkan bahwa peubah \(x_{t-1}\) berpengaruh signifikan terhadap \(y_t\), sementara \(x_t\) dan \(y_{t-1}\) berpengaruh signifikan terhadap \(y_t\).”
fore.ardl <- forecast(model = model.ardl, x=test$Xt, h=14)
fore.ardl
## $forecasts
## [1] 21.79557 20.69111 20.07141 19.07614 18.94543 19.83881 20.87605 20.85189
## [9] 20.84087 21.70595 24.12246 26.15266 27.47632 28.11312
##
## $call
## forecast.ardlDlm(model = model.ardl, x = test$Xt, h = 14)
##
## attr(,"class")
## [1] "forecast.ardlDlm" "dLagM"
Data di atas merupakan hasil peramalan untuk 14 periode kedepan dari model autoregressive dengan nilai \(p=1\) dan \(q=1\).
mape.ardl <- MAPE(fore.ardl$forecasts, test$Yt)
mape.ardl
## [1] 0.00779035
#akurasi data training
GoF(model.ardl)
## n MAE MPE MAPE sMAPE MASE MSE
## model.ardl 57 0.3138843 -0.0001781471 0.01165753 0.01164777 0.472514 0.1340668
## MRAE GMRAE
## model.ardl 1108253474 6593.929
Berdasarkan akurasi di atas, terlihat bahwa nilai MAPE keduanya tidak
jauh berbeda. Artinya, model regresi dengan distribusi lag ini
tidakoverfitted atau underfitted”
#penentuan lag optimum
model.ardl.opt <- ardlBoundOrders(data = data.frame(data), ic = "AIC",
formula = Yt ~ Xt )
model.ardl.opt
## $p
## Xt
## 1 13
##
## $q
## [1] 2
##
## $Stat.table
## q = 1 q = 2 q = 3 q = 4 q = 5 q = 6 q = 7 q = 8
## p = 1 38.64746 23.22848 25.42302 27.72031 28.70926 26.39158 27.98723 30.39158
## p = 2 22.86594 24.77455 27.31646 28.69429 28.81115 25.64963 27.33061 30.08845
## p = 3 27.18085 27.18085 28.84077 30.37243 30.71504 25.55412 27.74318 30.55048
## p = 4 27.27918 28.33392 28.33392 27.22651 29.94853 27.52594 29.31474 32.19458
## p = 5 28.22371 28.68011 29.37749 29.37749 30.12962 28.60210 31.06991 33.54724
## p = 6 27.11336 28.13965 27.00217 28.68077 28.68077 30.47844 33.06989 35.19285
## p = 7 30.19026 30.92504 29.24074 30.23773 30.81010 30.81010 29.56882 27.97012
## p = 8 26.81548 26.88306 26.96639 27.84677 26.80343 28.40155 28.40155 29.27146
## p = 9 25.15641 25.64777 25.75789 27.48094 26.12645 27.83358 29.58776 29.58776
## p = 10 28.59615 28.86775 29.29778 31.09265 28.82957 30.63888 32.57903 34.34994
## p = 11 24.53224 23.37847 24.80335 26.48648 23.35632 25.20746 26.60810 26.81938
## p = 12 27.14281 25.70395 27.06234 28.84787 26.03408 27.92602 28.91965 28.33033
## p = 13 19.25858 16.55044 18.53543 20.33824 17.61125 19.60705 21.03414 21.71506
## p = 14 20.31747 18.36045 20.36004 22.34912 19.16521 21.06710 22.41853 23.18530
## p = 15 22.41646 20.32505 22.22270 24.14701 21.64522 23.25026 24.97916 26.17790
## q = 9 q = 10 q = 11 q = 12 q = 13 q = 14 q = 15
## p = 1 33.09252 33.18190 32.84415 33.19542 35.85663 30.71580 27.67083
## p = 2 32.80513 33.98671 32.34936 32.42167 35.04537 31.25200 27.25097
## p = 3 33.31925 34.28268 33.42041 33.94715 36.44309 32.48438 28.87461
## p = 4 34.95803 36.14844 35.27999 35.94335 38.39874 34.28870 30.56428
## p = 5 36.11311 37.41170 36.41377 37.18234 39.82841 36.17273 32.18350
## p = 6 35.88842 36.25737 35.13490 36.94569 39.67535 32.69633 29.29501
## p = 7 30.90878 33.89102 34.52801 36.37243 38.74818 31.23844 29.15791
## p = 8 31.44233 34.39275 26.01225 26.78247 27.92580 24.10080 23.75594
## p = 9 31.57509 33.98987 27.47278 26.92081 26.62574 21.91896 21.71187
## p = 10 34.34994 33.75383 24.46154 25.13892 26.98653 23.27262 23.65748
## p = 11 23.68447 23.68447 24.90968 27.13805 28.98636 25.18996 25.61717
## p = 12 25.59091 27.59091 27.59091 29.08999 29.84364 23.51211 22.55179
## p = 13 19.56880 19.27526 21.04212 21.04212 22.98266 23.87236 23.91841
## p = 14 22.81463 22.84285 24.84274 22.27955 22.27955 24.27951 25.89778
## p = 15 25.36264 24.86236 26.81668 26.73848 27.78117 27.78117 27.75060
##
## $min.Stat
## [1] 16.55044
min_p=c()
for(i in 1:6){
min_p[i]=min(model.ardl.opt$Stat.table[[i]])
}
q_opt=which(min_p==min(min_p, na.rm = TRUE))
p_opt=which(model.ardl.opt$Stat.table[[q_opt]] ==
min(model.ardl.opt$Stat.table[[q_opt]], na.rm = TRUE))
data.frame("q_optimum" = q_opt, "p_optimum" = p_opt,
"AIC"=model.ardl.opt$min.Stat)
## q_optimum p_optimum AIC
## 1 2 13 16.55044
model.ardl2 <- ardlDlm(formula = Yt ~ Xt,
data = train,p = 13, q = 2)
summary(model.ardl2)
##
## Time series regression with "ts" data:
## Start = 14, End = 58
##
## Call:
## dynlm(formula = as.formula(model.text), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41430 -0.19394 0.02836 0.16087 0.47584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.04456 1.06711 -0.979 0.3360
## Xt.t 0.40658 0.06938 5.860 2.67e-06 ***
## Xt.1 0.20835 0.18174 1.146 0.2613
## Xt.2 -0.09590 0.20134 -0.476 0.6376
## Xt.3 -0.13900 0.15910 -0.874 0.3897
## Xt.4 0.07385 0.15906 0.464 0.6460
## Xt.5 0.05911 0.14382 0.411 0.6842
## Xt.6 -0.15248 0.13442 -1.134 0.2663
## Xt.7 0.25536 0.13453 1.898 0.0680 .
## Xt.8 -0.28691 0.14185 -2.023 0.0528 .
## Xt.9 0.12287 0.15600 0.788 0.4375
## Xt.10 0.13887 0.15975 0.869 0.3921
## Xt.11 -0.20179 0.14670 -1.376 0.1799
## Xt.12 0.05814 0.12013 0.484 0.6322
## Xt.13 0.01800 0.05426 0.332 0.7426
## Yt.1 -0.11385 0.19128 -0.595 0.5565
## Yt.2 0.14225 0.18763 0.758 0.4547
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2851 on 28 degrees of freedom
## Multiple R-squared: 0.9892, Adjusted R-squared: 0.9831
## F-statistic: 160.6 on 16 and 28 DF, p-value: < 2.2e-16
Dari tabel di atas, dapat terlihat bahwa nilai AIC terendah didapat
ketika \(p=13\) dan \(q=2\), yaitu sebesar 16,5504.
Artinya, model autoregressive optimum didapat ketika \(p=13\) dan \(q=2\).
Selanjutnya dapat dilakukan pemodelan dengan nilai \(p\) dan \(q\) optimum seperti inisialisasi di langkah sebelumnya.”
model.ardl2 <- ardlDlm(formula = Yt ~ Xt,
data = train,p = 13 , q = 2)
summary(model.ardl2)
##
## Time series regression with "ts" data:
## Start = 14, End = 58
##
## Call:
## dynlm(formula = as.formula(model.text), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41430 -0.19394 0.02836 0.16087 0.47584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.04456 1.06711 -0.979 0.3360
## Xt.t 0.40658 0.06938 5.860 2.67e-06 ***
## Xt.1 0.20835 0.18174 1.146 0.2613
## Xt.2 -0.09590 0.20134 -0.476 0.6376
## Xt.3 -0.13900 0.15910 -0.874 0.3897
## Xt.4 0.07385 0.15906 0.464 0.6460
## Xt.5 0.05911 0.14382 0.411 0.6842
## Xt.6 -0.15248 0.13442 -1.134 0.2663
## Xt.7 0.25536 0.13453 1.898 0.0680 .
## Xt.8 -0.28691 0.14185 -2.023 0.0528 .
## Xt.9 0.12287 0.15600 0.788 0.4375
## Xt.10 0.13887 0.15975 0.869 0.3921
## Xt.11 -0.20179 0.14670 -1.376 0.1799
## Xt.12 0.05814 0.12013 0.484 0.6322
## Xt.13 0.01800 0.05426 0.332 0.7426
## Yt.1 -0.11385 0.19128 -0.595 0.5565
## Yt.2 0.14225 0.18763 0.758 0.4547
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2851 on 28 degrees of freedom
## Multiple R-squared: 0.9892, Adjusted R-squared: 0.9831
## F-statistic: 160.6 on 16 and 28 DF, p-value: < 2.2e-16
AIC(model.ardl2)
## [1] 29.42385
BIC(model.ardl2)
## [1] 61.94377
Hasil di atas menunjukkan bahwa selain peubah \(x_{t-1}\), hasil uji t menunjukkan nilai-p pada peubah \(\ge0.05\) Hal ini menunjukkan bahwa peubah \(x_{t-1}\) berpengaruh signifikan terhadap \(y_t\), sementara \(x_t\) dan \(y_{t-1}\) berpengaruh signifikan terhadap \(y_t\).”
fore.ardl2 <- forecast(model = model.ardl2, x=test$Xt, h=14)
fore.ardl2
## $forecasts
## [1] 21.77229 20.52108 20.02939 19.14999 18.73570 19.45331 20.86257 21.11623
## [9] 20.47743 21.43916 23.65285 26.32716 27.71046 27.92030
##
## $call
## forecast.ardlDlm(model = model.ardl2, x = test$Xt, h = 14)
##
## attr(,"class")
## [1] "forecast.ardlDlm" "dLagM"
Data di atas merupakan hasil peramalan untuk 14 periode kedepan dari model autoregressive dengan nilai \(p=13\) dan \(q=2\).
mape.ardl2 <- MAPE(fore.ardl2$forecasts, test$Yt)
mape.ardl2
## [1] 0.01445912
#akurasi data training
GoF(model.ardl2)
## n MAE MPE MAPE sMAPE MASE
## model.ardl2 45 0.1934879 -6.779947e-05 0.00711318 0.007110808 0.3547279
## MSE MRAE GMRAE
## model.ardl2 0.05058937 932166881 19290.28
Berdasarkan akurasi di atas, terlihat bahwa nilai MAPE keduanya tidak
jauh berbeda. Artinya, model regresi dengan distribusi lag ini
tidakoverfitted atau underfitted”
dynlm#sama dengan model dlm q=1
cons_lm1 <- dynlm(Yt ~ Xt+L(Xt),data = train.ts)
#sama dengan model ardl p=1 q=0
cons_lm2 <- dynlm(Yt ~ Xt+L(Yt),data = train.ts)
#sama dengan ardl p=1 q=1
cons_lm3 <- dynlm(Yt ~ Xt+L(Xt)+L(Yt),data = train.ts)
#sama dengan dlm p=2
cons_lm4 <- dynlm(Yt ~ Xt+L(Xt)+L(Xt,2),data = train.ts)
summary(cons_lm1)
##
## Time series regression with "ts" data:
## Start = 2, End = 58
##
## Call:
## dynlm(formula = Yt ~ Xt + L(Xt), data = train.ts)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48930 -0.40406 -0.07391 0.21622 2.81151
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.31531 0.90081 0.350 0.728
## Xt 0.47373 0.04410 10.741 5.12e-15 ***
## L(Xt) -0.01702 0.04511 -0.377 0.707
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5183 on 54 degrees of freedom
## Multiple R-squared: 0.9461, Adjusted R-squared: 0.9441
## F-statistic: 474 on 2 and 54 DF, p-value: < 2.2e-16
summary(cons_lm2)
##
## Time series regression with "ts" data:
## Start = 2, End = 58
##
## Call:
## dynlm(formula = Yt ~ Xt + L(Yt), data = train.ts)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.76190 -0.29995 -0.04978 0.23740 1.79589
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.54580 0.80143 -0.681 0.498762
## Xt 0.36702 0.02703 13.579 < 2e-16 ***
## L(Yt) 0.22481 0.05825 3.859 0.000306 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4595 on 54 degrees of freedom
## Multiple R-squared: 0.9576, Adjusted R-squared: 0.9561
## F-statistic: 610.5 on 2 and 54 DF, p-value: < 2.2e-16
summary(cons_lm3)
##
## Time series regression with "ts" data:
## Start = 2, End = 58
##
## Call:
## dynlm(formula = Yt ~ Xt + L(Xt) + L(Yt), data = train.ts)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0233 -0.2713 0.1217 0.2984 0.8689
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.24530 0.66489 -0.369 0.714
## Xt 0.48660 0.03236 15.036 < 2e-16 ***
## L(Xt) -0.23184 0.04540 -5.106 4.56e-06 ***
## L(Yt) 0.45632 0.06613 6.901 6.51e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3797 on 53 degrees of freedom
## Multiple R-squared: 0.9716, Adjusted R-squared: 0.97
## F-statistic: 604.7 on 3 and 53 DF, p-value: < 2.2e-16
summary(cons_lm4)
##
## Time series regression with "ts" data:
## Start = 3, End = 58
##
## Call:
## dynlm(formula = Yt ~ Xt + L(Xt) + L(Xt, 2), data = train.ts)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48041 -0.34171 -0.01844 0.25829 1.41892
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.170522 0.676995 -0.252 0.802
## Xt 0.487583 0.052341 9.315 1.13e-12 ***
## L(Xt) -0.017540 0.098118 -0.179 0.859
## L(Xt, 2) -0.005899 0.055262 -0.107 0.915
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3496 on 52 degrees of freedom
## Multiple R-squared: 0.9763, Adjusted R-squared: 0.9749
## F-statistic: 713.6 on 3 and 52 DF, p-value: < 2.2e-16
deviance(cons_lm1)
## [1] 14.50802
deviance(cons_lm2)
## [1] 11.40143
deviance(cons_lm3)
## [1] 7.641805
deviance(cons_lm4)
## [1] 6.353772
#uji model
if(require("lmtest")) encomptest(cons_lm1, cons_lm2)
## Encompassing test
##
## Model 1: Yt ~ Xt + L(Xt)
## Model 2: Yt ~ Xt + L(Yt)
## Model E: Yt ~ Xt + L(Xt) + L(Yt)
## Res.Df Df F Pr(>F)
## M1 vs. ME 53 -1 47.621 6.514e-09 ***
## M2 vs. ME 53 -1 26.075 4.562e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#durbin watson
dwtest(cons_lm1)
##
## Durbin-Watson test
##
## data: cons_lm1
## DW = 1.0808, p-value = 4.224e-05
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm2)
##
## Durbin-Watson test
##
## data: cons_lm2
## DW = 1.5997, p-value = 0.03841
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm3)
##
## Durbin-Watson test
##
## data: cons_lm3
## DW = 2.7648, p-value = 0.9961
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm4)
##
## Durbin-Watson test
##
## data: cons_lm4
## DW = 2.1132, p-value = 0.5554
## alternative hypothesis: true autocorrelation is greater than 0
bptest(cons_lm1)
##
## studentized Breusch-Pagan test
##
## data: cons_lm1
## BP = 1.0373, df = 2, p-value = 0.5953
bptest(cons_lm2)
##
## studentized Breusch-Pagan test
##
## data: cons_lm2
## BP = 23.613, df = 2, p-value = 7.456e-06
bptest(cons_lm3)
##
## studentized Breusch-Pagan test
##
## data: cons_lm3
## BP = 13.789, df = 3, p-value = 0.003207
bptest(cons_lm4)
##
## studentized Breusch-Pagan test
##
## data: cons_lm4
## BP = 0.94346, df = 3, p-value = 0.8149
shapiro.test(residuals(cons_lm1))
##
## Shapiro-Wilk normality test
##
## data: residuals(cons_lm1)
## W = 0.68131, p-value = 7.651e-10
shapiro.test(residuals(cons_lm2))
##
## Shapiro-Wilk normality test
##
## data: residuals(cons_lm2)
## W = 0.93215, p-value = 0.003296
shapiro.test(residuals(cons_lm3))
##
## Shapiro-Wilk normality test
##
## data: residuals(cons_lm3)
## W = 0.96815, p-value = 0.1375
shapiro.test(residuals(cons_lm4))
##
## Shapiro-Wilk normality test
##
## data: residuals(cons_lm4)
## W = 0.85348, p-value = 7.243e-06
akurasi <- matrix(c(mape.koyck, mape.dlm, mape.dlm2, mape.ardl))
row.names(akurasi)<- c("Koyck","DLM 1","DLM 2","Autoregressive")
colnames(akurasi) <- c("MAPE")
akurasi
## MAPE
## Koyck 0.029095435
## DLM 1 0.006409836
## DLM 2 0.022987324
## Autoregressive 0.007790350
Berdasarkan nilai MAPE, model paling optimum didapat pada Model DLM 1 karena memiliki nilai MAPE yang terkecil.”
par(mfrow=c(1,1))
plot(test$Xt, test$Yt, type="b", col="black", ylim=c(20,30))
points(test$Xt, fore.koyck$forecasts,col="red")
lines(test$Xt, fore.koyck$forecasts,col="red")
points(test$Xt, fore.dlm$forecasts,col="blue")
lines(test$Xt, fore.dlm$forecasts,col="blue")
points(test$Xt, fore.dlm2$forecasts,col="orange")
lines(test$Xt, fore.dlm2$forecasts,col="orange")
points(test$Xt, fore.ardl$forecasts,col="green")
lines(test$Xt, fore.ardl2$forecasts,col="green")
legend("topleft",c("aktual", "koyck","DLM 1","DLM 2", "autoregressive"), lty=1, col=c("black","red","blue","orange","green"), cex=0.8)
Berdasarkan plot tersebut, terlihat bahwa plot yang paling mendekati data aktualnya adalah Model DLM 1, sehingga dapat disimpulkan model terbaik dalam hal ini adalah model regresi DLM 1”