library(dLagM)
## Warning: package 'dLagM' was built under R version 4.4.3
## Loading required package: nardl
## Warning: package 'nardl' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Loading required package: dynlm
## Warning: package 'dynlm' was built under R version 4.4.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(dynlm)
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.4.3
##
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:dLagM':
##
## MAPE
## The following object is masked from 'package:base':
##
## Recall
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.2
library(rio)
## Warning: package 'rio' was built under R version 4.4.2
data_air_ql <- import("https://raw.githubusercontent.com/nyayunabila/mpdw/refs/heads/main/Pertemuan%203/NewDelhi_Air_quality.csv")
data_air_ql
## V1 AQI CO datetime no2 o3 pm10 pm25
## 1 0 30.2 198.6027 2022-10-21:18 0.04685717 55.78995 10.486722 5.637410
## 2 1 28.2 197.6013 2022-10-21:19 0.04645554 54.93164 10.719325 4.618169
## 3 2 26.6 198.6027 2022-10-21:20 0.04685717 54.64554 11.155578 3.520902
## 4 3 25.0 201.9405 2022-10-21:21 0.04819594 55.07469 11.116206 2.225919
## 5 4 26.0 205.2784 2022-10-21:22 0.04886533 55.78995 10.405250 1.979471
## 6 5 26.0 208.6163 2022-10-21:23 0.05020411 56.50520 9.402518 1.792677
## 7 6 26.0 208.6163 2022-10-22:00 0.05020411 55.78995 8.482822 1.752237
## 8 7 24.0 205.2784 2022-10-22:01 0.05020411 52.92892 7.586667 1.858459
## 9 8 23.0 200.2716 2022-10-22:02 0.04685717 50.06790 7.636587 2.208076
## 10 9 24.0 196.9338 2022-10-22:03 0.04417961 51.49841 8.487648 2.476681
## 11 10 25.0 198.6027 2022-10-22:04 0.04284084 53.64418 9.357371 2.585729
## 12 11 26.0 200.2716 2022-10-22:05 0.04049798 55.78995 10.338728 2.390081
## 13 12 27.0 203.6095 2022-10-22:06 0.04016329 59.36623 10.523314 2.086022
## 14 13 29.0 205.2784 2022-10-22:07 0.03882451 62.22725 10.635970 1.937047
## 15 14 29.0 205.2784 2022-10-22:08 0.03882451 62.94250 9.964803 1.835311
## 16 15 29.0 205.2784 2022-10-22:09 0.03882451 62.94250 9.214411 1.725189
## 17 16 29.0 203.6095 2022-10-22:10 0.03748573 62.22725 8.772117 1.640662
## 18 17 28.0 201.9405 2022-10-22:11 0.04250614 60.79674 8.753794 1.633373
## 19 18 27.0 200.2716 2022-10-22:12 0.05020411 59.36623 8.807467 1.636810
## 20 19 27.0 201.9405 2022-10-22:13 0.05622860 58.65097 8.774860 1.666231
## 21 20 27.0 201.9405 2022-10-22:14 0.05622860 58.65097 8.675813 1.690578
## 22 21 27.0 201.9405 2022-10-22:15 0.05488982 57.93572 8.915920 1.705802
## 23 22 27.0 200.2716 2022-10-22:16 0.05288166 57.93572 9.227945 1.732754
## 24 23 27.0 200.2716 2022-10-22:17 0.05221227 58.65097 9.491790 1.765387
## 25 24 27.0 200.2716 2022-10-22:18 0.05221227 59.36623 9.907828 1.827327
## 26 25 28.0 201.9405 2022-10-22:19 0.05221227 61.51199 10.095885 1.888416
## 27 26 29.0 203.6095 2022-10-22:20 0.05288166 63.65776 9.879244 1.892860
## 28 27 31.0 205.2784 2022-10-22:21 0.05221227 66.51878 9.503339 1.859564
## 29 28 31.0 206.9473 2022-10-22:22 0.05221227 67.94930 9.008138 1.829196
## 30 29 32.0 206.9473 2022-10-22:23 0.05154289 68.66455 8.729765 1.787945
## 31 30 32.0 205.2784 2022-10-23:00 0.05087350 68.66455 8.912433 1.796048
## 32 31 31.0 206.9473 2022-10-23:01 0.05488982 67.23404 9.337687 1.810690
## 33 32 31.0 206.9473 2022-10-23:02 0.06091431 66.51878 9.351754 1.826056
## 34 33 30.0 208.6163 2022-10-23:03 0.05555921 65.80353 8.640116 1.822618
## 35 34 30.0 206.9473 2022-10-23:04 0.04618778 64.37302 7.750329 1.819336
## 36 35 29.0 205.2784 2022-10-23:05 0.03815512 62.22725 6.815522 1.831419
## 37 36 27.0 203.6095 2022-10-23:06 0.03079185 59.36623 6.965347 1.929624
## 38 37 26.0 200.2716 2022-10-23:07 0.02577144 56.50520 7.022172 2.049507
## 39 38 25.0 200.2716 2022-10-23:08 0.02342858 55.07469 6.946332 2.152871
## 40 39 25.0 198.6027 2022-10-23:09 0.02376328 53.64418 6.834290 2.219349
## 41 40 24.0 196.9338 2022-10-23:10 0.02677552 52.92892 6.686746 2.248050
## 42 41 24.0 195.2648 2022-10-23:11 0.03146124 52.92892 7.593563 2.276788
## 43 42 25.0 195.2648 2022-10-23:12 0.03815512 53.64418 8.999838 2.334924
## 44 43 25.0 195.2648 2022-10-23:13 0.04351023 53.64418 11.022134 2.365020
## 45 44 25.0 196.9338 2022-10-23:14 0.04351023 54.35944 12.198974 2.366476
## 46 45 26.0 198.6027 2022-10-23:15 0.04484900 55.78995 11.969586 2.369681
## 47 46 26.0 198.6027 2022-10-23:16 0.04618778 57.22046 9.593794 2.339545
## 48 47 27.0 198.6027 2022-10-23:17 0.04551839 57.93572 7.763611 2.331504
## 49 48 27.0 198.6027 2022-10-23:18 0.04417961 58.65097 7.517936 2.324217
## 50 49 27.0 198.6027 2022-10-23:19 0.04484900 58.65097 8.041439 2.320275
## 51 50 27.0 200.2716 2022-10-23:20 0.04618778 59.36623 8.654108 2.283845
## 52 51 28.0 200.2716 2022-10-23:21 0.04752655 60.08148 8.514708 2.195546
## 53 52 28.0 200.2716 2022-10-23:22 0.04685717 60.08148 7.923108 2.066149
## 54 53 27.0 200.2716 2022-10-23:23 0.04618778 59.36623 7.097172 2.059171
## 55 54 27.0 198.6027 2022-10-24:00 0.04551839 58.65097 6.815780 2.072363
## 56 55 26.0 198.6027 2022-10-24:01 0.04752655 57.22046 7.661589 2.102422
## 57 56 25.0 198.6027 2022-10-24:02 0.04551839 54.35944 8.718614 2.230021
## 58 57 23.0 198.6027 2022-10-24:03 0.03715104 50.78316 8.457276 2.501037
## 59 58 22.0 196.9338 2022-10-24:04 0.02744491 47.92213 7.184991 2.750613
## 60 59 21.0 193.5959 2022-10-24:05 0.02125307 45.41874 6.950173 2.984422
## 61 60 20.0 193.5959 2022-10-24:06 0.01757144 43.98823 7.205668 3.065950
## 62 61 19.0 191.9270 2022-10-24:07 0.01573062 41.84246 7.898480 3.118563
## 63 62 19.0 191.9270 2022-10-24:08 0.01556327 41.48483 8.744758 3.090378
## 64 63 20.0 191.9270 2022-10-24:09 0.01807348 43.27297 9.623435 2.976335
## 65 64 21.0 191.9270 2022-10-24:10 0.02208981 45.41874 10.253539 2.846886
## 66 65 21.0 191.9270 2022-10-24:11 0.02577144 45.41874 10.703334 2.771884
## 67 66 21.0 191.9270 2022-10-24:12 0.03045716 45.41874 10.663057 2.730851
## 68 67 22.0 193.5959 2022-10-24:13 0.03547757 47.20688 10.423121 2.720472
## 69 68 24.0 195.2648 2022-10-24:14 0.03915921 52.21367 11.240661 2.713109
## 70 69 26.0 196.9338 2022-10-24:15 0.04417961 56.50520 12.098125 2.743044
## 71 70 27.0 198.6027 2022-10-24:16 0.04685717 59.36623 12.845977 2.780022
## 72 71 28.0 198.6027 2022-10-24:17 0.04752655 60.79674 12.583639 2.745875
## so2 timestamp_local timestamp_utc ts
## 1 0.3874302 2022-10-21 23:00:00 2022-10-21 18:00:00 1666375200
## 2 0.4097819 2022-10-22 00:00:00 2022-10-21 19:00:00 1666378800
## 3 0.4023313 2022-10-22 01:00:00 2022-10-21 20:00:00 1666382400
## 4 0.3762543 2022-10-22 02:00:00 2022-10-21 21:00:00 1666386000
## 5 0.3390014 2022-10-22 03:00:00 2022-10-21 22:00:00 1666389600
## 6 0.3129244 2022-10-22 04:00:00 2022-10-21 23:00:00 1666393200
## 7 0.3129244 2022-10-22 05:00:00 2022-10-22 00:00:00 1666396800
## 8 0.3427267 2022-10-22 06:00:00 2022-10-22 01:00:00 1666400400
## 9 0.3948808 2022-10-22 07:00:00 2022-10-22 02:00:00 1666404000
## 10 0.4284084 2022-10-22 08:00:00 2022-10-22 03:00:00 1666407600
## 11 0.4284084 2022-10-22 09:00:00 2022-10-22 04:00:00 1666411200
## 12 0.3986061 2022-10-22 10:00:00 2022-10-22 05:00:00 1666414800
## 13 0.3501773 2022-10-22 11:00:00 2022-10-22 06:00:00 1666418400
## 14 0.3166497 2022-10-22 12:00:00 2022-10-22 07:00:00 1666422000
## 15 0.2980232 2022-10-22 13:00:00 2022-10-22 08:00:00 1666425600
## 16 0.2868474 2022-10-22 14:00:00 2022-10-22 09:00:00 1666429200
## 17 0.2831221 2022-10-22 15:00:00 2022-10-22 10:00:00 1666432800
## 18 0.2942979 2022-10-22 16:00:00 2022-10-22 11:00:00 1666436400
## 19 0.3017485 2022-10-22 17:00:00 2022-10-22 12:00:00 1666440000
## 20 0.3054738 2022-10-22 18:00:00 2022-10-22 13:00:00 1666443600
## 21 0.3091991 2022-10-22 19:00:00 2022-10-22 14:00:00 1666447200
## 22 0.3129244 2022-10-22 20:00:00 2022-10-22 15:00:00 1666450800
## 23 0.3203750 2022-10-22 21:00:00 2022-10-22 16:00:00 1666454400
## 24 0.3241003 2022-10-22 22:00:00 2022-10-22 17:00:00 1666458000
## 25 0.3278256 2022-10-22 23:00:00 2022-10-22 18:00:00 1666461600
## 26 0.3278256 2022-10-23 00:00:00 2022-10-22 19:00:00 1666465200
## 27 0.3203750 2022-10-23 01:00:00 2022-10-22 20:00:00 1666468800
## 28 0.3166497 2022-10-23 02:00:00 2022-10-22 21:00:00 1666472400
## 29 0.3129244 2022-10-23 03:00:00 2022-10-22 22:00:00 1666476000
## 30 0.3166497 2022-10-23 04:00:00 2022-10-22 23:00:00 1666479600
## 31 0.3166497 2022-10-23 05:00:00 2022-10-23 00:00:00 1666483200
## 32 0.3166497 2022-10-23 06:00:00 2022-10-23 01:00:00 1666486800
## 33 0.3203750 2022-10-23 07:00:00 2022-10-23 02:00:00 1666490400
## 34 0.3203750 2022-10-23 08:00:00 2022-10-23 03:00:00 1666494000
## 35 0.3241003 2022-10-23 09:00:00 2022-10-23 04:00:00 1666497600
## 36 0.3278256 2022-10-23 10:00:00 2022-10-23 05:00:00 1666501200
## 37 0.3390014 2022-10-23 11:00:00 2022-10-23 06:00:00 1666504800
## 38 0.3501773 2022-10-23 12:00:00 2022-10-23 07:00:00 1666508400
## 39 0.3576279 2022-10-23 13:00:00 2022-10-23 08:00:00 1666512000
## 40 0.3650784 2022-10-23 14:00:00 2022-10-23 09:00:00 1666515600
## 41 0.3725290 2022-10-23 15:00:00 2022-10-23 10:00:00 1666519200
## 42 0.3762543 2022-10-23 16:00:00 2022-10-23 11:00:00 1666522800
## 43 0.3762543 2022-10-23 17:00:00 2022-10-23 12:00:00 1666526400
## 44 0.3725290 2022-10-23 18:00:00 2022-10-23 13:00:00 1666530000
## 45 0.3725290 2022-10-23 19:00:00 2022-10-23 14:00:00 1666533600
## 46 0.3725290 2022-10-23 20:00:00 2022-10-23 15:00:00 1666537200
## 47 0.3799796 2022-10-23 21:00:00 2022-10-23 16:00:00 1666540800
## 48 0.3911555 2022-10-23 22:00:00 2022-10-23 17:00:00 1666544400
## 49 0.3986061 2022-10-23 23:00:00 2022-10-23 18:00:00 1666548000
## 50 0.3986061 2022-10-24 00:00:00 2022-10-23 19:00:00 1666551600
## 51 0.3874302 2022-10-24 01:00:00 2022-10-23 20:00:00 1666555200
## 52 0.3762543 2022-10-24 02:00:00 2022-10-23 21:00:00 1666558800
## 53 0.3650784 2022-10-24 03:00:00 2022-10-23 22:00:00 1666562400
## 54 0.3688037 2022-10-24 04:00:00 2022-10-23 23:00:00 1666566000
## 55 0.3762543 2022-10-24 05:00:00 2022-10-24 00:00:00 1666569600
## 56 0.3837049 2022-10-24 06:00:00 2022-10-24 01:00:00 1666573200
## 57 0.4023313 2022-10-24 07:00:00 2022-10-24 02:00:00 1666576800
## 58 0.4284084 2022-10-24 08:00:00 2022-10-24 03:00:00 1666580400
## 59 0.4433095 2022-10-24 09:00:00 2022-10-24 04:00:00 1666584000
## 60 0.4544854 2022-10-24 10:00:00 2022-10-24 05:00:00 1666587600
## 61 0.4544854 2022-10-24 11:00:00 2022-10-24 06:00:00 1666591200
## 62 0.4507601 2022-10-24 12:00:00 2022-10-24 07:00:00 1666594800
## 63 0.4395843 2022-10-24 13:00:00 2022-10-24 08:00:00 1666598400
## 64 0.4209578 2022-10-24 14:00:00 2022-10-24 09:00:00 1666602000
## 65 0.4097819 2022-10-24 15:00:00 2022-10-24 10:00:00 1666605600
## 66 0.4060566 2022-10-24 16:00:00 2022-10-24 11:00:00 1666609200
## 67 0.3986061 2022-10-24 17:00:00 2022-10-24 12:00:00 1666612800
## 68 0.3911555 2022-10-24 18:00:00 2022-10-24 13:00:00 1666616400
## 69 0.3837049 2022-10-24 19:00:00 2022-10-24 14:00:00 1666620000
## 70 0.3762543 2022-10-24 20:00:00 2022-10-24 15:00:00 1666623600
## 71 0.3725290 2022-10-24 21:00:00 2022-10-24 16:00:00 1666627200
## 72 0.3688037 2022-10-24 22:00:00 2022-10-24 17:00:00 1666630800
Untuk mengecek X mana yang akan kita gunakan, maka kita akan gunakan korelasi peubah X dengan AQI
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.2
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.2
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.2
## Warning: package 'lubridate' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
colnames(data_air_ql)
## [1] "V1" "AQI" "CO" "datetime"
## [5] "no2" "o3" "pm10" "pm25"
## [9] "so2" "timestamp_local" "timestamp_utc" "ts"
cor_values <- cor(data_air_ql %>% select(AQI, pm25, pm10, no2, so2, o3, CO), use = "complete.obs")
print(cor_values[1, -1])
## pm25 pm10 no2 so2 o3 CO
## -0.34665524 0.08257568 0.73765472 -0.74247813 0.97359901 0.79763320
Kita akan mencari X yang korelasinya tinggi dengan AQI, pada dataset ini, O3 memiliki korelasi yang tinggi, sehingga O3 adalah variabel paling cocok dipakai sebagai X tunggal untuk regresi lag.
o3 <- data_air_ql %>% select(AQI, o3) %>% na.omit()
o3 <- o3 %>% mutate(
t = row_number(),
Yt = AQI,
Yt_1 = lag(Yt, 1),
Xt = o3
) %>%
select(t, Yt, Yt_1, Xt)
o3
## t Yt Yt_1 Xt
## 1 1 30.2 NA 55.78995
## 2 2 28.2 30.2 54.93164
## 3 3 26.6 28.2 54.64554
## 4 4 25.0 26.6 55.07469
## 5 5 26.0 25.0 55.78995
## 6 6 26.0 26.0 56.50520
## 7 7 26.0 26.0 55.78995
## 8 8 24.0 26.0 52.92892
## 9 9 23.0 24.0 50.06790
## 10 10 24.0 23.0 51.49841
## 11 11 25.0 24.0 53.64418
## 12 12 26.0 25.0 55.78995
## 13 13 27.0 26.0 59.36623
## 14 14 29.0 27.0 62.22725
## 15 15 29.0 29.0 62.94250
## 16 16 29.0 29.0 62.94250
## 17 17 29.0 29.0 62.22725
## 18 18 28.0 29.0 60.79674
## 19 19 27.0 28.0 59.36623
## 20 20 27.0 27.0 58.65097
## 21 21 27.0 27.0 58.65097
## 22 22 27.0 27.0 57.93572
## 23 23 27.0 27.0 57.93572
## 24 24 27.0 27.0 58.65097
## 25 25 27.0 27.0 59.36623
## 26 26 28.0 27.0 61.51199
## 27 27 29.0 28.0 63.65776
## 28 28 31.0 29.0 66.51878
## 29 29 31.0 31.0 67.94930
## 30 30 32.0 31.0 68.66455
## 31 31 32.0 32.0 68.66455
## 32 32 31.0 32.0 67.23404
## 33 33 31.0 31.0 66.51878
## 34 34 30.0 31.0 65.80353
## 35 35 30.0 30.0 64.37302
## 36 36 29.0 30.0 62.22725
## 37 37 27.0 29.0 59.36623
## 38 38 26.0 27.0 56.50520
## 39 39 25.0 26.0 55.07469
## 40 40 25.0 25.0 53.64418
## 41 41 24.0 25.0 52.92892
## 42 42 24.0 24.0 52.92892
## 43 43 25.0 24.0 53.64418
## 44 44 25.0 25.0 53.64418
## 45 45 25.0 25.0 54.35944
## 46 46 26.0 25.0 55.78995
## 47 47 26.0 26.0 57.22046
## 48 48 27.0 26.0 57.93572
## 49 49 27.0 27.0 58.65097
## 50 50 27.0 27.0 58.65097
## 51 51 27.0 27.0 59.36623
## 52 52 28.0 27.0 60.08148
## 53 53 28.0 28.0 60.08148
## 54 54 27.0 28.0 59.36623
## 55 55 27.0 27.0 58.65097
## 56 56 26.0 27.0 57.22046
## 57 57 25.0 26.0 54.35944
## 58 58 23.0 25.0 50.78316
## 59 59 22.0 23.0 47.92213
## 60 60 21.0 22.0 45.41874
## 61 61 20.0 21.0 43.98823
## 62 62 19.0 20.0 41.84246
## 63 63 19.0 19.0 41.48483
## 64 64 20.0 19.0 43.27297
## 65 65 21.0 20.0 45.41874
## 66 66 21.0 21.0 45.41874
## 67 67 21.0 21.0 45.41874
## 68 68 22.0 21.0 47.20688
## 69 69 24.0 22.0 52.21367
## 70 70 26.0 24.0 56.50520
## 71 71 27.0 26.0 59.36623
## 72 72 28.0 27.0 60.79674
library(tidyverse)
#cek linearitas
ggplot(o3, aes(x = Xt, y = Yt)) +
geom_point(alpha = 0.5, color = "steelblue") +
geom_smooth(method = "lm", se = FALSE, color = "red") +
labs(title = "Scatterplot AQI vs O3",
x = "O3",
y = "Air Quality Index (AQI)")
## `geom_smooth()` using formula = 'y ~ x'
modelo3 <- lm(Yt ~ Xt, data = o3)
#cek normalitas residual
shapiro.test(resid(modelo3))
##
## Shapiro-Wilk normality test
##
## data: resid(modelo3)
## W = 0.51931, p-value = 5.517e-14
#uji independensi residual
dwtest(modelo3)
##
## Durbin-Watson test
##
## data: modelo3
## DW = 0.54636, p-value = 3.207e-14
## alternative hypothesis: true autocorrelation is greater than 0
#uji homoskedastisitas
bptest(modelo3)
##
## studentized Breusch-Pagan test
##
## data: modelo3
## BP = 0.039736, df = 1, p-value = 0.842
Dari hasil uji asumsi di atas dapat disimpulkan bahwa,
Untuk mengatasi autokorelasi, kita dapat menggunakan regresi lag. Akan digunakan 3 macam model, yaitu Koyck, Distributed Lag, Autoregressive.
#split data
train<-o3[1:58,]
test<-o3[59:72,]
#data time series
train.ts<-ts(train)
test.ts<-ts(test)
data.ts<-ts(o3)
model.koyck <- koyckDlm(x = train$Xt, y = train$Yt)
summary(model.koyck)
##
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.112282 -0.366467 -0.002877 0.258311 1.106529
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.26260 0.93950 0.280 0.781
## Y.1 0.42943 0.08641 4.969 7.14e-06 ***
## X.t 0.25823 0.04259 6.064 1.35e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5239 on 54 degrees of freedom
## Multiple R-Squared: 0.9449, Adjusted R-squared: 0.9429
## Wald test: 417.1 on 2 and 54 DF, p-value: < 2.2e-16
##
## Diagnostic tests:
## NULL
##
## alpha beta phi
## Geometric coefficients: 0.460243 0.2582336 0.4294305
AIC(model.koyck)
## [1] 92.98106
BIC(model.koyck)
## [1] 101.1533
Interpretasi Hasil:
Dari summary
, kita dapatkan model:
\[ \hat{Y_t}=0.26260+0.25823X_t+0.42943Y_{t-1} \]
Koefisien \(X_t\) (0.25823) signifikan, artinya nilai $X$ saat ini berpengaruh terhadap \(Y\).
Koefisien \(Y_{t−1}\) (0.42943) signifikan. Hal ini menunjukkan bahwa Lag variabel dependen (Yt-1) = 0.4294, signifikan (p < 0.001) → AQI periode sebelumnya berpengaruh positif terhadap AQI periode sekarang
AIC = 92.98106, BIC = 101.1533
Berikut adalah hasil peramalan y untuk 5 periode kedepan menggunakan model koyck
fore.koyck <- forecast(model = model.koyck, x=test$Xt, h=14)
fore.koyck
## $forecasts
## [1] 22.51461 21.65971 20.92318 20.05278 19.58666 19.84824 20.51469 20.80088
## [9] 20.92378 21.43831 22.95219 24.71052 26.20441 27.21533
##
## $call
## forecast.koyckDlm(model = model.koyck, x = test$Xt, h = 14)
##
## attr(,"class")
## [1] "forecast.koyckDlm" "dLagM"
mape.koyck <- MAPE(fore.koyck$forecasts, test$Yt)
mape.koyck #data test
## [1] 0.02909543
#akurasi data training
GoF(model.koyck)
## n MAE MPE MAPE sMAPE MASE MSE
## model.koyck 57 0.39765 -0.0005611615 0.01494044 0.01491151 0.5986129 0.260024
## MRAE GMRAE
## model.koyck 545222624 5097.033
Interpretasi Hasil:
Akurasi pada data uji, MAPE (Mean Absolute Percentage Error) = 0.02909543 (≈ 2.9%) artinya rata-rata kesalahan prediksi model koyck terhadap data uji adalah sekitar 2.9% < 10%. Ini menunjukkan bahwa model memiliki performa yang baik dalam memprediksi nilai AQI berdasarkan konsentrasi O3.
Akurasi pada data latih, MAPE = 0.0149(≈ 1.49%), artinya rata-rata kesalahan prediksi model koyck terhadap data uji adalah sekitar 1.49%. Ini menunjukkan bahwa model memiliki performa yang sangat baik.
Kita akan menentukan Lag Optimum
#penentuan lag optimum
finiteDLMauto(formula = Yt ~ Xt,
data = data.frame(train), q.min = 1, q.max = 10,
model.type = "dlm", error.type = "AIC", trace = TRUE)
## q - k MASE AIC BIC GMRAE MBRAE R.Adj.Sq Ljung-Box
## 10 10 0.33647 24.10757 48.43318 10779.390 0.48008 0.98393 0.41122212
## 5 5 0.36542 27.02479 42.78712 6410.480 0.48192 0.98352 0.18919655
## 3 3 0.38312 27.10177 39.14577 5879.350 0.48128 0.98290 0.11147130
## 4 4 0.38276 27.96420 41.88709 7635.896 0.48443 0.98283 0.17399541
## 6 6 0.36302 29.64290 47.20409 4742.320 0.47166 0.98307 0.19759073
## 9 9 0.36482 30.09649 52.79833 8867.244 0.48588 0.98222 0.76300049
## 7 7 0.37392 30.57737 49.89563 5848.238 0.47755 0.98314 0.27764544
## 8 8 0.38196 31.33341 52.36567 8039.153 0.49778 0.98265 0.41731544
## 2 2 0.41082 47.04815 57.17490 5489.740 0.47425 0.97492 0.08786534
## 1 1 0.48746 91.76306 99.93527 5751.269 0.38586 0.94411 0.16242279
Berdasarkan output tersebut, lag optimum didapatkan ketika lag=10. Selanjutnya dilakukan pemodelan untuk lag=10
#model dlm dengan lag optimum
model.dlm <- dlm(x = train$Xt,y = train$Yt , q = 10)
summary(model.dlm)
##
## Call:
## lm(formula = model.formula, data = design)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48304 -0.17355 0.04636 0.17934 0.52554
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.64267 0.89957 -0.714 0.47958
## x.t 0.42814 0.06180 6.928 4.08e-08 ***
## x.1 0.13889 0.13339 1.041 0.30473
## x.2 -0.05734 0.12788 -0.448 0.65654
## x.3 -0.14994 0.12237 -1.225 0.22840
## x.4 0.15911 0.12493 1.274 0.21095
## x.5 -0.01335 0.12436 -0.107 0.91510
## x.6 -0.12709 0.12430 -1.022 0.31338
## x.7 0.27371 0.12253 2.234 0.03180 *
## x.8 -0.37459 0.12097 -3.097 0.00378 **
## x.9 0.28924 0.10409 2.779 0.00862 **
## x.10 -0.09537 0.04837 -1.971 0.05640 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2739 on 36 degrees of freedom
## Multiple R-squared: 0.9877, Adjusted R-squared: 0.9839
## F-statistic: 262.6 on 11 and 36 DF, p-value: < 2.2e-16
##
## AIC and BIC values for the model:
## AIC BIC
## 1 24.10757 48.43318
AIC(model.dlm)
## [1] 24.10757
BIC(model.dlm)
## [1] 48.43318
Dari hasil tersebut terdapat beberapa peubah yang berpengaruh signifikan terhadap taraf nyata 5% yaitu \(x_t\) , \(x_{t-7}\) , \(x_{t-8}\) , \(x_{t-9}\). Adapun keseluruhan model yang terbentuk adalah
\[ \hat{Y_t}=-0.64267+0.42814X_t+0.13889X_{t-1}+ ... -0.09537X_{t10} \] AIC = 24.10757, BIC = 48.43318
#peramalan dan akurasi
fore.dlm <- forecast(model = model.dlm, x=test$Xt, h=14)
#akurasi data test
mape.dlm<- MAPE(fore.dlm$forecasts, test$Yt)
mape.dlm
## [1] 0.0127254
#akurasi data training
GoF(model.dlm)
## n MAE MPE MAPE sMAPE MASE
## model.dlm 48 0.2004478 -7.414126e-05 0.007373085 0.007368538 0.336466
## MSE MRAE GMRAE
## model.dlm 0.05628638 896417348 10779.39
Interpretasi Hasil:
Akurasi pada data uji, MAPE (Mean Absolute Percentage Error) = 0.0127254 (≈ 1.27%) artinya rata-rata kesalahan prediksi model koyck terhadap data uji adalah sekitar 1.27%. Ini menunjukkan bahwa model memiliki performa yang sangat baik dalam memprediksi nilai AQI berdasarkan konsentrasi O3.
Akurasi pada data latih, MAPE = 0.007373085 (≈ 0.737%), artinya rata-rata kesalahan prediksi model koyck terhadap data uji adalah sekitar 0.737%. Ini menunjukkan bahwa model memiliki performa yang sangat baik.
Kita akan menentukan lag optimum
#penentuan lag optimum
model.ardl.opt <- ardlBoundOrders(data = data.frame(o3), ic = "AIC",
formula = Yt ~ Xt )
model.ardl.opt
## $p
## Xt
## 1 13
##
## $q
## [1] 2
##
## $Stat.table
## q = 1 q = 2 q = 3 q = 4 q = 5 q = 6 q = 7 q = 8
## p = 1 38.64746 23.22848 25.42302 27.72031 28.70926 26.39158 27.98723 30.39158
## p = 2 22.86594 24.77455 27.31646 28.69429 28.81115 25.64963 27.33061 30.08845
## p = 3 27.18085 27.18085 28.84077 30.37243 30.71504 25.55412 27.74318 30.55048
## p = 4 27.27918 28.33392 28.33392 27.22651 29.94853 27.52594 29.31474 32.19458
## p = 5 28.22371 28.68011 29.37749 29.37749 30.12962 28.60210 31.06991 33.54724
## p = 6 27.11336 28.13965 27.00217 28.68077 28.68077 30.47844 33.06989 35.19285
## p = 7 30.19026 30.92504 29.24074 30.23773 30.81010 30.81010 29.56882 27.97012
## p = 8 26.81548 26.88306 26.96639 27.84677 26.80343 28.40155 28.40155 29.27146
## p = 9 25.15641 25.64777 25.75789 27.48094 26.12645 27.83358 29.58776 29.58776
## p = 10 28.59615 28.86775 29.29778 31.09265 28.82957 30.63888 32.57903 34.34994
## p = 11 24.53224 23.37847 24.80335 26.48648 23.35632 25.20746 26.60810 26.81938
## p = 12 27.14281 25.70395 27.06234 28.84787 26.03408 27.92602 28.91965 28.33033
## p = 13 19.25858 16.55044 18.53543 20.33824 17.61125 19.60705 21.03414 21.71506
## p = 14 20.31747 18.36045 20.36004 22.34912 19.16521 21.06710 22.41853 23.18530
## p = 15 22.41646 20.32505 22.22270 24.14701 21.64522 23.25026 24.97916 26.17790
## q = 9 q = 10 q = 11 q = 12 q = 13 q = 14 q = 15
## p = 1 33.09252 33.18190 32.84415 33.19542 35.85663 30.71580 27.67083
## p = 2 32.80513 33.98671 32.34936 32.42167 35.04537 31.25200 27.25097
## p = 3 33.31925 34.28268 33.42041 33.94715 36.44309 32.48438 28.87461
## p = 4 34.95803 36.14844 35.27999 35.94335 38.39874 34.28870 30.56428
## p = 5 36.11311 37.41170 36.41377 37.18234 39.82841 36.17273 32.18350
## p = 6 35.88842 36.25737 35.13490 36.94569 39.67535 32.69633 29.29501
## p = 7 30.90878 33.89102 34.52801 36.37243 38.74818 31.23844 29.15791
## p = 8 31.44233 34.39275 26.01225 26.78247 27.92580 24.10080 23.75594
## p = 9 31.57509 33.98987 27.47278 26.92081 26.62574 21.91896 21.71187
## p = 10 34.34994 33.75383 24.46154 25.13892 26.98653 23.27262 23.65748
## p = 11 23.68447 23.68447 24.90968 27.13805 28.98636 25.18996 25.61717
## p = 12 25.59091 27.59091 27.59091 29.08999 29.84364 23.51211 22.55179
## p = 13 19.56880 19.27526 21.04212 21.04212 22.98266 23.87236 23.91841
## p = 14 22.81463 22.84285 24.84274 22.27955 22.27955 24.27951 25.89778
## p = 15 25.36264 24.86236 26.81668 26.73848 27.78117 27.78117 27.75060
##
## $min.Stat
## [1] 16.55044
min_p=c()
for(i in 1:10){ # cek sampai lag 10
min_p[i]=min(model.ardl.opt$Stat.table[[i]])
}
q_opt=which(min_p==min(min_p, na.rm = TRUE))
p_opt=which(model.ardl.opt$Stat.table[[q_opt]] ==
min(model.ardl.opt$Stat.table[[q_opt]], na.rm = TRUE))
data.frame("q_optimum" = q_opt, "p_optimum" = p_opt,
"AIC"=model.ardl.opt$min.Stat)
## q_optimum p_optimum AIC
## 1 2 13 16.55044
Dari tabel di atas, dapat terlihat bahwa nilai AIC terendah didapat
ketika \(p=6\) dan \(q=1\), yaitu sebesar 15.06976
.
Artinya, model autoregressive optimum didapat ketika \(p=2\) dan \(q=13\).
model.ardl <- ardlDlm(formula = Yt ~ Xt,
data = train,p = 13, q = 2)
summary(model.ardl)
##
## Time series regression with "ts" data:
## Start = 14, End = 58
##
## Call:
## dynlm(formula = as.formula(model.text), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41430 -0.19394 0.02836 0.16087 0.47584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.04456 1.06711 -0.979 0.3360
## Xt.t 0.40658 0.06938 5.860 2.67e-06 ***
## Xt.1 0.20835 0.18174 1.146 0.2613
## Xt.2 -0.09590 0.20134 -0.476 0.6376
## Xt.3 -0.13900 0.15910 -0.874 0.3897
## Xt.4 0.07385 0.15906 0.464 0.6460
## Xt.5 0.05911 0.14382 0.411 0.6842
## Xt.6 -0.15248 0.13442 -1.134 0.2663
## Xt.7 0.25536 0.13453 1.898 0.0680 .
## Xt.8 -0.28691 0.14185 -2.023 0.0528 .
## Xt.9 0.12287 0.15600 0.788 0.4375
## Xt.10 0.13887 0.15975 0.869 0.3921
## Xt.11 -0.20179 0.14670 -1.376 0.1799
## Xt.12 0.05814 0.12013 0.484 0.6322
## Xt.13 0.01800 0.05426 0.332 0.7426
## Yt.1 -0.11385 0.19128 -0.595 0.5565
## Yt.2 0.14225 0.18763 0.758 0.4547
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2851 on 28 degrees of freedom
## Multiple R-squared: 0.9892, Adjusted R-squared: 0.9831
## F-statistic: 160.6 on 16 and 28 DF, p-value: < 2.2e-16
AIC(model.ardl)
## [1] 29.42385
BIC(model.ardl)
## [1] 61.94377
Interpretasi Hasil:
fore.ardl <- forecast(model = model.ardl, x=test$Xt, h=14)
fore.ardl
## $forecasts
## [1] 21.77229 20.52108 20.02939 19.14999 18.73570 19.45331 20.86257 21.11623
## [9] 20.47743 21.43916 23.65285 26.32716 27.71046 27.92030
##
## $call
## forecast.ardlDlm(model = model.ardl, x = test$Xt, h = 14)
##
## attr(,"class")
## [1] "forecast.ardlDlm" "dLagM"
mape.ardl <- MAPE(fore.ardl$forecasts, test$Yt)
mape.ardl
## [1] 0.01445912
#akurasi data training
GoF(model.ardl)
## n MAE MPE MAPE sMAPE MASE
## model.ardl 45 0.1934879 -6.779947e-05 0.00711318 0.007110808 0.3547279
## MSE MRAE GMRAE
## model.ardl 0.05058937 932166881 19290.28
Interpretasi Hasil:
Akurasi pada data uji, MAPE (Mean Absolute Percentage Error) = 0.01445912 (≈ 1.44%) artinya rata-rata kesalahan prediksi model koyck terhadap data uji adalah sekitar 1.44%. Ini menunjukkan bahwa model memiliki performa yang sangat baik dalam memprediksi nilai AQI berdasarkan konsentrasi O3.
Akurasi pada data latih, MAPE = 0.0071 (≈ 0.71%), artinya rata-rata kesalahan prediksi model koyck terhadap data uji adalah sekitar 0.75%. Ini menunjukkan bahwa model memiliki performa yang sangat baik.
akurasi <- matrix(c(mape.koyck, mape.dlm, mape.ardl))
row.names(akurasi)<- c("Koyck","DLM" ,"Autoregressive")
colnames(akurasi) <- c("MAPE")
akurasi
## MAPE
## Koyck 0.02909543
## DLM 0.01272540
## Autoregressive 0.01445912
par(mfrow=c(1,1))
plot(test$Xt, test$Yt, type="b", col="black",
ylim=range(c(test$Yt,
fore.koyck$forecasts,
fore.dlm$forecasts,
fore.ardl$forecasts)))
points(test$Xt, fore.koyck$forecasts, col="red")
lines(test$Xt, fore.koyck$forecasts, col="red")
points(test$Xt, fore.dlm$forecasts, col="blue")
lines(test$Xt, fore.dlm$forecasts, col="blue")
points(test$Xt, fore.ardl$forecasts, col="green")
lines(test$Xt, fore.ardl$forecasts, col="green")
legend("topleft", c("aktual", "koyck", "DLM", "autoregressive"),
lty=1, col=c("black","red","blue","green"), cex=0.8)
Interpretasi Hasil: