1 Load Library

library(dLagM)
## Warning: package 'dLagM' was built under R version 4.4.3
## Loading required package: nardl
## Warning: package 'nardl' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: dynlm
## Warning: package 'dynlm' was built under R version 4.4.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(dynlm)
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.4.3
## 
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:dLagM':
## 
##     MAPE
## The following object is masked from 'package:base':
## 
##     Recall
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.2

2 Impor Data

library(rio)
## Warning: package 'rio' was built under R version 4.4.2
data_air_ql <- import("https://raw.githubusercontent.com/nyayunabila/mpdw/refs/heads/main/Pertemuan%203/NewDelhi_Air_quality.csv")

data_air_ql
##    V1  AQI       CO      datetime        no2       o3      pm10     pm25
## 1   0 30.2 198.6027 2022-10-21:18 0.04685717 55.78995 10.486722 5.637410
## 2   1 28.2 197.6013 2022-10-21:19 0.04645554 54.93164 10.719325 4.618169
## 3   2 26.6 198.6027 2022-10-21:20 0.04685717 54.64554 11.155578 3.520902
## 4   3 25.0 201.9405 2022-10-21:21 0.04819594 55.07469 11.116206 2.225919
## 5   4 26.0 205.2784 2022-10-21:22 0.04886533 55.78995 10.405250 1.979471
## 6   5 26.0 208.6163 2022-10-21:23 0.05020411 56.50520  9.402518 1.792677
## 7   6 26.0 208.6163 2022-10-22:00 0.05020411 55.78995  8.482822 1.752237
## 8   7 24.0 205.2784 2022-10-22:01 0.05020411 52.92892  7.586667 1.858459
## 9   8 23.0 200.2716 2022-10-22:02 0.04685717 50.06790  7.636587 2.208076
## 10  9 24.0 196.9338 2022-10-22:03 0.04417961 51.49841  8.487648 2.476681
## 11 10 25.0 198.6027 2022-10-22:04 0.04284084 53.64418  9.357371 2.585729
## 12 11 26.0 200.2716 2022-10-22:05 0.04049798 55.78995 10.338728 2.390081
## 13 12 27.0 203.6095 2022-10-22:06 0.04016329 59.36623 10.523314 2.086022
## 14 13 29.0 205.2784 2022-10-22:07 0.03882451 62.22725 10.635970 1.937047
## 15 14 29.0 205.2784 2022-10-22:08 0.03882451 62.94250  9.964803 1.835311
## 16 15 29.0 205.2784 2022-10-22:09 0.03882451 62.94250  9.214411 1.725189
## 17 16 29.0 203.6095 2022-10-22:10 0.03748573 62.22725  8.772117 1.640662
## 18 17 28.0 201.9405 2022-10-22:11 0.04250614 60.79674  8.753794 1.633373
## 19 18 27.0 200.2716 2022-10-22:12 0.05020411 59.36623  8.807467 1.636810
## 20 19 27.0 201.9405 2022-10-22:13 0.05622860 58.65097  8.774860 1.666231
## 21 20 27.0 201.9405 2022-10-22:14 0.05622860 58.65097  8.675813 1.690578
## 22 21 27.0 201.9405 2022-10-22:15 0.05488982 57.93572  8.915920 1.705802
## 23 22 27.0 200.2716 2022-10-22:16 0.05288166 57.93572  9.227945 1.732754
## 24 23 27.0 200.2716 2022-10-22:17 0.05221227 58.65097  9.491790 1.765387
## 25 24 27.0 200.2716 2022-10-22:18 0.05221227 59.36623  9.907828 1.827327
## 26 25 28.0 201.9405 2022-10-22:19 0.05221227 61.51199 10.095885 1.888416
## 27 26 29.0 203.6095 2022-10-22:20 0.05288166 63.65776  9.879244 1.892860
## 28 27 31.0 205.2784 2022-10-22:21 0.05221227 66.51878  9.503339 1.859564
## 29 28 31.0 206.9473 2022-10-22:22 0.05221227 67.94930  9.008138 1.829196
## 30 29 32.0 206.9473 2022-10-22:23 0.05154289 68.66455  8.729765 1.787945
## 31 30 32.0 205.2784 2022-10-23:00 0.05087350 68.66455  8.912433 1.796048
## 32 31 31.0 206.9473 2022-10-23:01 0.05488982 67.23404  9.337687 1.810690
## 33 32 31.0 206.9473 2022-10-23:02 0.06091431 66.51878  9.351754 1.826056
## 34 33 30.0 208.6163 2022-10-23:03 0.05555921 65.80353  8.640116 1.822618
## 35 34 30.0 206.9473 2022-10-23:04 0.04618778 64.37302  7.750329 1.819336
## 36 35 29.0 205.2784 2022-10-23:05 0.03815512 62.22725  6.815522 1.831419
## 37 36 27.0 203.6095 2022-10-23:06 0.03079185 59.36623  6.965347 1.929624
## 38 37 26.0 200.2716 2022-10-23:07 0.02577144 56.50520  7.022172 2.049507
## 39 38 25.0 200.2716 2022-10-23:08 0.02342858 55.07469  6.946332 2.152871
## 40 39 25.0 198.6027 2022-10-23:09 0.02376328 53.64418  6.834290 2.219349
## 41 40 24.0 196.9338 2022-10-23:10 0.02677552 52.92892  6.686746 2.248050
## 42 41 24.0 195.2648 2022-10-23:11 0.03146124 52.92892  7.593563 2.276788
## 43 42 25.0 195.2648 2022-10-23:12 0.03815512 53.64418  8.999838 2.334924
## 44 43 25.0 195.2648 2022-10-23:13 0.04351023 53.64418 11.022134 2.365020
## 45 44 25.0 196.9338 2022-10-23:14 0.04351023 54.35944 12.198974 2.366476
## 46 45 26.0 198.6027 2022-10-23:15 0.04484900 55.78995 11.969586 2.369681
## 47 46 26.0 198.6027 2022-10-23:16 0.04618778 57.22046  9.593794 2.339545
## 48 47 27.0 198.6027 2022-10-23:17 0.04551839 57.93572  7.763611 2.331504
## 49 48 27.0 198.6027 2022-10-23:18 0.04417961 58.65097  7.517936 2.324217
## 50 49 27.0 198.6027 2022-10-23:19 0.04484900 58.65097  8.041439 2.320275
## 51 50 27.0 200.2716 2022-10-23:20 0.04618778 59.36623  8.654108 2.283845
## 52 51 28.0 200.2716 2022-10-23:21 0.04752655 60.08148  8.514708 2.195546
## 53 52 28.0 200.2716 2022-10-23:22 0.04685717 60.08148  7.923108 2.066149
## 54 53 27.0 200.2716 2022-10-23:23 0.04618778 59.36623  7.097172 2.059171
## 55 54 27.0 198.6027 2022-10-24:00 0.04551839 58.65097  6.815780 2.072363
## 56 55 26.0 198.6027 2022-10-24:01 0.04752655 57.22046  7.661589 2.102422
## 57 56 25.0 198.6027 2022-10-24:02 0.04551839 54.35944  8.718614 2.230021
## 58 57 23.0 198.6027 2022-10-24:03 0.03715104 50.78316  8.457276 2.501037
## 59 58 22.0 196.9338 2022-10-24:04 0.02744491 47.92213  7.184991 2.750613
## 60 59 21.0 193.5959 2022-10-24:05 0.02125307 45.41874  6.950173 2.984422
## 61 60 20.0 193.5959 2022-10-24:06 0.01757144 43.98823  7.205668 3.065950
## 62 61 19.0 191.9270 2022-10-24:07 0.01573062 41.84246  7.898480 3.118563
## 63 62 19.0 191.9270 2022-10-24:08 0.01556327 41.48483  8.744758 3.090378
## 64 63 20.0 191.9270 2022-10-24:09 0.01807348 43.27297  9.623435 2.976335
## 65 64 21.0 191.9270 2022-10-24:10 0.02208981 45.41874 10.253539 2.846886
## 66 65 21.0 191.9270 2022-10-24:11 0.02577144 45.41874 10.703334 2.771884
## 67 66 21.0 191.9270 2022-10-24:12 0.03045716 45.41874 10.663057 2.730851
## 68 67 22.0 193.5959 2022-10-24:13 0.03547757 47.20688 10.423121 2.720472
## 69 68 24.0 195.2648 2022-10-24:14 0.03915921 52.21367 11.240661 2.713109
## 70 69 26.0 196.9338 2022-10-24:15 0.04417961 56.50520 12.098125 2.743044
## 71 70 27.0 198.6027 2022-10-24:16 0.04685717 59.36623 12.845977 2.780022
## 72 71 28.0 198.6027 2022-10-24:17 0.04752655 60.79674 12.583639 2.745875
##          so2     timestamp_local       timestamp_utc         ts
## 1  0.3874302 2022-10-21 23:00:00 2022-10-21 18:00:00 1666375200
## 2  0.4097819 2022-10-22 00:00:00 2022-10-21 19:00:00 1666378800
## 3  0.4023313 2022-10-22 01:00:00 2022-10-21 20:00:00 1666382400
## 4  0.3762543 2022-10-22 02:00:00 2022-10-21 21:00:00 1666386000
## 5  0.3390014 2022-10-22 03:00:00 2022-10-21 22:00:00 1666389600
## 6  0.3129244 2022-10-22 04:00:00 2022-10-21 23:00:00 1666393200
## 7  0.3129244 2022-10-22 05:00:00 2022-10-22 00:00:00 1666396800
## 8  0.3427267 2022-10-22 06:00:00 2022-10-22 01:00:00 1666400400
## 9  0.3948808 2022-10-22 07:00:00 2022-10-22 02:00:00 1666404000
## 10 0.4284084 2022-10-22 08:00:00 2022-10-22 03:00:00 1666407600
## 11 0.4284084 2022-10-22 09:00:00 2022-10-22 04:00:00 1666411200
## 12 0.3986061 2022-10-22 10:00:00 2022-10-22 05:00:00 1666414800
## 13 0.3501773 2022-10-22 11:00:00 2022-10-22 06:00:00 1666418400
## 14 0.3166497 2022-10-22 12:00:00 2022-10-22 07:00:00 1666422000
## 15 0.2980232 2022-10-22 13:00:00 2022-10-22 08:00:00 1666425600
## 16 0.2868474 2022-10-22 14:00:00 2022-10-22 09:00:00 1666429200
## 17 0.2831221 2022-10-22 15:00:00 2022-10-22 10:00:00 1666432800
## 18 0.2942979 2022-10-22 16:00:00 2022-10-22 11:00:00 1666436400
## 19 0.3017485 2022-10-22 17:00:00 2022-10-22 12:00:00 1666440000
## 20 0.3054738 2022-10-22 18:00:00 2022-10-22 13:00:00 1666443600
## 21 0.3091991 2022-10-22 19:00:00 2022-10-22 14:00:00 1666447200
## 22 0.3129244 2022-10-22 20:00:00 2022-10-22 15:00:00 1666450800
## 23 0.3203750 2022-10-22 21:00:00 2022-10-22 16:00:00 1666454400
## 24 0.3241003 2022-10-22 22:00:00 2022-10-22 17:00:00 1666458000
## 25 0.3278256 2022-10-22 23:00:00 2022-10-22 18:00:00 1666461600
## 26 0.3278256 2022-10-23 00:00:00 2022-10-22 19:00:00 1666465200
## 27 0.3203750 2022-10-23 01:00:00 2022-10-22 20:00:00 1666468800
## 28 0.3166497 2022-10-23 02:00:00 2022-10-22 21:00:00 1666472400
## 29 0.3129244 2022-10-23 03:00:00 2022-10-22 22:00:00 1666476000
## 30 0.3166497 2022-10-23 04:00:00 2022-10-22 23:00:00 1666479600
## 31 0.3166497 2022-10-23 05:00:00 2022-10-23 00:00:00 1666483200
## 32 0.3166497 2022-10-23 06:00:00 2022-10-23 01:00:00 1666486800
## 33 0.3203750 2022-10-23 07:00:00 2022-10-23 02:00:00 1666490400
## 34 0.3203750 2022-10-23 08:00:00 2022-10-23 03:00:00 1666494000
## 35 0.3241003 2022-10-23 09:00:00 2022-10-23 04:00:00 1666497600
## 36 0.3278256 2022-10-23 10:00:00 2022-10-23 05:00:00 1666501200
## 37 0.3390014 2022-10-23 11:00:00 2022-10-23 06:00:00 1666504800
## 38 0.3501773 2022-10-23 12:00:00 2022-10-23 07:00:00 1666508400
## 39 0.3576279 2022-10-23 13:00:00 2022-10-23 08:00:00 1666512000
## 40 0.3650784 2022-10-23 14:00:00 2022-10-23 09:00:00 1666515600
## 41 0.3725290 2022-10-23 15:00:00 2022-10-23 10:00:00 1666519200
## 42 0.3762543 2022-10-23 16:00:00 2022-10-23 11:00:00 1666522800
## 43 0.3762543 2022-10-23 17:00:00 2022-10-23 12:00:00 1666526400
## 44 0.3725290 2022-10-23 18:00:00 2022-10-23 13:00:00 1666530000
## 45 0.3725290 2022-10-23 19:00:00 2022-10-23 14:00:00 1666533600
## 46 0.3725290 2022-10-23 20:00:00 2022-10-23 15:00:00 1666537200
## 47 0.3799796 2022-10-23 21:00:00 2022-10-23 16:00:00 1666540800
## 48 0.3911555 2022-10-23 22:00:00 2022-10-23 17:00:00 1666544400
## 49 0.3986061 2022-10-23 23:00:00 2022-10-23 18:00:00 1666548000
## 50 0.3986061 2022-10-24 00:00:00 2022-10-23 19:00:00 1666551600
## 51 0.3874302 2022-10-24 01:00:00 2022-10-23 20:00:00 1666555200
## 52 0.3762543 2022-10-24 02:00:00 2022-10-23 21:00:00 1666558800
## 53 0.3650784 2022-10-24 03:00:00 2022-10-23 22:00:00 1666562400
## 54 0.3688037 2022-10-24 04:00:00 2022-10-23 23:00:00 1666566000
## 55 0.3762543 2022-10-24 05:00:00 2022-10-24 00:00:00 1666569600
## 56 0.3837049 2022-10-24 06:00:00 2022-10-24 01:00:00 1666573200
## 57 0.4023313 2022-10-24 07:00:00 2022-10-24 02:00:00 1666576800
## 58 0.4284084 2022-10-24 08:00:00 2022-10-24 03:00:00 1666580400
## 59 0.4433095 2022-10-24 09:00:00 2022-10-24 04:00:00 1666584000
## 60 0.4544854 2022-10-24 10:00:00 2022-10-24 05:00:00 1666587600
## 61 0.4544854 2022-10-24 11:00:00 2022-10-24 06:00:00 1666591200
## 62 0.4507601 2022-10-24 12:00:00 2022-10-24 07:00:00 1666594800
## 63 0.4395843 2022-10-24 13:00:00 2022-10-24 08:00:00 1666598400
## 64 0.4209578 2022-10-24 14:00:00 2022-10-24 09:00:00 1666602000
## 65 0.4097819 2022-10-24 15:00:00 2022-10-24 10:00:00 1666605600
## 66 0.4060566 2022-10-24 16:00:00 2022-10-24 11:00:00 1666609200
## 67 0.3986061 2022-10-24 17:00:00 2022-10-24 12:00:00 1666612800
## 68 0.3911555 2022-10-24 18:00:00 2022-10-24 13:00:00 1666616400
## 69 0.3837049 2022-10-24 19:00:00 2022-10-24 14:00:00 1666620000
## 70 0.3762543 2022-10-24 20:00:00 2022-10-24 15:00:00 1666623600
## 71 0.3725290 2022-10-24 21:00:00 2022-10-24 16:00:00 1666627200
## 72 0.3688037 2022-10-24 22:00:00 2022-10-24 17:00:00 1666630800

3 Eksplorasi Data

Untuk mengecek X mana yang akan kita gunakan, maka kita akan gunakan korelasi peubah X dengan AQI

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.2
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.2
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.2
## Warning: package 'lubridate' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some()   masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
colnames(data_air_ql)
##  [1] "V1"              "AQI"             "CO"              "datetime"       
##  [5] "no2"             "o3"              "pm10"            "pm25"           
##  [9] "so2"             "timestamp_local" "timestamp_utc"   "ts"
cor_values <- cor(data_air_ql %>% select(AQI, pm25, pm10, no2, so2, o3, CO), use = "complete.obs")
print(cor_values[1, -1]) 
##        pm25        pm10         no2         so2          o3          CO 
## -0.34665524  0.08257568  0.73765472 -0.74247813  0.97359901  0.79763320

Kita akan mencari X yang korelasinya tinggi dengan AQI, pada dataset ini, O3 memiliki korelasi yang tinggi, sehingga O3 adalah variabel paling cocok dipakai sebagai X tunggal untuk regresi lag.

4 Regresi Lag

4.1 data dengan X = o3 dan Y = AQI

o3 <- data_air_ql %>% select(AQI, o3) %>% na.omit()
o3 <- o3 %>% mutate(
             t = row_number(),
             Yt = AQI,
             Yt_1 = lag(Yt, 1),
             Xt = o3
             ) %>%
            select(t, Yt, Yt_1, Xt)

o3
##     t   Yt Yt_1       Xt
## 1   1 30.2   NA 55.78995
## 2   2 28.2 30.2 54.93164
## 3   3 26.6 28.2 54.64554
## 4   4 25.0 26.6 55.07469
## 5   5 26.0 25.0 55.78995
## 6   6 26.0 26.0 56.50520
## 7   7 26.0 26.0 55.78995
## 8   8 24.0 26.0 52.92892
## 9   9 23.0 24.0 50.06790
## 10 10 24.0 23.0 51.49841
## 11 11 25.0 24.0 53.64418
## 12 12 26.0 25.0 55.78995
## 13 13 27.0 26.0 59.36623
## 14 14 29.0 27.0 62.22725
## 15 15 29.0 29.0 62.94250
## 16 16 29.0 29.0 62.94250
## 17 17 29.0 29.0 62.22725
## 18 18 28.0 29.0 60.79674
## 19 19 27.0 28.0 59.36623
## 20 20 27.0 27.0 58.65097
## 21 21 27.0 27.0 58.65097
## 22 22 27.0 27.0 57.93572
## 23 23 27.0 27.0 57.93572
## 24 24 27.0 27.0 58.65097
## 25 25 27.0 27.0 59.36623
## 26 26 28.0 27.0 61.51199
## 27 27 29.0 28.0 63.65776
## 28 28 31.0 29.0 66.51878
## 29 29 31.0 31.0 67.94930
## 30 30 32.0 31.0 68.66455
## 31 31 32.0 32.0 68.66455
## 32 32 31.0 32.0 67.23404
## 33 33 31.0 31.0 66.51878
## 34 34 30.0 31.0 65.80353
## 35 35 30.0 30.0 64.37302
## 36 36 29.0 30.0 62.22725
## 37 37 27.0 29.0 59.36623
## 38 38 26.0 27.0 56.50520
## 39 39 25.0 26.0 55.07469
## 40 40 25.0 25.0 53.64418
## 41 41 24.0 25.0 52.92892
## 42 42 24.0 24.0 52.92892
## 43 43 25.0 24.0 53.64418
## 44 44 25.0 25.0 53.64418
## 45 45 25.0 25.0 54.35944
## 46 46 26.0 25.0 55.78995
## 47 47 26.0 26.0 57.22046
## 48 48 27.0 26.0 57.93572
## 49 49 27.0 27.0 58.65097
## 50 50 27.0 27.0 58.65097
## 51 51 27.0 27.0 59.36623
## 52 52 28.0 27.0 60.08148
## 53 53 28.0 28.0 60.08148
## 54 54 27.0 28.0 59.36623
## 55 55 27.0 27.0 58.65097
## 56 56 26.0 27.0 57.22046
## 57 57 25.0 26.0 54.35944
## 58 58 23.0 25.0 50.78316
## 59 59 22.0 23.0 47.92213
## 60 60 21.0 22.0 45.41874
## 61 61 20.0 21.0 43.98823
## 62 62 19.0 20.0 41.84246
## 63 63 19.0 19.0 41.48483
## 64 64 20.0 19.0 43.27297
## 65 65 21.0 20.0 45.41874
## 66 66 21.0 21.0 45.41874
## 67 67 21.0 21.0 45.41874
## 68 68 22.0 21.0 47.20688
## 69 69 24.0 22.0 52.21367
## 70 70 26.0 24.0 56.50520
## 71 71 27.0 26.0 59.36623
## 72 72 28.0 27.0 60.79674

4.2 Cek asumsi

library(tidyverse)

#cek linearitas
ggplot(o3, aes(x = Xt, y = Yt)) +
  geom_point(alpha = 0.5, color = "steelblue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(title = "Scatterplot AQI vs O3",
       x = "O3",
       y = "Air Quality Index (AQI)")
## `geom_smooth()` using formula = 'y ~ x'

modelo3 <- lm(Yt ~ Xt, data = o3)
#cek normalitas residual
shapiro.test(resid(modelo3))
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(modelo3)
## W = 0.51931, p-value = 5.517e-14
#uji independensi residual
dwtest(modelo3)
## 
##  Durbin-Watson test
## 
## data:  modelo3
## DW = 0.54636, p-value = 3.207e-14
## alternative hypothesis: true autocorrelation is greater than 0
#uji homoskedastisitas
bptest(modelo3)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo3
## BP = 0.039736, df = 1, p-value = 0.842

Dari hasil uji asumsi di atas dapat disimpulkan bahwa,

  1. Hubungan X dan Y linear
  2. Pada tes shapiro-wilk, p-value = 5.517e-14 (< 0.05), artinya asumsi normalitas tidak terpenuhi
  3. Pada tes durbin-watson, p-value = 3.207e-14 (< 0.05), artinya ada autokorelasi
  4. Pada tes breusch pagan, p-value = 0.8412 (> 0.05), artinya residual konstan

Untuk mengatasi autokorelasi, kita dapat menggunakan regresi lag. Akan digunakan 3 macam model, yaitu Koyck, Distributed Lag, Autoregressive.

4.3 Pembagian Data

#split data
train<-o3[1:58,]
test<-o3[59:72,]
#data time series
train.ts<-ts(train)
test.ts<-ts(test)
data.ts<-ts(o3)

4.4 Pemodelan

4.4.1 Model Koyck

model.koyck <- koyckDlm(x = train$Xt, y = train$Yt)
summary(model.koyck)
## 
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -1.112282 -0.366467 -0.002877  0.258311  1.106529 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.26260    0.93950   0.280    0.781    
## Y.1          0.42943    0.08641   4.969 7.14e-06 ***
## X.t          0.25823    0.04259   6.064 1.35e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5239 on 54 degrees of freedom
## Multiple R-Squared: 0.9449,  Adjusted R-squared: 0.9429 
## Wald test: 417.1 on 2 and 54 DF,  p-value: < 2.2e-16 
## 
## Diagnostic tests:
## NULL
## 
##                             alpha      beta       phi
## Geometric coefficients:  0.460243 0.2582336 0.4294305
AIC(model.koyck)
## [1] 92.98106
BIC(model.koyck)
## [1] 101.1533

Interpretasi Hasil:

  • Dari summary, kita dapatkan model:

    \[ \hat{Y_t}=0.26260+0.25823X_t+0.42943Y_{t-1} \]

    Koefisien \(X_t\) (0.25823) signifikan, artinya nilai $X$ saat ini berpengaruh terhadap \(Y\).

  • Koefisien \(Y_{t−1}\) (0.42943) signifikan. Hal ini menunjukkan bahwa Lag variabel dependen (Yt-1) = 0.4294, signifikan (p < 0.001) → AQI periode sebelumnya berpengaruh positif terhadap AQI periode sekarang

  • AIC = 92.98106, BIC = 101.1533

4.4.1.1 Peramalan dan Akurasi

Berikut adalah hasil peramalan y untuk 5 periode kedepan menggunakan model koyck

fore.koyck <- forecast(model = model.koyck, x=test$Xt, h=14)
fore.koyck
## $forecasts
##  [1] 22.51461 21.65971 20.92318 20.05278 19.58666 19.84824 20.51469 20.80088
##  [9] 20.92378 21.43831 22.95219 24.71052 26.20441 27.21533
## 
## $call
## forecast.koyckDlm(model = model.koyck, x = test$Xt, h = 14)
## 
## attr(,"class")
## [1] "forecast.koyckDlm" "dLagM"
mape.koyck <- MAPE(fore.koyck$forecasts, test$Yt)
mape.koyck #data test
## [1] 0.02909543
#akurasi data training
GoF(model.koyck)
##              n     MAE           MPE       MAPE      sMAPE      MASE      MSE
## model.koyck 57 0.39765 -0.0005611615 0.01494044 0.01491151 0.5986129 0.260024
##                  MRAE    GMRAE
## model.koyck 545222624 5097.033

Interpretasi Hasil:

  • Akurasi pada data uji, MAPE (Mean Absolute Percentage Error) = 0.02909543 (≈ 2.9%) artinya rata-rata kesalahan prediksi model koyck terhadap data uji adalah sekitar 2.9% < 10%. Ini menunjukkan bahwa model memiliki performa yang baik dalam memprediksi nilai AQI berdasarkan konsentrasi O3.

  • Akurasi pada data latih, MAPE = 0.0149(≈ 1.49%), artinya rata-rata kesalahan prediksi model koyck terhadap data uji adalah sekitar 1.49%. Ini menunjukkan bahwa model memiliki performa yang sangat baik.

4.4.2 Regression with Distributed Lag

Kita akan menentukan Lag Optimum

#penentuan lag optimum 
finiteDLMauto(formula = Yt ~ Xt,
              data = data.frame(train), q.min = 1, q.max = 10,
              model.type = "dlm", error.type = "AIC", trace = TRUE)
##    q - k    MASE      AIC      BIC     GMRAE   MBRAE R.Adj.Sq  Ljung-Box
## 10    10 0.33647 24.10757 48.43318 10779.390 0.48008  0.98393 0.41122212
## 5      5 0.36542 27.02479 42.78712  6410.480 0.48192  0.98352 0.18919655
## 3      3 0.38312 27.10177 39.14577  5879.350 0.48128  0.98290 0.11147130
## 4      4 0.38276 27.96420 41.88709  7635.896 0.48443  0.98283 0.17399541
## 6      6 0.36302 29.64290 47.20409  4742.320 0.47166  0.98307 0.19759073
## 9      9 0.36482 30.09649 52.79833  8867.244 0.48588  0.98222 0.76300049
## 7      7 0.37392 30.57737 49.89563  5848.238 0.47755  0.98314 0.27764544
## 8      8 0.38196 31.33341 52.36567  8039.153 0.49778  0.98265 0.41731544
## 2      2 0.41082 47.04815 57.17490  5489.740 0.47425  0.97492 0.08786534
## 1      1 0.48746 91.76306 99.93527  5751.269 0.38586  0.94411 0.16242279

Berdasarkan output tersebut, lag optimum didapatkan ketika lag=10. Selanjutnya dilakukan pemodelan untuk lag=10

#model dlm dengan lag optimum
model.dlm <- dlm(x = train$Xt,y = train$Yt , q = 10)
summary(model.dlm)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48304 -0.17355  0.04636  0.17934  0.52554 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.64267    0.89957  -0.714  0.47958    
## x.t          0.42814    0.06180   6.928 4.08e-08 ***
## x.1          0.13889    0.13339   1.041  0.30473    
## x.2         -0.05734    0.12788  -0.448  0.65654    
## x.3         -0.14994    0.12237  -1.225  0.22840    
## x.4          0.15911    0.12493   1.274  0.21095    
## x.5         -0.01335    0.12436  -0.107  0.91510    
## x.6         -0.12709    0.12430  -1.022  0.31338    
## x.7          0.27371    0.12253   2.234  0.03180 *  
## x.8         -0.37459    0.12097  -3.097  0.00378 ** 
## x.9          0.28924    0.10409   2.779  0.00862 ** 
## x.10        -0.09537    0.04837  -1.971  0.05640 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2739 on 36 degrees of freedom
## Multiple R-squared:  0.9877, Adjusted R-squared:  0.9839 
## F-statistic: 262.6 on 11 and 36 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##        AIC      BIC
## 1 24.10757 48.43318
AIC(model.dlm)
## [1] 24.10757
BIC(model.dlm)
## [1] 48.43318

Dari hasil tersebut terdapat beberapa peubah yang berpengaruh signifikan terhadap taraf nyata 5% yaitu \(x_t\) , \(x_{t-7}\) , \(x_{t-8}\) , \(x_{t-9}\). Adapun keseluruhan model yang terbentuk adalah

\[ \hat{Y_t}=-0.64267+0.42814X_t+0.13889X_{t-1}+ ... -0.09537X_{t10} \] AIC = 24.10757, BIC = 48.43318

4.4.2.1 Peramalan dan Akurasi
#peramalan dan akurasi
fore.dlm <- forecast(model = model.dlm, x=test$Xt, h=14)

#akurasi data test
mape.dlm<- MAPE(fore.dlm$forecasts, test$Yt)
mape.dlm
## [1] 0.0127254
#akurasi data training
GoF(model.dlm)
##            n       MAE           MPE        MAPE       sMAPE     MASE
## model.dlm 48 0.2004478 -7.414126e-05 0.007373085 0.007368538 0.336466
##                  MSE      MRAE    GMRAE
## model.dlm 0.05628638 896417348 10779.39

Interpretasi Hasil:

  • Akurasi pada data uji, MAPE (Mean Absolute Percentage Error) = 0.0127254 (≈ 1.27%) artinya rata-rata kesalahan prediksi model koyck terhadap data uji adalah sekitar 1.27%. Ini menunjukkan bahwa model memiliki performa yang sangat baik dalam memprediksi nilai AQI berdasarkan konsentrasi O3.

  • Akurasi pada data latih, MAPE = 0.007373085 (≈ 0.737%), artinya rata-rata kesalahan prediksi model koyck terhadap data uji adalah sekitar 0.737%. Ini menunjukkan bahwa model memiliki performa yang sangat baik.

4.4.3 Autoregressive Distributed Lag

Kita akan menentukan lag optimum

#penentuan lag optimum
model.ardl.opt <- ardlBoundOrders(data = data.frame(o3), ic = "AIC", 
                                  formula = Yt ~ Xt )
model.ardl.opt
## $p
##   Xt
## 1 13
## 
## $q
## [1] 2
## 
## $Stat.table
##           q = 1    q = 2    q = 3    q = 4    q = 5    q = 6    q = 7    q = 8
## p = 1  38.64746 23.22848 25.42302 27.72031 28.70926 26.39158 27.98723 30.39158
## p = 2  22.86594 24.77455 27.31646 28.69429 28.81115 25.64963 27.33061 30.08845
## p = 3  27.18085 27.18085 28.84077 30.37243 30.71504 25.55412 27.74318 30.55048
## p = 4  27.27918 28.33392 28.33392 27.22651 29.94853 27.52594 29.31474 32.19458
## p = 5  28.22371 28.68011 29.37749 29.37749 30.12962 28.60210 31.06991 33.54724
## p = 6  27.11336 28.13965 27.00217 28.68077 28.68077 30.47844 33.06989 35.19285
## p = 7  30.19026 30.92504 29.24074 30.23773 30.81010 30.81010 29.56882 27.97012
## p = 8  26.81548 26.88306 26.96639 27.84677 26.80343 28.40155 28.40155 29.27146
## p = 9  25.15641 25.64777 25.75789 27.48094 26.12645 27.83358 29.58776 29.58776
## p = 10 28.59615 28.86775 29.29778 31.09265 28.82957 30.63888 32.57903 34.34994
## p = 11 24.53224 23.37847 24.80335 26.48648 23.35632 25.20746 26.60810 26.81938
## p = 12 27.14281 25.70395 27.06234 28.84787 26.03408 27.92602 28.91965 28.33033
## p = 13 19.25858 16.55044 18.53543 20.33824 17.61125 19.60705 21.03414 21.71506
## p = 14 20.31747 18.36045 20.36004 22.34912 19.16521 21.06710 22.41853 23.18530
## p = 15 22.41646 20.32505 22.22270 24.14701 21.64522 23.25026 24.97916 26.17790
##           q = 9   q = 10   q = 11   q = 12   q = 13   q = 14   q = 15
## p = 1  33.09252 33.18190 32.84415 33.19542 35.85663 30.71580 27.67083
## p = 2  32.80513 33.98671 32.34936 32.42167 35.04537 31.25200 27.25097
## p = 3  33.31925 34.28268 33.42041 33.94715 36.44309 32.48438 28.87461
## p = 4  34.95803 36.14844 35.27999 35.94335 38.39874 34.28870 30.56428
## p = 5  36.11311 37.41170 36.41377 37.18234 39.82841 36.17273 32.18350
## p = 6  35.88842 36.25737 35.13490 36.94569 39.67535 32.69633 29.29501
## p = 7  30.90878 33.89102 34.52801 36.37243 38.74818 31.23844 29.15791
## p = 8  31.44233 34.39275 26.01225 26.78247 27.92580 24.10080 23.75594
## p = 9  31.57509 33.98987 27.47278 26.92081 26.62574 21.91896 21.71187
## p = 10 34.34994 33.75383 24.46154 25.13892 26.98653 23.27262 23.65748
## p = 11 23.68447 23.68447 24.90968 27.13805 28.98636 25.18996 25.61717
## p = 12 25.59091 27.59091 27.59091 29.08999 29.84364 23.51211 22.55179
## p = 13 19.56880 19.27526 21.04212 21.04212 22.98266 23.87236 23.91841
## p = 14 22.81463 22.84285 24.84274 22.27955 22.27955 24.27951 25.89778
## p = 15 25.36264 24.86236 26.81668 26.73848 27.78117 27.78117 27.75060
## 
## $min.Stat
## [1] 16.55044
min_p=c()
for(i in 1:10){ # cek sampai lag 10
  min_p[i]=min(model.ardl.opt$Stat.table[[i]])
}
q_opt=which(min_p==min(min_p, na.rm = TRUE))
p_opt=which(model.ardl.opt$Stat.table[[q_opt]] == 
              min(model.ardl.opt$Stat.table[[q_opt]], na.rm = TRUE))
data.frame("q_optimum" = q_opt, "p_optimum" = p_opt, 
           "AIC"=model.ardl.opt$min.Stat)
##   q_optimum p_optimum      AIC
## 1         2        13 16.55044

Dari tabel di atas, dapat terlihat bahwa nilai AIC terendah didapat ketika \(p=6\) dan \(q=1\), yaitu sebesar 15.06976. Artinya, model autoregressive optimum didapat ketika \(p=2\) dan \(q=13\).

model.ardl <- ardlDlm(formula = Yt ~ Xt, 
                         data = train,p = 13, q = 2)
summary(model.ardl)
## 
## Time series regression with "ts" data:
## Start = 14, End = 58
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41430 -0.19394  0.02836  0.16087  0.47584 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.04456    1.06711  -0.979   0.3360    
## Xt.t         0.40658    0.06938   5.860 2.67e-06 ***
## Xt.1         0.20835    0.18174   1.146   0.2613    
## Xt.2        -0.09590    0.20134  -0.476   0.6376    
## Xt.3        -0.13900    0.15910  -0.874   0.3897    
## Xt.4         0.07385    0.15906   0.464   0.6460    
## Xt.5         0.05911    0.14382   0.411   0.6842    
## Xt.6        -0.15248    0.13442  -1.134   0.2663    
## Xt.7         0.25536    0.13453   1.898   0.0680 .  
## Xt.8        -0.28691    0.14185  -2.023   0.0528 .  
## Xt.9         0.12287    0.15600   0.788   0.4375    
## Xt.10        0.13887    0.15975   0.869   0.3921    
## Xt.11       -0.20179    0.14670  -1.376   0.1799    
## Xt.12        0.05814    0.12013   0.484   0.6322    
## Xt.13        0.01800    0.05426   0.332   0.7426    
## Yt.1        -0.11385    0.19128  -0.595   0.5565    
## Yt.2         0.14225    0.18763   0.758   0.4547    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2851 on 28 degrees of freedom
## Multiple R-squared:  0.9892, Adjusted R-squared:  0.9831 
## F-statistic: 160.6 on 16 and 28 DF,  p-value: < 2.2e-16
AIC(model.ardl)
## [1] 29.42385
BIC(model.ardl)
## [1] 61.94377

Interpretasi Hasil:

  • Model ARDL menemukan bahwa pengaruh utama berasal dari O₃ saat ini (Xt.t, koefisien = 0.4066, p < 0.001) yang signifikan pada taraf 5%.
  • Nilai AQI sebelumnya (autoregresif Yt-1, Yt-2) tidak signifikan, artinya AQI lebih banyak dipengaruhi oleh kondisi O₃ dibandingkan “memori” AQI itu sendiri.
4.4.3.1 Peramalan dan Akurasi
fore.ardl <- forecast(model = model.ardl, x=test$Xt, h=14)
fore.ardl
## $forecasts
##  [1] 21.77229 20.52108 20.02939 19.14999 18.73570 19.45331 20.86257 21.11623
##  [9] 20.47743 21.43916 23.65285 26.32716 27.71046 27.92030
## 
## $call
## forecast.ardlDlm(model = model.ardl, x = test$Xt, h = 14)
## 
## attr(,"class")
## [1] "forecast.ardlDlm" "dLagM"
mape.ardl <- MAPE(fore.ardl$forecasts, test$Yt)
mape.ardl
## [1] 0.01445912
#akurasi data training
GoF(model.ardl)
##             n       MAE           MPE       MAPE       sMAPE      MASE
## model.ardl 45 0.1934879 -6.779947e-05 0.00711318 0.007110808 0.3547279
##                   MSE      MRAE    GMRAE
## model.ardl 0.05058937 932166881 19290.28

Interpretasi Hasil:

  • Akurasi pada data uji, MAPE (Mean Absolute Percentage Error) = 0.01445912 (≈ 1.44%) artinya rata-rata kesalahan prediksi model koyck terhadap data uji adalah sekitar 1.44%. Ini menunjukkan bahwa model memiliki performa yang sangat baik dalam memprediksi nilai AQI berdasarkan konsentrasi O3.

  • Akurasi pada data latih, MAPE = 0.0071 (≈ 0.71%), artinya rata-rata kesalahan prediksi model koyck terhadap data uji adalah sekitar 0.75%. Ini menunjukkan bahwa model memiliki performa yang sangat baik.

4.5 Perbandingan Model

akurasi <- matrix(c(mape.koyck, mape.dlm, mape.ardl))
row.names(akurasi)<- c("Koyck","DLM" ,"Autoregressive")
colnames(akurasi) <- c("MAPE")
akurasi
##                      MAPE
## Koyck          0.02909543
## DLM            0.01272540
## Autoregressive 0.01445912

4.6 Plot

par(mfrow=c(1,1))
plot(test$Xt, test$Yt, type="b", col="black",
     ylim=range(c(test$Yt,
                  fore.koyck$forecasts,
                  fore.dlm$forecasts,
                  fore.ardl$forecasts)))
points(test$Xt, fore.koyck$forecasts, col="red")
lines(test$Xt, fore.koyck$forecasts, col="red")
points(test$Xt, fore.dlm$forecasts, col="blue")
lines(test$Xt, fore.dlm$forecasts, col="blue")
points(test$Xt, fore.ardl$forecasts, col="green")
lines(test$Xt, fore.ardl$forecasts, col="green")
legend("topleft", c("aktual", "koyck", "DLM", "autoregressive"),
       lty=1, col=c("black","red","blue","green"), cex=0.8)

Interpretasi Hasil:

  • Dari grafik terlihat bahwa semua model mengikuti pola data aktual dengan sangat baik. Perbedaan antar model kecil sekali. Tetapi jika kita bandingkan nilai MAPE, model DLM memiliki nilai MAPE terendah (1.27%). Hal ini menunjukkan bahwa DLM (Distributed Lag Model) adalah model terbaik untuk data ini karena memiliki MAPE terkecil.