Input Data

data_udara<- read.csv("C:\\Users\\MUTHI'AH IFFA\\Downloads\\Semester 5\\MPDW\\Praktikum\\Minggu 3\\NewDelhi_Air_quality.csv", header = TRUE, sep = ",")
data_udara
##     X  AQI       CO      datetime        no2       o3      pm10     pm25
## 1   0 30.2 198.6027 2022-10-21:18 0.04685717 55.78995 10.486722 5.637410
## 2   1 28.2 197.6013 2022-10-21:19 0.04645554 54.93164 10.719325 4.618169
## 3   2 26.6 198.6027 2022-10-21:20 0.04685717 54.64554 11.155578 3.520902
## 4   3 25.0 201.9405 2022-10-21:21 0.04819594 55.07469 11.116206 2.225919
## 5   4 26.0 205.2784 2022-10-21:22 0.04886533 55.78995 10.405250 1.979471
## 6   5 26.0 208.6163 2022-10-21:23 0.05020411 56.50520  9.402518 1.792677
## 7   6 26.0 208.6163 2022-10-22:00 0.05020411 55.78995  8.482822 1.752237
## 8   7 24.0 205.2784 2022-10-22:01 0.05020411 52.92892  7.586667 1.858459
## 9   8 23.0 200.2716 2022-10-22:02 0.04685717 50.06790  7.636587 2.208076
## 10  9 24.0 196.9338 2022-10-22:03 0.04417961 51.49841  8.487648 2.476681
## 11 10 25.0 198.6027 2022-10-22:04 0.04284084 53.64418  9.357371 2.585729
## 12 11 26.0 200.2716 2022-10-22:05 0.04049798 55.78995 10.338728 2.390081
## 13 12 27.0 203.6095 2022-10-22:06 0.04016329 59.36623 10.523314 2.086022
## 14 13 29.0 205.2784 2022-10-22:07 0.03882451 62.22725 10.635970 1.937047
## 15 14 29.0 205.2784 2022-10-22:08 0.03882451 62.94250  9.964803 1.835311
## 16 15 29.0 205.2784 2022-10-22:09 0.03882451 62.94250  9.214411 1.725189
## 17 16 29.0 203.6095 2022-10-22:10 0.03748573 62.22725  8.772117 1.640662
## 18 17 28.0 201.9405 2022-10-22:11 0.04250614 60.79674  8.753794 1.633373
## 19 18 27.0 200.2716 2022-10-22:12 0.05020411 59.36623  8.807467 1.636810
## 20 19 27.0 201.9405 2022-10-22:13 0.05622860 58.65097  8.774860 1.666231
## 21 20 27.0 201.9405 2022-10-22:14 0.05622860 58.65097  8.675813 1.690578
## 22 21 27.0 201.9405 2022-10-22:15 0.05488982 57.93572  8.915920 1.705802
## 23 22 27.0 200.2716 2022-10-22:16 0.05288166 57.93572  9.227945 1.732754
## 24 23 27.0 200.2716 2022-10-22:17 0.05221227 58.65097  9.491790 1.765387
## 25 24 27.0 200.2716 2022-10-22:18 0.05221227 59.36623  9.907828 1.827327
## 26 25 28.0 201.9405 2022-10-22:19 0.05221227 61.51199 10.095885 1.888416
## 27 26 29.0 203.6095 2022-10-22:20 0.05288166 63.65776  9.879244 1.892860
## 28 27 31.0 205.2784 2022-10-22:21 0.05221227 66.51878  9.503339 1.859564
## 29 28 31.0 206.9473 2022-10-22:22 0.05221227 67.94930  9.008138 1.829196
## 30 29 32.0 206.9473 2022-10-22:23 0.05154289 68.66455  8.729765 1.787945
## 31 30 32.0 205.2784 2022-10-23:00 0.05087350 68.66455  8.912433 1.796048
## 32 31 31.0 206.9473 2022-10-23:01 0.05488982 67.23404  9.337687 1.810690
## 33 32 31.0 206.9473 2022-10-23:02 0.06091431 66.51878  9.351754 1.826056
## 34 33 30.0 208.6163 2022-10-23:03 0.05555921 65.80353  8.640116 1.822618
## 35 34 30.0 206.9473 2022-10-23:04 0.04618778 64.37302  7.750329 1.819336
## 36 35 29.0 205.2784 2022-10-23:05 0.03815512 62.22725  6.815522 1.831419
## 37 36 27.0 203.6095 2022-10-23:06 0.03079185 59.36623  6.965347 1.929624
## 38 37 26.0 200.2716 2022-10-23:07 0.02577144 56.50520  7.022172 2.049507
## 39 38 25.0 200.2716 2022-10-23:08 0.02342858 55.07469  6.946332 2.152871
## 40 39 25.0 198.6027 2022-10-23:09 0.02376328 53.64418  6.834290 2.219349
## 41 40 24.0 196.9338 2022-10-23:10 0.02677552 52.92892  6.686746 2.248050
## 42 41 24.0 195.2648 2022-10-23:11 0.03146124 52.92892  7.593563 2.276788
## 43 42 25.0 195.2648 2022-10-23:12 0.03815512 53.64418  8.999838 2.334924
## 44 43 25.0 195.2648 2022-10-23:13 0.04351023 53.64418 11.022134 2.365020
## 45 44 25.0 196.9338 2022-10-23:14 0.04351023 54.35944 12.198974 2.366476
## 46 45 26.0 198.6027 2022-10-23:15 0.04484900 55.78995 11.969586 2.369681
## 47 46 26.0 198.6027 2022-10-23:16 0.04618778 57.22046  9.593794 2.339545
## 48 47 27.0 198.6027 2022-10-23:17 0.04551839 57.93572  7.763611 2.331504
## 49 48 27.0 198.6027 2022-10-23:18 0.04417961 58.65097  7.517936 2.324217
## 50 49 27.0 198.6027 2022-10-23:19 0.04484900 58.65097  8.041439 2.320275
## 51 50 27.0 200.2716 2022-10-23:20 0.04618778 59.36623  8.654108 2.283845
## 52 51 28.0 200.2716 2022-10-23:21 0.04752655 60.08148  8.514708 2.195546
## 53 52 28.0 200.2716 2022-10-23:22 0.04685717 60.08148  7.923108 2.066149
## 54 53 27.0 200.2716 2022-10-23:23 0.04618778 59.36623  7.097172 2.059171
## 55 54 27.0 198.6027 2022-10-24:00 0.04551839 58.65097  6.815780 2.072363
## 56 55 26.0 198.6027 2022-10-24:01 0.04752655 57.22046  7.661589 2.102422
## 57 56 25.0 198.6027 2022-10-24:02 0.04551839 54.35944  8.718614 2.230021
## 58 57 23.0 198.6027 2022-10-24:03 0.03715104 50.78316  8.457276 2.501037
## 59 58 22.0 196.9338 2022-10-24:04 0.02744491 47.92213  7.184991 2.750613
## 60 59 21.0 193.5959 2022-10-24:05 0.02125307 45.41874  6.950173 2.984422
## 61 60 20.0 193.5959 2022-10-24:06 0.01757144 43.98823  7.205668 3.065950
## 62 61 19.0 191.9270 2022-10-24:07 0.01573062 41.84246  7.898480 3.118563
## 63 62 19.0 191.9270 2022-10-24:08 0.01556327 41.48483  8.744758 3.090378
## 64 63 20.0 191.9270 2022-10-24:09 0.01807348 43.27297  9.623435 2.976335
## 65 64 21.0 191.9270 2022-10-24:10 0.02208981 45.41874 10.253539 2.846886
## 66 65 21.0 191.9270 2022-10-24:11 0.02577144 45.41874 10.703334 2.771884
## 67 66 21.0 191.9270 2022-10-24:12 0.03045716 45.41874 10.663057 2.730851
## 68 67 22.0 193.5959 2022-10-24:13 0.03547757 47.20688 10.423121 2.720472
## 69 68 24.0 195.2648 2022-10-24:14 0.03915921 52.21367 11.240661 2.713109
## 70 69 26.0 196.9338 2022-10-24:15 0.04417961 56.50520 12.098125 2.743044
## 71 70 27.0 198.6027 2022-10-24:16 0.04685717 59.36623 12.845977 2.780022
## 72 71 28.0 198.6027 2022-10-24:17 0.04752655 60.79674 12.583639 2.745875
##          so2     timestamp_local       timestamp_utc         ts
## 1  0.3874302 2022-10-21T23:00:00 2022-10-21T18:00:00 1666375200
## 2  0.4097819 2022-10-22T00:00:00 2022-10-21T19:00:00 1666378800
## 3  0.4023313 2022-10-22T01:00:00 2022-10-21T20:00:00 1666382400
## 4  0.3762543 2022-10-22T02:00:00 2022-10-21T21:00:00 1666386000
## 5  0.3390014 2022-10-22T03:00:00 2022-10-21T22:00:00 1666389600
## 6  0.3129244 2022-10-22T04:00:00 2022-10-21T23:00:00 1666393200
## 7  0.3129244 2022-10-22T05:00:00 2022-10-22T00:00:00 1666396800
## 8  0.3427267 2022-10-22T06:00:00 2022-10-22T01:00:00 1666400400
## 9  0.3948808 2022-10-22T07:00:00 2022-10-22T02:00:00 1666404000
## 10 0.4284084 2022-10-22T08:00:00 2022-10-22T03:00:00 1666407600
## 11 0.4284084 2022-10-22T09:00:00 2022-10-22T04:00:00 1666411200
## 12 0.3986061 2022-10-22T10:00:00 2022-10-22T05:00:00 1666414800
## 13 0.3501773 2022-10-22T11:00:00 2022-10-22T06:00:00 1666418400
## 14 0.3166497 2022-10-22T12:00:00 2022-10-22T07:00:00 1666422000
## 15 0.2980232 2022-10-22T13:00:00 2022-10-22T08:00:00 1666425600
## 16 0.2868474 2022-10-22T14:00:00 2022-10-22T09:00:00 1666429200
## 17 0.2831221 2022-10-22T15:00:00 2022-10-22T10:00:00 1666432800
## 18 0.2942979 2022-10-22T16:00:00 2022-10-22T11:00:00 1666436400
## 19 0.3017485 2022-10-22T17:00:00 2022-10-22T12:00:00 1666440000
## 20 0.3054738 2022-10-22T18:00:00 2022-10-22T13:00:00 1666443600
## 21 0.3091991 2022-10-22T19:00:00 2022-10-22T14:00:00 1666447200
## 22 0.3129244 2022-10-22T20:00:00 2022-10-22T15:00:00 1666450800
## 23 0.3203750 2022-10-22T21:00:00 2022-10-22T16:00:00 1666454400
## 24 0.3241003 2022-10-22T22:00:00 2022-10-22T17:00:00 1666458000
## 25 0.3278256 2022-10-22T23:00:00 2022-10-22T18:00:00 1666461600
## 26 0.3278256 2022-10-23T00:00:00 2022-10-22T19:00:00 1666465200
## 27 0.3203750 2022-10-23T01:00:00 2022-10-22T20:00:00 1666468800
## 28 0.3166497 2022-10-23T02:00:00 2022-10-22T21:00:00 1666472400
## 29 0.3129244 2022-10-23T03:00:00 2022-10-22T22:00:00 1666476000
## 30 0.3166497 2022-10-23T04:00:00 2022-10-22T23:00:00 1666479600
## 31 0.3166497 2022-10-23T05:00:00 2022-10-23T00:00:00 1666483200
## 32 0.3166497 2022-10-23T06:00:00 2022-10-23T01:00:00 1666486800
## 33 0.3203750 2022-10-23T07:00:00 2022-10-23T02:00:00 1666490400
## 34 0.3203750 2022-10-23T08:00:00 2022-10-23T03:00:00 1666494000
## 35 0.3241003 2022-10-23T09:00:00 2022-10-23T04:00:00 1666497600
## 36 0.3278256 2022-10-23T10:00:00 2022-10-23T05:00:00 1666501200
## 37 0.3390014 2022-10-23T11:00:00 2022-10-23T06:00:00 1666504800
## 38 0.3501773 2022-10-23T12:00:00 2022-10-23T07:00:00 1666508400
## 39 0.3576279 2022-10-23T13:00:00 2022-10-23T08:00:00 1666512000
## 40 0.3650784 2022-10-23T14:00:00 2022-10-23T09:00:00 1666515600
## 41 0.3725290 2022-10-23T15:00:00 2022-10-23T10:00:00 1666519200
## 42 0.3762543 2022-10-23T16:00:00 2022-10-23T11:00:00 1666522800
## 43 0.3762543 2022-10-23T17:00:00 2022-10-23T12:00:00 1666526400
## 44 0.3725290 2022-10-23T18:00:00 2022-10-23T13:00:00 1666530000
## 45 0.3725290 2022-10-23T19:00:00 2022-10-23T14:00:00 1666533600
## 46 0.3725290 2022-10-23T20:00:00 2022-10-23T15:00:00 1666537200
## 47 0.3799796 2022-10-23T21:00:00 2022-10-23T16:00:00 1666540800
## 48 0.3911555 2022-10-23T22:00:00 2022-10-23T17:00:00 1666544400
## 49 0.3986061 2022-10-23T23:00:00 2022-10-23T18:00:00 1666548000
## 50 0.3986061 2022-10-24T00:00:00 2022-10-23T19:00:00 1666551600
## 51 0.3874302 2022-10-24T01:00:00 2022-10-23T20:00:00 1666555200
## 52 0.3762543 2022-10-24T02:00:00 2022-10-23T21:00:00 1666558800
## 53 0.3650784 2022-10-24T03:00:00 2022-10-23T22:00:00 1666562400
## 54 0.3688037 2022-10-24T04:00:00 2022-10-23T23:00:00 1666566000
## 55 0.3762543 2022-10-24T05:00:00 2022-10-24T00:00:00 1666569600
## 56 0.3837049 2022-10-24T06:00:00 2022-10-24T01:00:00 1666573200
## 57 0.4023313 2022-10-24T07:00:00 2022-10-24T02:00:00 1666576800
## 58 0.4284084 2022-10-24T08:00:00 2022-10-24T03:00:00 1666580400
## 59 0.4433095 2022-10-24T09:00:00 2022-10-24T04:00:00 1666584000
## 60 0.4544854 2022-10-24T10:00:00 2022-10-24T05:00:00 1666587600
## 61 0.4544854 2022-10-24T11:00:00 2022-10-24T06:00:00 1666591200
## 62 0.4507601 2022-10-24T12:00:00 2022-10-24T07:00:00 1666594800
## 63 0.4395843 2022-10-24T13:00:00 2022-10-24T08:00:00 1666598400
## 64 0.4209578 2022-10-24T14:00:00 2022-10-24T09:00:00 1666602000
## 65 0.4097819 2022-10-24T15:00:00 2022-10-24T10:00:00 1666605600
## 66 0.4060566 2022-10-24T16:00:00 2022-10-24T11:00:00 1666609200
## 67 0.3986061 2022-10-24T17:00:00 2022-10-24T12:00:00 1666612800
## 68 0.3911555 2022-10-24T18:00:00 2022-10-24T13:00:00 1666616400
## 69 0.3837049 2022-10-24T19:00:00 2022-10-24T14:00:00 1666620000
## 70 0.3762543 2022-10-24T20:00:00 2022-10-24T15:00:00 1666623600
## 71 0.3725290 2022-10-24T21:00:00 2022-10-24T16:00:00 1666627200
## 72 0.3688037 2022-10-24T22:00:00 2022-10-24T17:00:00 1666630800

Persiapan Data

# Install package jika belum ada install.packages(c("ggplot2","tseries","dynlm","dLagM","lmtest","forecast")) 

# Load library
library(ggplot2)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(dynlm)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(dLagM)
## Loading required package: nardl
library(lmtest)
library(forecast)
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:dLagM':
## 
##     forecast
library(xts)
library(forecast)
library(zoo)
library(car)
## Loading required package: carData
library(dynlm)
library(ARDL)
## To cite the ARDL package in publications:
## 
## Use this reference to refer to the validity of the ARDL package.
## 
##   Natsiopoulos, Kleanthis, and Tzeremes, Nickolaos G. (2022). ARDL
##   bounds test for cointegration: Replicating the Pesaran et al. (2001)
##   results for the UK earnings equation using R. Journal of Applied
##   Econometrics, 37(5), 1079-1090. https://doi.org/10.1002/jae.2919
## 
## Use this reference to cite this specific version of the ARDL package.
## 
##   Kleanthis Natsiopoulos and Nickolaos Tzeremes (2023). ARDL: ARDL, ECM
##   and Bounds-Test for Cointegration. R package version 0.2.4.
##   https://CRAN.R-project.org/package=ARDL

Eksplorasi Data

str(data_udara)
## 'data.frame':    72 obs. of  12 variables:
##  $ X              : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ AQI            : num  30.2 28.2 26.6 25 26 26 26 24 23 24 ...
##  $ CO             : num  199 198 199 202 205 ...
##  $ datetime       : chr  "2022-10-21:18" "2022-10-21:19" "2022-10-21:20" "2022-10-21:21" ...
##  $ no2            : num  0.0469 0.0465 0.0469 0.0482 0.0489 ...
##  $ o3             : num  55.8 54.9 54.6 55.1 55.8 ...
##  $ pm10           : num  10.5 10.7 11.2 11.1 10.4 ...
##  $ pm25           : num  5.64 4.62 3.52 2.23 1.98 ...
##  $ so2            : num  0.387 0.41 0.402 0.376 0.339 ...
##  $ timestamp_local: chr  "2022-10-21T23:00:00" "2022-10-22T00:00:00" "2022-10-22T01:00:00" "2022-10-22T02:00:00" ...
##  $ timestamp_utc  : chr  "2022-10-21T18:00:00" "2022-10-21T19:00:00" "2022-10-21T20:00:00" "2022-10-21T21:00:00" ...
##  $ ts             : int  1666375200 1666378800 1666382400 1666386000 1666389600 1666393200 1666396800 1666400400 1666404000 1666407600 ...
# Buat data subset hanya AQI dan O3
aqi_o3 = data.frame(
  datetime = as.POSIXct(data_udara$ts, origin="1970-01-01", tz="UTC"),
  AQI = data_udara$AQI,
  O3  = data_udara$o3
)
aqi_o3
##               datetime  AQI       O3
## 1  2022-10-21 18:00:00 30.2 55.78995
## 2  2022-10-21 19:00:00 28.2 54.93164
## 3  2022-10-21 20:00:00 26.6 54.64554
## 4  2022-10-21 21:00:00 25.0 55.07469
## 5  2022-10-21 22:00:00 26.0 55.78995
## 6  2022-10-21 23:00:00 26.0 56.50520
## 7  2022-10-22 00:00:00 26.0 55.78995
## 8  2022-10-22 01:00:00 24.0 52.92892
## 9  2022-10-22 02:00:00 23.0 50.06790
## 10 2022-10-22 03:00:00 24.0 51.49841
## 11 2022-10-22 04:00:00 25.0 53.64418
## 12 2022-10-22 05:00:00 26.0 55.78995
## 13 2022-10-22 06:00:00 27.0 59.36623
## 14 2022-10-22 07:00:00 29.0 62.22725
## 15 2022-10-22 08:00:00 29.0 62.94250
## 16 2022-10-22 09:00:00 29.0 62.94250
## 17 2022-10-22 10:00:00 29.0 62.22725
## 18 2022-10-22 11:00:00 28.0 60.79674
## 19 2022-10-22 12:00:00 27.0 59.36623
## 20 2022-10-22 13:00:00 27.0 58.65097
## 21 2022-10-22 14:00:00 27.0 58.65097
## 22 2022-10-22 15:00:00 27.0 57.93572
## 23 2022-10-22 16:00:00 27.0 57.93572
## 24 2022-10-22 17:00:00 27.0 58.65097
## 25 2022-10-22 18:00:00 27.0 59.36623
## 26 2022-10-22 19:00:00 28.0 61.51199
## 27 2022-10-22 20:00:00 29.0 63.65776
## 28 2022-10-22 21:00:00 31.0 66.51878
## 29 2022-10-22 22:00:00 31.0 67.94930
## 30 2022-10-22 23:00:00 32.0 68.66455
## 31 2022-10-23 00:00:00 32.0 68.66455
## 32 2022-10-23 01:00:00 31.0 67.23404
## 33 2022-10-23 02:00:00 31.0 66.51878
## 34 2022-10-23 03:00:00 30.0 65.80353
## 35 2022-10-23 04:00:00 30.0 64.37302
## 36 2022-10-23 05:00:00 29.0 62.22725
## 37 2022-10-23 06:00:00 27.0 59.36623
## 38 2022-10-23 07:00:00 26.0 56.50520
## 39 2022-10-23 08:00:00 25.0 55.07469
## 40 2022-10-23 09:00:00 25.0 53.64418
## 41 2022-10-23 10:00:00 24.0 52.92892
## 42 2022-10-23 11:00:00 24.0 52.92892
## 43 2022-10-23 12:00:00 25.0 53.64418
## 44 2022-10-23 13:00:00 25.0 53.64418
## 45 2022-10-23 14:00:00 25.0 54.35944
## 46 2022-10-23 15:00:00 26.0 55.78995
## 47 2022-10-23 16:00:00 26.0 57.22046
## 48 2022-10-23 17:00:00 27.0 57.93572
## 49 2022-10-23 18:00:00 27.0 58.65097
## 50 2022-10-23 19:00:00 27.0 58.65097
## 51 2022-10-23 20:00:00 27.0 59.36623
## 52 2022-10-23 21:00:00 28.0 60.08148
## 53 2022-10-23 22:00:00 28.0 60.08148
## 54 2022-10-23 23:00:00 27.0 59.36623
## 55 2022-10-24 00:00:00 27.0 58.65097
## 56 2022-10-24 01:00:00 26.0 57.22046
## 57 2022-10-24 02:00:00 25.0 54.35944
## 58 2022-10-24 03:00:00 23.0 50.78316
## 59 2022-10-24 04:00:00 22.0 47.92213
## 60 2022-10-24 05:00:00 21.0 45.41874
## 61 2022-10-24 06:00:00 20.0 43.98823
## 62 2022-10-24 07:00:00 19.0 41.84246
## 63 2022-10-24 08:00:00 19.0 41.48483
## 64 2022-10-24 09:00:00 20.0 43.27297
## 65 2022-10-24 10:00:00 21.0 45.41874
## 66 2022-10-24 11:00:00 21.0 45.41874
## 67 2022-10-24 12:00:00 21.0 45.41874
## 68 2022-10-24 13:00:00 22.0 47.20688
## 69 2022-10-24 14:00:00 24.0 52.21367
## 70 2022-10-24 15:00:00 26.0 56.50520
## 71 2022-10-24 16:00:00 27.0 59.36623
## 72 2022-10-24 17:00:00 28.0 60.79674

Dalam tugas ini akan diambil peubah Y adalah nilai AQI dan peubah x nya adalah O3 (ozon). Meski ozon berperan sebagai pelindung di atmosfer bumi, namun ozon akan menjadi berbahaya jika berada di dalam tanah. Karena ozon di dalam tanah terbentuk dari Nitrogen Oksida (NOx) serta senyawa organik volatil (VOC) yang berasal dari pembakaran bahan bakar fosil serta aktivitas manusia. Unsur-unsur ini besifat reakti, mudah bereaksi dengan bahan lain dan bersifat korosif dan berbau menyengat. Sehingga akan berbahaya bila terhirup i konsentrasi tinggi terutama di dekat permukaan, termasuk di dalam tanah.

library(performance)
library(see)

# Fit model linear
model = lm(AQI ~ O3, data = aqi_o3)

# Buat diagnostic plot
check_model(model)

# 1. Plot Time Series AQI
ggplot(aqi_o3, aes(x = datetime, y = AQI)) +
  geom_line(color = "blue", linewidth = 1) +
  labs(title = "Time Series AQI", x = "Waktu", y = "AQI") +
  theme_minimal()

# 2. Plot Time Series O3
ggplot(aqi_o3, aes(x = datetime, y = O3)) +
  geom_line(color = "red", linewidth = 1) +
  labs(title = "Time Series O3", x = "Waktu", y = "O3") +
  theme_minimal()

# 3. Gabungan AQI & O3
ggplot(aqi_o3, aes(x = datetime)) +
  geom_line(aes(y = AQI, color = "AQI"), linewidth = 1) +
  geom_line(aes(y = O3, color = "O3"), linewidth = 1) +
  labs(title = "Time Series AQI dan O3", x = "Waktu", y = "Nilai") +
  scale_color_manual(values = c("AQI" = "blue", "O3" = "red")) +
  theme_minimal()

# 4. Scatterplot Hubungan AQI vs O3
ggplot(aqi_o3, aes(x = O3, y = AQI)) +
  geom_point(color = "black", size = 2, alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(title = "Scatterplot Hubungan AQI vs O3",
       x = "O3", y = "AQI") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

Nilai p-value > 0.05 yaitu, 2.2e-16 artinya ada hubungan yang signifikan antara AQI dengan O3

Dari kedua nilai di atas, menunjukkan bahwa adanya hubungan linear yang cukup kuat dan signifikan antara kadar O3 dengan nilai AQI. Artinya, variasi O3 dapat menjelaskan perubahan dalma nilai AQi.

Statistik Deskriptif

summary(subset(aqi_o3, select = -datetime))
##       AQI              O3       
##  Min.   :19.00   Min.   :41.48  
##  1st Qu.:25.00   1st Qu.:53.64  
##  Median :27.00   Median :57.22  
##  Mean   :26.18   Mean   :56.57  
##  3rd Qu.:28.00   3rd Qu.:60.08  
##  Max.   :32.00   Max.   :68.66

Distribusi Data

# Histogram
ggplot(aqi_o3, aes(x = AQI)) +
  geom_histogram(bins = 15, fill="blue", alpha=0.6) +
  labs(title="Distribusi AQI") +
  theme_minimal()

ggplot(aqi_o3, aes(x = O3)) +
  geom_histogram(bins = 15, fill="red", alpha=0.6) +
  labs(title="Distribusi O3") +
  theme_minimal()

# Boxplot (cek outlier)
ggplot(aqi_o3, aes(y = AQI)) +
  geom_boxplot(fill="skyblue") +
  labs(title="Boxplot AQI") +
  theme_minimal()

ggplot(aqi_o3, aes(y = O3)) +
  geom_boxplot(fill="orange") +
  labs(title="Boxplot O3") +
  theme_minimal()

Autokorelasi (ACF)

# Cek asumsi autokorelasi dengan ACF
acf(aqi_o3$AQI, main="ACF AQI")

acf(aqi_o3$O3, main="ACF O3")

Baik ACF untuk O3 dan AQI keduanya terindikasi autokorelasi

Lag Scatterplot

# misal lag 1 untuk O3
lag1_O3 <- stats::lag(aqi_o3$O3, -1)
plot(lag1_O3, aqi_o3$AQI,
     main="AQI vs O3 lag 1",
     xlab="O3 (t-1)", ylab="AQI",
     pch=19, col="forestgreen")

Berdasarkan scatterplot AQI vs O3 adalah linear

Boxplot

# Boxplot Distribusi AQI (dengan boxplot per jam)
aqi_o3$hour <- format(aqi_o3$datetime, "%H")

ggplot(aqi_o3, aes(x = hour, y = AQI)) +
  geom_boxplot(fill="red") +
  labs(title="Distribusi AQI per Jam") +
  theme_minimal()

# Boxplot Distribusi O3 (dengan boxplot per jam)
ggplot(aqi_o3, aes(x = hour, y = O3)) +
  geom_boxplot(fill="blue") +
  labs(title="Distribusi O3 per jam") +
  theme_minimal()

Data AQI dan O3 yang tercatat perjam cenderung memiliki fluktuasi yang tajam terutama id malam hari. Bisa jadi hal ini disebabkan oleh aktivitas manusia dan dinamika atmosfer harian.

Cek Seasonal

# Ubah data jadi ts (misal datanya harian dengan periode mingguan = 7)
aqi_ts <- ts(aqi_o3$AQI, frequency = 7)

# Decompose
decomp <- decompose(aqi_ts, type = "additive")
plot(decomp)

# Pola musiman AQI 
aqi_ts <- ts(aqi_o3$AQI, frequency = 12) # contoh bulanan
ggseasonplot(aqi_ts, year.labels = TRUE, main = "Seasonal Plot AQI")
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • na.rm = TRUE
## ℹ Did you misspell an argument name?

Plot di atas adalah pola musiman bulanan untuk beberapa tahun. Plot tersebut menunjukkan bahwa terindikasi adanya musiman yang di awal tahun nilai AQI meningkat, kemudian di pertengahan tahun nilai AQI memuncak dan mencapa titik tertinggi, dan di akhir tahun menurun kembali.

# Hitung rolling variance (window 12, misal bulanan)
aqi_var <- rollapply(aqi_o3$AQI, width=12, FUN=var, fill=NA, align="right")
o3_var  <- rollapply(aqi_o3$O3,  width=12, FUN=var, fill=NA, align="right")
# Plot rolling variance
plot(o3_var,  type="l", col="red",  main="Rolling Variance O3", ylab="Variance", xlab="Index")

# Plot rolling variance
plot(aqi_var, type="l", col="blue", main="Rolling Variance AQI", ylab="Variance", xlab="Index")

Cek Asumsi

H0: Data tidak stationer (ada unit root)

H1: Data stationer

p-value < 0.05 tolak H0

# Regresi linear sederhana
model = lm(AQI ~ O3, data = aqi_o3)
summary(model)
## 
## Call:
## lm(formula = AQI ~ O3, data = aqi_o3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5039 -0.4436 -0.1381  0.1810  4.3810 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.02128    0.73771   0.029    0.977    
## O3           0.46241    0.01296  35.685   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7022 on 70 degrees of freedom
## Multiple R-squared:  0.9479, Adjusted R-squared:  0.9472 
## F-statistic:  1273 on 1 and 70 DF,  p-value: < 2.2e-16
# Uji stationer 
# Konversi ke time series
aqi_ts <- ts(aqi_o3$AQI, frequency = 12)
o3_ts  <- ts(aqi_o3$O3, frequency = 12)

# Uji stasioneritas dengan ADF
adf_aqi = adf.test(na.omit(aqi_ts))
adf_o3 = adf.test(na.omit(o3_ts))

adf_aqi
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(aqi_ts)
## Dickey-Fuller = -3.4572, Lag order = 4, p-value = 0.05349
## alternative hypothesis: stationary
adf_o3
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(o3_ts)
## Dickey-Fuller = -3.0518, Lag order = 4, p-value = 0.1471
## alternative hypothesis: stationary
if(adf_aqi$p.value < 0.05){
  cat("AQI stasioner (p =", adf_aqi$p.value, ")\n")
} else {
  cat("AQI TIDAK stasioner (p =", adf_aqi$p.value, ")\n")
}
## AQI TIDAK stasioner (p = 0.05349047 )
if(adf_o3$p.value < 0.05){
  cat("O3 stasioner (p =", adf_o3$p.value, ")\n")
} else {
  cat("O3 TIDAK stasioner (p =", adf_o3$p.value, ")\n")
}
## O3 TIDAK stasioner (p = 0.1470529 )
# Uji Heteroskeditas
bp = bptest(model)
bp 
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 0.039736, df = 1, p-value = 0.842
if(bp$p.value < 0.05){
  cat("Heteroskedastisitas TERDETEKSI (p =", bp$p.value, ")\n")
} else {
  cat("Tidak ada heteroskedastisitas (p =", bp$p.value, ")\n")
}
## Tidak ada heteroskedastisitas (p = 0.8419971 )
dw = dwtest(model)
dw
## 
##  Durbin-Watson test
## 
## data:  model
## DW = 0.54636, p-value = 3.207e-14
## alternative hypothesis: true autocorrelation is greater than 0
if(dw$p.value < 0.05){
  cat("Autokorelasi TERDETEKSI (p =", dw$p.value, ")\n")
} else {
  cat("Tidak ada autokorelasi (p =", dw$p.value, ")\n")
}
## Autokorelasi TERDETEKSI (p = 3.20666e-14 )

Transformasi peubah X dan Y

# Transformasi log 
aqi.log = log(aqi_o3$AQI)
o3.log  = log(aqi_o3$O3)

# Uji stationer
model.log = lm(aqi.log~ o3.log, data = aqi_o3)
summary(model.log)
## 
## Call:
## lm(formula = aqi.log ~ o3.log, data = aqi_o3)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.020615 -0.015948 -0.005027  0.006839  0.157162 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.7941     0.1044  -7.606 9.69e-11 ***
## o3.log        1.0058     0.0259  38.829  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02579 on 70 degrees of freedom
## Multiple R-squared:  0.9556, Adjusted R-squared:  0.955 
## F-statistic:  1508 on 1 and 70 DF,  p-value: < 2.2e-16

Uji asumsi ulang

# Uji Stationer
adf.aqi2 = adf.test(na.omit(aqi.log))
adf.o32 = adf.test(na.omit(o3.log))
adf.aqi2
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(aqi.log)
## Dickey-Fuller = -3.6232, Lag order = 4, p-value = 0.03763
## alternative hypothesis: stationary
adf.o32
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(o3.log)
## Dickey-Fuller = -3.2523, Lag order = 4, p-value = 0.08638
## alternative hypothesis: stationary
cat("Stasioner logAQI:", ifelse(adf.aqi2$p.value < 0.05, "✅ Stasioner", "❌ Tidak Stasioner"),
    "(p =", adf.aqi2$p.value, ")\n")
## Stasioner logAQI: ✅ Stasioner (p = 0.03763491 )
cat("Stasioner logO3 :", ifelse(adf.o32$p.value < 0.05, "✅ Stasioner", "❌ Tidak Stasioner"),
    "(p =", adf.o32$p.value, ")\n")
## Stasioner logO3 : ❌ Tidak Stasioner (p = 0.08637699 )
# Uji heteroskeditas
bp2 = bptest(model.log)
cat("Heteroskedastisitas:", ifelse(bp$p.value > 0.05,
    "✅ Tidak ada heteroskedastisitas",
    "❌ Ada heteroskedastisitas"),
    "(p =", bp$p.value, ")\n")
## Heteroskedastisitas: ✅ Tidak ada heteroskedastisitas (p = 0.8419971 )
# Uji Autokorelasi
dw2 = dwtest(model.log)
cat("Autokorelasi:", ifelse(dw$p.value > 0.05,
    "✅ Tidak ada autokorelasi",
    "❌ Autokorelasi terdeteksi"),
    "(p =", dw$p.value, ")\n")
## Autokorelasi: ❌ Autokorelasi terdeteksi (p = 3.20666e-14 )
# Uji Normalitas
shapiro = shapiro.test(residuals(model.log))
cat("Normalitas residual:", ifelse(shapiro$p.value > 0.05,
    "✅ Residual berdistribusi normal",
    "❌ Residual tidak normal"),
    "(p =", shapiro$p.value, ")\n")
## Normalitas residual: ❌ Residual tidak normal (p = 9.870481e-14 )

Penangan Autokorelasi

# Install jika belum ada
# install.packages("orcutt")

library(orcutt)

# Model awal (sudah log transform)
model.log = lm(aqi.log ~ o3.log, data = aqi_o3)

# Terapkan Cochrane-Orcutt
co_model = cochrane.orcutt(model.log)

summary(co_model)
## Call:
## lm(formula = aqi.log ~ o3.log, data = aqi_o3)
## 
##              Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) -0.814069   0.104399  -7.606 9.694e-11 ***
## o3.log       1.009692   0.025902  38.829 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.013 on 69 degrees of freedom
## Multiple R-squared:  0.9661 ,  Adjusted R-squared:  0.9656
## F-statistic: 1964.8 on 1 and 69 DF,  p-value: < 1.959e-52
## 
## Durbin-Watson statistic 
## (original):    0.55318 , p-value: 4.741e-14
## (transformed): 2.68580 , p-value: 9.98e-01
# Uji lagi autokorelasi
library(lmtest)
dw_co = dwtest(co_model)
cat("Autokorelasi setelah Cochrane-Orcutt:",
    ifelse(dw_co$p.value > 0.05, "✅ Tidak ada autokorelasi",
           "❌ Autokorelasi masih ada"),
    "(p =", dw_co$p.value, ")\n")
## Autokorelasi setelah Cochrane-Orcutt: ✅ Tidak ada autokorelasi (p = 0.9979682 )

Penanganan Normalitas Residual

library(MASS)

# Box-Cox untuk cari lambda terbaik
boxcox(model.log, lambda = seq(-2, 2, 0.1))

# Misal dari grafik, lambda ≈ 0 (log) atau 0.5 (sqrt)
# Kalau misal λ=0.5 → transformasi sqrt
aqi_o3$AQI_sqrt = sqrt(aqi_o3$AQI)
aqi_o3$O3_sqrt  = sqrt(aqi_o3$O3)

model.sqrt = lm(AQI_sqrt ~ O3_sqrt, data = aqi_o3)
summary(model.sqrt)
## 
## Call:
## lm(formula = AQI_sqrt ~ O3_sqrt, data = aqi_o3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.05094 -0.04087 -0.01315  0.01834  0.41477 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.01249    0.13801  -0.091    0.928    
## O3_sqrt      0.68188    0.01835  37.161   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06724 on 70 degrees of freedom
## Multiple R-squared:  0.9518, Adjusted R-squared:  0.9511 
## F-statistic:  1381 on 1 and 70 DF,  p-value: < 2.2e-16
# Cek normalitas residual lagi
shapiro = shapiro.test(residuals(model.sqrt))
cat("Normalitas residual setelah transformasi:",
    ifelse(shapiro$p.value > 0.05,
           "✅ Residual normal",
           "❌ Residual tidak normal"),
    "(p =", shapiro$p.value, ")\n")
## Normalitas residual setelah transformasi: ❌ Residual tidak normal (p = 6.510119e-14 )
# Differencing pada O3 log
o3.diff = diff(o3.log)   # langsung diff vektor o3.log

# Uji ADF pada hasil differencing
library(tseries)
adf_o3_diff = adf.test(na.omit(o3.diff))

cat("Stasioner log(O3) differenced:",
    ifelse(adf_o3_diff$p.value < 0.05,
           "✅ Stasioner",
           "❌ Tidak stasioner"),
    "(p =", adf_o3_diff$p.value, ")\n")
## Stasioner log(O3) differenced: ❌ Tidak stasioner (p = 0.1443159 )
par(mfrow=c(2,2))
plot(model.log)

Pembagian Data

n = (aqi_o3)
n
##               datetime  AQI       O3 hour AQI_sqrt  O3_sqrt
## 1  2022-10-21 18:00:00 30.2 55.78995   18 5.495453 7.469267
## 2  2022-10-21 19:00:00 28.2 54.93164   19 5.310367 7.411588
## 3  2022-10-21 20:00:00 26.6 54.64554   20 5.157519 7.392262
## 4  2022-10-21 21:00:00 25.0 55.07469   21 5.000000 7.421232
## 5  2022-10-21 22:00:00 26.0 55.78995   22 5.099020 7.469267
## 6  2022-10-21 23:00:00 26.0 56.50520   23 5.099020 7.516994
## 7  2022-10-22 00:00:00 26.0 55.78995   00 5.099020 7.469267
## 8  2022-10-22 01:00:00 24.0 52.92892   01 4.898979 7.275227
## 9  2022-10-22 02:00:00 23.0 50.06790   02 4.795832 7.075867
## 10 2022-10-22 03:00:00 24.0 51.49841   03 4.898979 7.176239
## 11 2022-10-22 04:00:00 25.0 53.64418   04 5.000000 7.324219
## 12 2022-10-22 05:00:00 26.0 55.78995   05 5.099020 7.469267
## 13 2022-10-22 06:00:00 27.0 59.36623   06 5.196152 7.704948
## 14 2022-10-22 07:00:00 29.0 62.22725   07 5.385165 7.888425
## 15 2022-10-22 08:00:00 29.0 62.94250   08 5.385165 7.933631
## 16 2022-10-22 09:00:00 29.0 62.94250   09 5.385165 7.933631
## 17 2022-10-22 10:00:00 29.0 62.22725   10 5.385165 7.888425
## 18 2022-10-22 11:00:00 28.0 60.79674   11 5.291503 7.797226
## 19 2022-10-22 12:00:00 27.0 59.36623   12 5.196152 7.704948
## 20 2022-10-22 13:00:00 27.0 58.65097   13 5.196152 7.658392
## 21 2022-10-22 14:00:00 27.0 58.65097   14 5.196152 7.658392
## 22 2022-10-22 15:00:00 27.0 57.93572   15 5.196152 7.611551
## 23 2022-10-22 16:00:00 27.0 57.93572   16 5.196152 7.611551
## 24 2022-10-22 17:00:00 27.0 58.65097   17 5.196152 7.658392
## 25 2022-10-22 18:00:00 27.0 59.36623   18 5.196152 7.704948
## 26 2022-10-22 19:00:00 28.0 61.51199   19 5.291503 7.842958
## 27 2022-10-22 20:00:00 29.0 63.65776   20 5.385165 7.978581
## 28 2022-10-22 21:00:00 31.0 66.51878   21 5.567764 8.155905
## 29 2022-10-22 22:00:00 31.0 67.94930   22 5.567764 8.243136
## 30 2022-10-22 23:00:00 32.0 68.66455   23 5.656854 8.286408
## 31 2022-10-23 00:00:00 32.0 68.66455   00 5.656854 8.286408
## 32 2022-10-23 01:00:00 31.0 67.23404   01 5.567764 8.199637
## 33 2022-10-23 02:00:00 31.0 66.51878   02 5.567764 8.155905
## 34 2022-10-23 03:00:00 30.0 65.80353   03 5.477226 8.111937
## 35 2022-10-23 04:00:00 30.0 64.37302   04 5.477226 8.023280
## 36 2022-10-23 05:00:00 29.0 62.22725   05 5.385165 7.888425
## 37 2022-10-23 06:00:00 27.0 59.36623   06 5.196152 7.704948
## 38 2022-10-23 07:00:00 26.0 56.50520   07 5.099020 7.516994
## 39 2022-10-23 08:00:00 25.0 55.07469   08 5.000000 7.421232
## 40 2022-10-23 09:00:00 25.0 53.64418   09 5.000000 7.324219
## 41 2022-10-23 10:00:00 24.0 52.92892   10 4.898979 7.275227
## 42 2022-10-23 11:00:00 24.0 52.92892   11 4.898979 7.275227
## 43 2022-10-23 12:00:00 25.0 53.64418   12 5.000000 7.324219
## 44 2022-10-23 13:00:00 25.0 53.64418   13 5.000000 7.324219
## 45 2022-10-23 14:00:00 25.0 54.35944   14 5.000000 7.372885
## 46 2022-10-23 15:00:00 26.0 55.78995   15 5.099020 7.469267
## 47 2022-10-23 16:00:00 26.0 57.22046   16 5.099020 7.564421
## 48 2022-10-23 17:00:00 27.0 57.93572   17 5.196152 7.611551
## 49 2022-10-23 18:00:00 27.0 58.65097   18 5.196152 7.658392
## 50 2022-10-23 19:00:00 27.0 58.65097   19 5.196152 7.658392
## 51 2022-10-23 20:00:00 27.0 59.36623   20 5.196152 7.704948
## 52 2022-10-23 21:00:00 28.0 60.08148   21 5.291503 7.751225
## 53 2022-10-23 22:00:00 28.0 60.08148   22 5.291503 7.751225
## 54 2022-10-23 23:00:00 27.0 59.36623   23 5.196152 7.704948
## 55 2022-10-24 00:00:00 27.0 58.65097   00 5.196152 7.658392
## 56 2022-10-24 01:00:00 26.0 57.22046   01 5.099020 7.564421
## 57 2022-10-24 02:00:00 25.0 54.35944   02 5.000000 7.372885
## 58 2022-10-24 03:00:00 23.0 50.78316   03 4.795832 7.126230
## 59 2022-10-24 04:00:00 22.0 47.92213   04 4.690416 6.922581
## 60 2022-10-24 05:00:00 21.0 45.41874   05 4.582576 6.739343
## 61 2022-10-24 06:00:00 20.0 43.98823   06 4.472136 6.632362
## 62 2022-10-24 07:00:00 19.0 41.84246   07 4.358899 6.468575
## 63 2022-10-24 08:00:00 19.0 41.48483   08 4.358899 6.440872
## 64 2022-10-24 09:00:00 20.0 43.27297   09 4.472136 6.578220
## 65 2022-10-24 10:00:00 21.0 45.41874   10 4.582576 6.739343
## 66 2022-10-24 11:00:00 21.0 45.41874   11 4.582576 6.739343
## 67 2022-10-24 12:00:00 21.0 45.41874   12 4.582576 6.739343
## 68 2022-10-24 13:00:00 22.0 47.20688   13 4.690416 6.870726
## 69 2022-10-24 14:00:00 24.0 52.21367   14 4.898979 7.225903
## 70 2022-10-24 15:00:00 26.0 56.50520   15 5.099020 7.516994
## 71 2022-10-24 16:00:00 27.0 59.36623   16 5.196152 7.704948
## 72 2022-10-24 17:00:00 28.0 60.79674   17 5.291503 7.797226
library(dynlm)
library(dplyr)
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lmtest)
library(Metrics)
## 
## Attaching package: 'Metrics'
## The following objects are masked from 'package:performance':
## 
##     mae, mse, rmse
## The following object is masked from 'package:forecast':
## 
##     accuracy
# Buat dataframe log
df_log = data.frame(
  AQI_log = log(aqi_o3$AQI),
  O3_log  = log(aqi_o3$O3)
)

# split data 
n = nrow(df_log) 
train_n = floor(0.7 * n) 
train = df_log[1:train_n, ]
test  = df_log[(train_n+1):n, ]

train_ts <- ts(train, frequency = 12)
test_ts  <- ts(test, frequency = 12)

Proporsi 70:30 dipilih karena jumlah data relatif kecil (72 observasi), sehingga 30% uji (22 data) memberikan evaluasi lebih reliabel dibanding hanya 20% uji (14 data). Ini menjaga keseimbangan antara data latih yang cukup untuk membangun model dan data uji yang cukup untuk mengukur kinerja model.

# Data time series
train_ts = ts(train)
test. = ts(test)
data.ts = ts(df_log)

Koyck

Pemodelan

# MODEL KOYCK
model.koyck = koyckDlm(x = train$O3_log, y = train$AQI_log)
summary(model.koyck)
## 
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0473770 -0.0098381  0.0005975  0.0094096  0.0374549 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.33478    0.16279  -2.056   0.0454 *  
## Y.1          0.42859    0.08665   4.946 1.05e-05 ***
## X.t          0.54519    0.09402   5.798 5.80e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01946 on 46 degrees of freedom
## Multiple R-Squared: 0.9462,  Adjusted R-squared: 0.9438 
## Wald test: 368.5 on 2 and 46 DF,  p-value: < 2.2e-16 
## 
## Diagnostic tests:
## NULL
## 
##                               alpha      beta       phi
## Geometric coefficients:  -0.5858872 0.5451889 0.4285894
AIC(model.koyck)
## [1] -242.1171
BIC(model.koyck)
## [1] -234.5498

hasil summary didapatkan Y_duga_t = -0.33478 + 0.54519Xt + 0.42859Yt

  • Koefisien Xt (0.54519) signifikan, artinya X berpengaruh terhaadp U

  • Koefisien Yt-1 (0.42859) signifikan, ini merupakan estimasi untuk λ. Nilai ini menunjukkan sekitar 42% dari efek di periode sebelumnya masih “terbawa” saat ini

Peramalan dan Akurasi

# Peramalan y untuk 5 periode kedepan dengan menggunakan model koyck
library(dLagM)
fore.koyck = dLagM::forecast(model = model.koyck, x = train$O3_log, h = 5) # Membandingkan dengan data testing 
fore.koyck
## $forecasts
## [1] 3.270307 3.250912 3.239753 3.239235 3.246048
## 
## $call
## forecast.koyckDlm(model = model.koyck, x = train$O3_log, h = 5)
## 
## attr(,"class")
## [1] "forecast.koyckDlm" "dLagM"
# Cek nilai MAPE
mape.koyck = MAPE(model.koyck)
mape.koyck #data test
##                    MAPE
## model.koyck 0.004403018
# Akurasi ata training
GoF(model.koyck)
##              n        MAE           MPE        MAPE       sMAPE     MASE
## model.koyck 49 0.01445862 -4.912283e-05 0.004403018 0.004400766 0.597827
##                      MSE     MRAE    GMRAE
## model.koyck 0.0003553854 21602125 1840.982

Regression with Distributed Lag

Pemodelan (Lag = 2)

model.dlm = dlm(x = train$O3_log, y = train$AQI_log, q = 1)
summary(model.dlm)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.021971 -0.012155 -0.002503  0.007252  0.102908 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.888e-01  1.487e-01  -4.631 2.99e-05 ***
## x.t          9.799e-01  1.119e-01   8.758 2.32e-11 ***
## x.1         -4.518e-05  1.115e-01   0.000        1    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02033 on 46 degrees of freedom
## Multiple R-squared:  0.9412, Adjusted R-squared:  0.9387 
## F-statistic: 368.3 on 2 and 46 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##         AIC       BIC
## 1 -237.8021 -230.2349
model.dlm = dlm(x = train$O3_log, y = train$AQI_log, q = 2)
summary(model.dlm)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.01770 -0.01256 -0.00142  0.00917  0.05349 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.767985   0.110632  -6.942 1.39e-08 ***
## x.t          1.035537   0.123678   8.373 1.18e-10 ***
## x.1         -0.028351   0.221844  -0.128    0.899    
## x.2         -0.008469   0.122504  -0.069    0.945    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01348 on 44 degrees of freedom
## Multiple R-squared:  0.9752, Adjusted R-squared:  0.9735 
## F-statistic: 575.7 on 3 and 44 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##         AIC       BIC
## 1 -271.3783 -262.0223
AIC(model.dlm)
## [1] -271.3783
BIC(model.dlm)
## [1] -262.0223

Peramalan dan Akurasi

fore.dlm = dLagM::forecast(model = model.dlm, x = train$O3_log, h = 5) # Membandingkan dengan data testing 
fore.dlm
## $forecasts
## [1] 3.246604 3.231967 3.227422 3.235802 3.248987
## 
## $call
## forecast.dlm(model = model.dlm, x = train$O3_log, h = 5)
## 
## attr(,"class")
## [1] "forecast.dlm" "dLagM"
mape.dlm = MAPE(model.dlm)
mape.dlm
##                  MAPE
## model.dlm 0.003059721

Lag Optimum

kita dapat meminta R untuk mencari nilai yang optimal secara otomatis berdasarkan kriteria informasi seperti AIC. AIC menyeimbangkan antara kecocokan model dengan kompleksitasnya.

train_sub <- train[, c("AQI_log", "O3_log")]
# Penentuan Lag Optimum
finiteDLMauto(formula = AQI_log ~ O3_log,
              data = train_sub, q.min = 1, q.max = 10,
              model.type = "dlm", error.type = "AIC", trace = TRUE)
##    q - k    MASE       AIC       BIC    GMRAE   MBRAE R.Adj.Sq  Ljung-Box
## 3      3 0.39444 -284.7217 -273.6208 1836.352 0.48939  0.98302 0.14598099
## 4      4 0.39409 -277.6753 -264.8748 2324.129 0.49314  0.98304 0.21155323
## 5      5 0.37104 -272.5562 -258.1029 1977.364 0.48976  0.98401 0.21808396
## 2      2 0.42909 -271.3783 -262.0223 1705.440 0.48224  0.97346 0.09693428
## 6      6 0.36809 -263.2521 -247.1944 1438.333 0.47749  0.98343 0.23443482
## 7      7 0.38346 -255.7600 -238.1480 1753.463 0.48394  0.98349 0.38846689
## 8      8 0.39442 -249.9787 -230.8643 2618.044 0.50855  0.98342 0.48254949
## 10    10 0.35467 -244.0683 -222.1129 3527.973 0.49992  0.98439 0.37850557
## 9      9 0.38981 -242.9655 -222.4027 3023.208 0.50479  0.98187 0.96297827
## 1      1 0.51444 -237.8022 -230.2349 1748.893 0.36813  0.93866 0.14745321
model.dlm2 = dlm(x = train$O3_log, y = train$AQI_log, q = 10)
summary(model.dlm2)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.015661 -0.007345  0.002012  0.006617  0.016341 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.72913    0.15931  -4.577 8.81e-05 ***
## x.t          0.82878    0.16262   5.097 2.13e-05 ***
## x.1          0.42295    0.32872   1.287  0.20875    
## x.2         -0.19529    0.28371  -0.688  0.49690    
## x.3         -0.23122    0.25681  -0.900  0.37560    
## x.4          0.23753    0.26776   0.887  0.38260    
## x.5          0.04378    0.26524   0.165  0.87008    
## x.6         -0.34406    0.26369  -1.305  0.20259    
## x.7          0.63702    0.26096   2.441  0.02122 *  
## x.8         -0.81915    0.26185  -3.128  0.00408 ** 
## x.9          0.63513    0.22146   2.868  0.00777 ** 
## x.10        -0.22661    0.10327  -2.194  0.03666 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.009888 on 28 degrees of freedom
## Multiple R-squared:  0.9888, Adjusted R-squared:  0.9844 
## F-statistic: 224.6 on 11 and 28 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##         AIC       BIC
## 1 -244.0683 -222.1129
AIC(model.dlm2)
## [1] -244.0683
BIC(model.dlm2)
## [1] -222.1129

Yt^= -0.7913 + 0.82878Xt + … -0.22661Xt−10 adalah model akhir yang dihasilkan dari q = 10 dan dari model ini didapatkan beberapa peubah yang berpengaruh signifikan pada taraf nyata 5% yairu Xt, Xt-7, Xt-8, Xt-9, Xt-10.

# peramalan 5 model kedepan dan akurasi 
fore.dlm2 = dLagM::forecast(model = model.dlm2, x = test$O3_log, h = 5)
fore.dlm2
## $forecasts
## [1] 3.302422 3.326722 3.326823 3.311281 3.292729
## 
## $call
## forecast.dlm(model = model.dlm2, x = test$O3_log, h = 5)
## 
## attr(,"class")
## [1] "forecast.dlm" "dLagM"
# Akurasi data training
GoF(model.dlm2)
##             n         MAE           MPE        MAPE       sMAPE      MASE
## model.dlm2 40 0.007232035 -5.998011e-06 0.002185186 0.002184896 0.3546706
##                     MSE     MRAE    GMRAE
## model.dlm2 6.843785e-05 36070938 3527.973
# MAPE 
mape.dlm2 = MAPE(model.dlm2)
mape.dlm2
##                   MAPE
## model.dlm2 0.002185186

Autoregressive

Pemodelan

model.ardl = dLagM::ardlDlm(x = train$O3_log, y = train$AQI_log, p = 1, q = 1)
summary(model.ardl)
## 
## Time series regression with "ts" data:
## Start = 2, End = 50
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.041268 -0.007823  0.003968  0.011199  0.032492 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.42361    0.11442  -3.702 0.000581 ***
## X.t          1.03217    0.08099  12.744  < 2e-16 ***
## X.1         -0.49927    0.11028  -4.527 4.36e-05 ***
## Y.1          0.47048    0.07123   6.605 3.94e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01465 on 45 degrees of freedom
## Multiple R-squared:  0.9702, Adjusted R-squared:  0.9682 
## F-statistic: 487.5 on 3 and 45 DF,  p-value: < 2.2e-16
AIC(model.ardl)
## [1] -269.0102
BIC(model.ardl)
## [1] -259.5511

Peramalan dan Pemodelan

# Koefisien hasil model
alpha <- -0.42361
beta0 <-  1.03217   # X_t
beta1 <- -0.49927   # X_(t-1)
phi1  <-  0.47048   # Y_(t-1)

# Simpan hasil forecast
h <- 5
y_fore <- numeric(h)

# Ambil nilai terakhir dari data training
last_y <- tail(train$AQI_log, 1)     # Y_(t)
last_y1 <- tail(train$AQI_log, 2)[1] # Y_(t-1)
last_x <- tail(train$O3_log, 1)      # X_(t)
last_x1 <- tail(train$O3_log, 2)[1]  # X_(t-1)

# Forecast 5 periode ke depan (pakai test$O3_log sebagai X masa depan)
for (i in 1:h) {
  x_now <- test$O3_log[i]             # X_t (baru)
  x_lag <- if (i == 1) last_x else test$O3_log[i-1]  # X_(t-1)
  
  y_fore[i] <- alpha + beta0*x_now + beta1*x_lag + phi1*last_y
  
  # update Y untuk periode berikutnya
  last_y <- y_fore[i]
}

# Hasil forecast
y_fore
## [1] 3.309284 3.321921 3.321887 3.309509 3.297154
fore.ardl <- list(forecasts = y_fore)
fore.ardl$forecasts
## [1] 3.309284 3.321921 3.321887 3.309509 3.297154
# Hitung AIC dan BIC manual
resid <- residuals(model.ardl)
## Time Series:
## Start = 2 
## End = 50 
## Frequency = 1 
##             2             3             4             5             6 
##  0.0324924097  0.0039678287 -0.0412681389  0.0177257721 -0.0074332395 
##             7             8             9            10            11 
##  0.0120757785 -0.0199896939  0.0061833290  0.0119448160  0.0046729893 
##            12            13            14            15            16 
##  0.0045864327 -0.0206747415  0.0154668149 -0.0064500604 -0.0007441003 
##            17            18            19            20            21 
##  0.0110522695 -0.0057399152 -0.0126326046  0.0051009807 -0.0009508119 
##            22            23            24            25            26 
##  0.0117140235  0.0055879826 -0.0070768528 -0.0134621479 -0.0076917244 
##            27            28            29            30            31 
## -0.0073754431  0.0145482155 -0.0168411932  0.0147224522  0.0050134066 
##            32            33            34            35            36 
## -0.0050045704  0.0104606331 -0.0165103845  0.0162049563  0.0063223661 
##            37            38            39            40            41 
## -0.0175308608  0.0058311467 -0.0138263566  0.0189876035 -0.0211188764 
##            42            43            44            45            46 
## -0.0086147538  0.0183523866  0.0058482640 -0.0078230958  0.0111993343 
##            47            48            49            50 
## -0.0204167316  0.0171417700 -0.0070768528 -0.0009508119
n <- length(resid)
SSR <- sum(resid^2)
k <- length(coef(model.ardl))   # jumlah parameter
## (Intercept)         X.t         X.1         Y.1 
##  -0.4236062   1.0321722  -0.4992666   0.4704763
AIC_manual <- n * log(SSR/n) + 2*k
BIC_manual <- n * log(SSR/n) + k*log(n)

AIC_manual
## [1] -410.0662
BIC_manual
## [1] -402.4989
# Cek MAPE
# Ambil aktual 5 periode dari test set
y_actual <- test$AQI_log[1:5]

# Prediksi dari hasil manual forecast
y_pred <- y_fore

# Hitung MAPE
mape.ardl <- mean(abs((y_actual - y_pred) / y_actual)) * 100
mape.ardl
## [1] 0.296213

Penentuan Lag Optimum

model.ardl.opt = ardlBoundOrders(data = train, ic = "AIC", 
                                  formula = AQI_log ~ O3_log)

model.ardl.opt
## $p
##   O3_log
## 1     15
## 
## $q
## [1] 15
## 
## $Stat.table
##            q = 1     q = 2     q = 3     q = 4     q = 5     q = 6     q = 7
## p = 1  -275.7390 -284.7719 -275.9041 -267.0183 -261.0469 -253.7952 -246.0026
## p = 2  -285.0219 -283.0314 -273.9358 -265.5602 -260.1754 -253.8573 -246.2720
## p = 3  -273.8276 -273.8276 -271.9382 -263.5603 -258.1755 -255.7508 -247.2979
## p = 4  -268.2262 -266.7780 -266.7780 -267.7959 -258.7916 -253.7901 -245.9096
## p = 5  -260.0049 -258.9449 -258.4394 -258.4394 -257.0981 -252.6690 -243.9543
## p = 6  -253.1012 -251.6493 -254.5898 -252.6202 -252.6202 -250.7113 -241.9989
## p = 7  -245.0969 -244.6364 -247.7682 -246.8492 -245.6697 -245.6697 -248.6396
## p = 8  -243.5679 -242.7303 -242.5931 -241.5319 -240.8883 -240.3514 -240.3514
## p = 9  -238.7282 -237.4394 -238.4595 -236.5916 -235.9478 -236.8380 -234.9054
## p = 10 -231.0538 -229.6288 -231.5062 -230.0122 -233.3532 -234.7091 -233.0446
## p = 11 -223.0758 -223.0994 -223.8181 -221.8228 -224.8143 -226.7739 -224.8411
## p = 12 -214.8635 -214.0487 -214.3586 -213.0192 -215.7648 -218.2223 -216.4459
## p = 13 -221.3814 -222.6847 -220.7045 -223.3387 -230.2926 -228.4019 -230.0788
## p = 14 -216.0280 -216.3867 -214.5065 -213.0725 -216.1957 -217.0531 -221.2528
## p = 15 -210.3730 -212.0891 -211.7275 -211.1672 -210.8344 -215.9220 -219.4113
##            q = 8     q = 9    q = 10    q = 11    q = 12    q = 13    q = 14
## p = 1  -236.6298 -229.6171 -224.7879 -217.9685 -209.7507 -201.3572 -200.9019
## p = 2  -237.1516 -229.3079 -222.8042 -216.9680 -208.3625 -199.9496 -199.0294
## p = 3  -238.1459 -231.7460 -225.2639 -216.4849 -207.4047 -200.1997 -198.6616
## p = 4  -236.9393 -229.9563 -223.4111 -214.5347 -205.4499 -200.9169 -197.4795
## p = 5  -235.4326 -228.6579 -223.4742 -216.4748 -207.2902 -200.3631 -195.5100
## p = 6  -233.5833 -230.1508 -225.9635 -222.8015 -214.2202 -205.6979 -202.7809
## p = 7  -242.2727 -233.2961 -225.0926 -221.3101 -212.4654 -203.7303 -202.2846
## p = 8  -240.2829 -231.8621 -224.7498 -238.6583 -228.5829 -228.1909 -217.7993
## p = 9  -234.9054 -233.1397 -224.4303 -237.4801 -235.2153 -239.9404 -232.0455
## p = 10 -231.6375 -231.6375 -237.6965 -252.2616 -244.1164 -238.1409 -231.8288
## p = 11 -229.9796 -248.4595 -248.4595 -252.1382 -246.2793 -239.3288 -230.1098
## p = 12 -229.1448 -247.7917 -246.4877 -246.4877 -257.4849 -272.0086 -260.8402
## p = 13 -239.0941 -248.9423 -259.5639 -261.6608 -261.6608 -270.5157 -260.8955
## p = 14 -221.9679 -234.9639 -238.0353 -261.8054 -272.1681 -272.1681 -271.2612
## p = 15 -217.6369 -229.8101 -255.9484 -254.9184 -253.2774 -404.8238 -404.8238
##           q = 15
## p = 1  -210.0048
## p = 2  -209.3416
## p = 3  -209.6315
## p = 4  -207.7036
## p = 5  -206.3965
## p = 6  -217.0534
## p = 7  -235.3812
## p = 8  -233.7247
## p = 9  -249.1436
## p = 10 -273.5476
## p = 11 -272.8086
## p = 12 -302.5317
## p = 13 -317.1075
## p = 14 -315.2294
## p = 15        NA
## 
## $min.Stat
## [1] -404.8238
min_p=c()
for(i in 1:10){
  min_p[i]=min(model.ardl.opt$Stat.table[[i]])
}
q_opt=which(min_p==min(min_p, na.rm = TRUE))
p_opt=which(model.ardl.opt$Stat.table[[q_opt]] == 
              min(model.ardl.opt$Stat.table[[q_opt]], na.rm = TRUE))
data.frame("q_optimum" = q_opt, "p_optimum" = p_opt, 
           "AIC"=model.ardl.opt$min.Stat)
##   q_optimum p_optimum       AIC
## 1         1         2 -404.8238
model.ardl2 <- ardlDlm(formula = AQI_log ~ O3_log,
                         data = train, p = 10, q = 1)
summary(model.ardl2)
## 
## Time series regression with "ts" data:
## Start = 11, End = 50
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.015614 -0.007399  0.002319  0.006541  0.016456 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.74869    0.20089  -3.727 0.000907 ***
## O3_log.t     0.82459    0.16746   4.924 3.73e-05 ***
## O3_log.1     0.45865    0.39847   1.151 0.259814    
## O3_log.2    -0.19868    0.28950  -0.686 0.498387    
## O3_log.3    -0.22840    0.26195  -0.872 0.390945    
## O3_log.4     0.22922    0.27715   0.827 0.415461    
## O3_log.5     0.05235    0.27492   0.190 0.850403    
## O3_log.6    -0.34505    0.26846  -1.285 0.209617    
## O3_log.7     0.62911    0.26991   2.331 0.027480 *  
## O3_log.8    -0.80583    0.27848  -2.894 0.007445 ** 
## O3_log.9     0.62265    0.23777   2.619 0.014297 *  
## O3_log.10   -0.22199    0.10877  -2.041 0.051145 .  
## AQI_log.1   -0.02835    0.17187  -0.165 0.870205    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01006 on 27 degrees of freedom
## Multiple R-squared:  0.9888, Adjusted R-squared:  0.9838 
## F-statistic: 198.7 on 12 and 27 DF,  p-value: < 2.2e-16
# Cek AIC 
AIC(model.ardl2)
## [1] -242.1086

Berdasarkan hasil di atas terlihat bahwa nilai AIC terendah yaitu -404.8238 didapat ketika nilai optimum q =1 dan p = 2

Perbandingan Model

akurasi <- matrix(c(mape.koyck, 
                    mape.dlm, 
                    mape.dlm2,
                    mape.ardl))
row.names(akurasi)<- c("Koyck",
                       "DLM 1",
                       "DLM 2",
                       "Autoregressive")
colnames(akurasi) <- c("MAPE")
akurasi
##                MAPE       
## Koyck          0.004403018
## DLM 1          0.003059721
## DLM 2          0.002185186
## Autoregressive 0.296213

MAPE terbaik adalah model DLM 2 karena memiliki nilai yang paling kecil.

Plot

# Ambil subset aktual (5 titik pertama test set)
y_actual <- test$AQI_log[1:5]
x_actual <- test$O3_log[1:5]

# Forecast dari semua model
y_koyck <- fore.koyck$forecasts
y_dlm1  <- fore.dlm$forecasts
y_dlm2  <- fore.dlm2$forecasts
y_ardl  <- fore.ardl$forecasts

# Plot aktual
plot(x_actual, y_actual, type="b", col="black", ylim=c(min(y_actual, y_koyck, y_dlm1, y_dlm2, y_ardl)-0.1, 
                                                      max(y_actual, y_koyck, y_dlm1, y_dlm2, y_ardl)+0.1),
     xlab="O3_log", ylab="AQI_log", main="Aktual vs Forecast (5 Periode Test)")

# Tambahkan hasil forecast
lines(x_actual, y_koyck, col="red", type="b", pch=19)
lines(x_actual, y_dlm1,  col="blue", type="b", pch=19)
lines(x_actual, y_dlm2,  col="orange", type="b", pch=19)
lines(x_actual, y_ardl,  col="green", type="b", pch=19)

# Legend
legend("topleft", c("Aktual", "Koyck", "DLM1", "DLM2", "ARDL"),
       col=c("black","red","blue","orange","green"), lty=1, pch=19, cex=0.8)

Berdasarkan data plot dari perbandingan model, model DLM terlihat cenderung mirip dengan data aktual. Sehingga dapat disimpulkan bahwa DLM adalah model terbaik karena juga memiliki nilai MAPE terkecil.