data_udara<- read.csv("C:\\Users\\MUTHI'AH IFFA\\Downloads\\Semester 5\\MPDW\\Praktikum\\Minggu 3\\NewDelhi_Air_quality.csv", header = TRUE, sep = ",")
data_udara
## X AQI CO datetime no2 o3 pm10 pm25
## 1 0 30.2 198.6027 2022-10-21:18 0.04685717 55.78995 10.486722 5.637410
## 2 1 28.2 197.6013 2022-10-21:19 0.04645554 54.93164 10.719325 4.618169
## 3 2 26.6 198.6027 2022-10-21:20 0.04685717 54.64554 11.155578 3.520902
## 4 3 25.0 201.9405 2022-10-21:21 0.04819594 55.07469 11.116206 2.225919
## 5 4 26.0 205.2784 2022-10-21:22 0.04886533 55.78995 10.405250 1.979471
## 6 5 26.0 208.6163 2022-10-21:23 0.05020411 56.50520 9.402518 1.792677
## 7 6 26.0 208.6163 2022-10-22:00 0.05020411 55.78995 8.482822 1.752237
## 8 7 24.0 205.2784 2022-10-22:01 0.05020411 52.92892 7.586667 1.858459
## 9 8 23.0 200.2716 2022-10-22:02 0.04685717 50.06790 7.636587 2.208076
## 10 9 24.0 196.9338 2022-10-22:03 0.04417961 51.49841 8.487648 2.476681
## 11 10 25.0 198.6027 2022-10-22:04 0.04284084 53.64418 9.357371 2.585729
## 12 11 26.0 200.2716 2022-10-22:05 0.04049798 55.78995 10.338728 2.390081
## 13 12 27.0 203.6095 2022-10-22:06 0.04016329 59.36623 10.523314 2.086022
## 14 13 29.0 205.2784 2022-10-22:07 0.03882451 62.22725 10.635970 1.937047
## 15 14 29.0 205.2784 2022-10-22:08 0.03882451 62.94250 9.964803 1.835311
## 16 15 29.0 205.2784 2022-10-22:09 0.03882451 62.94250 9.214411 1.725189
## 17 16 29.0 203.6095 2022-10-22:10 0.03748573 62.22725 8.772117 1.640662
## 18 17 28.0 201.9405 2022-10-22:11 0.04250614 60.79674 8.753794 1.633373
## 19 18 27.0 200.2716 2022-10-22:12 0.05020411 59.36623 8.807467 1.636810
## 20 19 27.0 201.9405 2022-10-22:13 0.05622860 58.65097 8.774860 1.666231
## 21 20 27.0 201.9405 2022-10-22:14 0.05622860 58.65097 8.675813 1.690578
## 22 21 27.0 201.9405 2022-10-22:15 0.05488982 57.93572 8.915920 1.705802
## 23 22 27.0 200.2716 2022-10-22:16 0.05288166 57.93572 9.227945 1.732754
## 24 23 27.0 200.2716 2022-10-22:17 0.05221227 58.65097 9.491790 1.765387
## 25 24 27.0 200.2716 2022-10-22:18 0.05221227 59.36623 9.907828 1.827327
## 26 25 28.0 201.9405 2022-10-22:19 0.05221227 61.51199 10.095885 1.888416
## 27 26 29.0 203.6095 2022-10-22:20 0.05288166 63.65776 9.879244 1.892860
## 28 27 31.0 205.2784 2022-10-22:21 0.05221227 66.51878 9.503339 1.859564
## 29 28 31.0 206.9473 2022-10-22:22 0.05221227 67.94930 9.008138 1.829196
## 30 29 32.0 206.9473 2022-10-22:23 0.05154289 68.66455 8.729765 1.787945
## 31 30 32.0 205.2784 2022-10-23:00 0.05087350 68.66455 8.912433 1.796048
## 32 31 31.0 206.9473 2022-10-23:01 0.05488982 67.23404 9.337687 1.810690
## 33 32 31.0 206.9473 2022-10-23:02 0.06091431 66.51878 9.351754 1.826056
## 34 33 30.0 208.6163 2022-10-23:03 0.05555921 65.80353 8.640116 1.822618
## 35 34 30.0 206.9473 2022-10-23:04 0.04618778 64.37302 7.750329 1.819336
## 36 35 29.0 205.2784 2022-10-23:05 0.03815512 62.22725 6.815522 1.831419
## 37 36 27.0 203.6095 2022-10-23:06 0.03079185 59.36623 6.965347 1.929624
## 38 37 26.0 200.2716 2022-10-23:07 0.02577144 56.50520 7.022172 2.049507
## 39 38 25.0 200.2716 2022-10-23:08 0.02342858 55.07469 6.946332 2.152871
## 40 39 25.0 198.6027 2022-10-23:09 0.02376328 53.64418 6.834290 2.219349
## 41 40 24.0 196.9338 2022-10-23:10 0.02677552 52.92892 6.686746 2.248050
## 42 41 24.0 195.2648 2022-10-23:11 0.03146124 52.92892 7.593563 2.276788
## 43 42 25.0 195.2648 2022-10-23:12 0.03815512 53.64418 8.999838 2.334924
## 44 43 25.0 195.2648 2022-10-23:13 0.04351023 53.64418 11.022134 2.365020
## 45 44 25.0 196.9338 2022-10-23:14 0.04351023 54.35944 12.198974 2.366476
## 46 45 26.0 198.6027 2022-10-23:15 0.04484900 55.78995 11.969586 2.369681
## 47 46 26.0 198.6027 2022-10-23:16 0.04618778 57.22046 9.593794 2.339545
## 48 47 27.0 198.6027 2022-10-23:17 0.04551839 57.93572 7.763611 2.331504
## 49 48 27.0 198.6027 2022-10-23:18 0.04417961 58.65097 7.517936 2.324217
## 50 49 27.0 198.6027 2022-10-23:19 0.04484900 58.65097 8.041439 2.320275
## 51 50 27.0 200.2716 2022-10-23:20 0.04618778 59.36623 8.654108 2.283845
## 52 51 28.0 200.2716 2022-10-23:21 0.04752655 60.08148 8.514708 2.195546
## 53 52 28.0 200.2716 2022-10-23:22 0.04685717 60.08148 7.923108 2.066149
## 54 53 27.0 200.2716 2022-10-23:23 0.04618778 59.36623 7.097172 2.059171
## 55 54 27.0 198.6027 2022-10-24:00 0.04551839 58.65097 6.815780 2.072363
## 56 55 26.0 198.6027 2022-10-24:01 0.04752655 57.22046 7.661589 2.102422
## 57 56 25.0 198.6027 2022-10-24:02 0.04551839 54.35944 8.718614 2.230021
## 58 57 23.0 198.6027 2022-10-24:03 0.03715104 50.78316 8.457276 2.501037
## 59 58 22.0 196.9338 2022-10-24:04 0.02744491 47.92213 7.184991 2.750613
## 60 59 21.0 193.5959 2022-10-24:05 0.02125307 45.41874 6.950173 2.984422
## 61 60 20.0 193.5959 2022-10-24:06 0.01757144 43.98823 7.205668 3.065950
## 62 61 19.0 191.9270 2022-10-24:07 0.01573062 41.84246 7.898480 3.118563
## 63 62 19.0 191.9270 2022-10-24:08 0.01556327 41.48483 8.744758 3.090378
## 64 63 20.0 191.9270 2022-10-24:09 0.01807348 43.27297 9.623435 2.976335
## 65 64 21.0 191.9270 2022-10-24:10 0.02208981 45.41874 10.253539 2.846886
## 66 65 21.0 191.9270 2022-10-24:11 0.02577144 45.41874 10.703334 2.771884
## 67 66 21.0 191.9270 2022-10-24:12 0.03045716 45.41874 10.663057 2.730851
## 68 67 22.0 193.5959 2022-10-24:13 0.03547757 47.20688 10.423121 2.720472
## 69 68 24.0 195.2648 2022-10-24:14 0.03915921 52.21367 11.240661 2.713109
## 70 69 26.0 196.9338 2022-10-24:15 0.04417961 56.50520 12.098125 2.743044
## 71 70 27.0 198.6027 2022-10-24:16 0.04685717 59.36623 12.845977 2.780022
## 72 71 28.0 198.6027 2022-10-24:17 0.04752655 60.79674 12.583639 2.745875
## so2 timestamp_local timestamp_utc ts
## 1 0.3874302 2022-10-21T23:00:00 2022-10-21T18:00:00 1666375200
## 2 0.4097819 2022-10-22T00:00:00 2022-10-21T19:00:00 1666378800
## 3 0.4023313 2022-10-22T01:00:00 2022-10-21T20:00:00 1666382400
## 4 0.3762543 2022-10-22T02:00:00 2022-10-21T21:00:00 1666386000
## 5 0.3390014 2022-10-22T03:00:00 2022-10-21T22:00:00 1666389600
## 6 0.3129244 2022-10-22T04:00:00 2022-10-21T23:00:00 1666393200
## 7 0.3129244 2022-10-22T05:00:00 2022-10-22T00:00:00 1666396800
## 8 0.3427267 2022-10-22T06:00:00 2022-10-22T01:00:00 1666400400
## 9 0.3948808 2022-10-22T07:00:00 2022-10-22T02:00:00 1666404000
## 10 0.4284084 2022-10-22T08:00:00 2022-10-22T03:00:00 1666407600
## 11 0.4284084 2022-10-22T09:00:00 2022-10-22T04:00:00 1666411200
## 12 0.3986061 2022-10-22T10:00:00 2022-10-22T05:00:00 1666414800
## 13 0.3501773 2022-10-22T11:00:00 2022-10-22T06:00:00 1666418400
## 14 0.3166497 2022-10-22T12:00:00 2022-10-22T07:00:00 1666422000
## 15 0.2980232 2022-10-22T13:00:00 2022-10-22T08:00:00 1666425600
## 16 0.2868474 2022-10-22T14:00:00 2022-10-22T09:00:00 1666429200
## 17 0.2831221 2022-10-22T15:00:00 2022-10-22T10:00:00 1666432800
## 18 0.2942979 2022-10-22T16:00:00 2022-10-22T11:00:00 1666436400
## 19 0.3017485 2022-10-22T17:00:00 2022-10-22T12:00:00 1666440000
## 20 0.3054738 2022-10-22T18:00:00 2022-10-22T13:00:00 1666443600
## 21 0.3091991 2022-10-22T19:00:00 2022-10-22T14:00:00 1666447200
## 22 0.3129244 2022-10-22T20:00:00 2022-10-22T15:00:00 1666450800
## 23 0.3203750 2022-10-22T21:00:00 2022-10-22T16:00:00 1666454400
## 24 0.3241003 2022-10-22T22:00:00 2022-10-22T17:00:00 1666458000
## 25 0.3278256 2022-10-22T23:00:00 2022-10-22T18:00:00 1666461600
## 26 0.3278256 2022-10-23T00:00:00 2022-10-22T19:00:00 1666465200
## 27 0.3203750 2022-10-23T01:00:00 2022-10-22T20:00:00 1666468800
## 28 0.3166497 2022-10-23T02:00:00 2022-10-22T21:00:00 1666472400
## 29 0.3129244 2022-10-23T03:00:00 2022-10-22T22:00:00 1666476000
## 30 0.3166497 2022-10-23T04:00:00 2022-10-22T23:00:00 1666479600
## 31 0.3166497 2022-10-23T05:00:00 2022-10-23T00:00:00 1666483200
## 32 0.3166497 2022-10-23T06:00:00 2022-10-23T01:00:00 1666486800
## 33 0.3203750 2022-10-23T07:00:00 2022-10-23T02:00:00 1666490400
## 34 0.3203750 2022-10-23T08:00:00 2022-10-23T03:00:00 1666494000
## 35 0.3241003 2022-10-23T09:00:00 2022-10-23T04:00:00 1666497600
## 36 0.3278256 2022-10-23T10:00:00 2022-10-23T05:00:00 1666501200
## 37 0.3390014 2022-10-23T11:00:00 2022-10-23T06:00:00 1666504800
## 38 0.3501773 2022-10-23T12:00:00 2022-10-23T07:00:00 1666508400
## 39 0.3576279 2022-10-23T13:00:00 2022-10-23T08:00:00 1666512000
## 40 0.3650784 2022-10-23T14:00:00 2022-10-23T09:00:00 1666515600
## 41 0.3725290 2022-10-23T15:00:00 2022-10-23T10:00:00 1666519200
## 42 0.3762543 2022-10-23T16:00:00 2022-10-23T11:00:00 1666522800
## 43 0.3762543 2022-10-23T17:00:00 2022-10-23T12:00:00 1666526400
## 44 0.3725290 2022-10-23T18:00:00 2022-10-23T13:00:00 1666530000
## 45 0.3725290 2022-10-23T19:00:00 2022-10-23T14:00:00 1666533600
## 46 0.3725290 2022-10-23T20:00:00 2022-10-23T15:00:00 1666537200
## 47 0.3799796 2022-10-23T21:00:00 2022-10-23T16:00:00 1666540800
## 48 0.3911555 2022-10-23T22:00:00 2022-10-23T17:00:00 1666544400
## 49 0.3986061 2022-10-23T23:00:00 2022-10-23T18:00:00 1666548000
## 50 0.3986061 2022-10-24T00:00:00 2022-10-23T19:00:00 1666551600
## 51 0.3874302 2022-10-24T01:00:00 2022-10-23T20:00:00 1666555200
## 52 0.3762543 2022-10-24T02:00:00 2022-10-23T21:00:00 1666558800
## 53 0.3650784 2022-10-24T03:00:00 2022-10-23T22:00:00 1666562400
## 54 0.3688037 2022-10-24T04:00:00 2022-10-23T23:00:00 1666566000
## 55 0.3762543 2022-10-24T05:00:00 2022-10-24T00:00:00 1666569600
## 56 0.3837049 2022-10-24T06:00:00 2022-10-24T01:00:00 1666573200
## 57 0.4023313 2022-10-24T07:00:00 2022-10-24T02:00:00 1666576800
## 58 0.4284084 2022-10-24T08:00:00 2022-10-24T03:00:00 1666580400
## 59 0.4433095 2022-10-24T09:00:00 2022-10-24T04:00:00 1666584000
## 60 0.4544854 2022-10-24T10:00:00 2022-10-24T05:00:00 1666587600
## 61 0.4544854 2022-10-24T11:00:00 2022-10-24T06:00:00 1666591200
## 62 0.4507601 2022-10-24T12:00:00 2022-10-24T07:00:00 1666594800
## 63 0.4395843 2022-10-24T13:00:00 2022-10-24T08:00:00 1666598400
## 64 0.4209578 2022-10-24T14:00:00 2022-10-24T09:00:00 1666602000
## 65 0.4097819 2022-10-24T15:00:00 2022-10-24T10:00:00 1666605600
## 66 0.4060566 2022-10-24T16:00:00 2022-10-24T11:00:00 1666609200
## 67 0.3986061 2022-10-24T17:00:00 2022-10-24T12:00:00 1666612800
## 68 0.3911555 2022-10-24T18:00:00 2022-10-24T13:00:00 1666616400
## 69 0.3837049 2022-10-24T19:00:00 2022-10-24T14:00:00 1666620000
## 70 0.3762543 2022-10-24T20:00:00 2022-10-24T15:00:00 1666623600
## 71 0.3725290 2022-10-24T21:00:00 2022-10-24T16:00:00 1666627200
## 72 0.3688037 2022-10-24T22:00:00 2022-10-24T17:00:00 1666630800
# Install package jika belum ada install.packages(c("ggplot2","tseries","dynlm","dLagM","lmtest","forecast"))
# Load library
library(ggplot2)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(dynlm)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(dLagM)
## Loading required package: nardl
library(lmtest)
library(forecast)
##
## Attaching package: 'forecast'
## The following object is masked from 'package:dLagM':
##
## forecast
library(xts)
library(forecast)
library(zoo)
library(car)
## Loading required package: carData
library(dynlm)
library(ARDL)
## To cite the ARDL package in publications:
##
## Use this reference to refer to the validity of the ARDL package.
##
## Natsiopoulos, Kleanthis, and Tzeremes, Nickolaos G. (2022). ARDL
## bounds test for cointegration: Replicating the Pesaran et al. (2001)
## results for the UK earnings equation using R. Journal of Applied
## Econometrics, 37(5), 1079-1090. https://doi.org/10.1002/jae.2919
##
## Use this reference to cite this specific version of the ARDL package.
##
## Kleanthis Natsiopoulos and Nickolaos Tzeremes (2023). ARDL: ARDL, ECM
## and Bounds-Test for Cointegration. R package version 0.2.4.
## https://CRAN.R-project.org/package=ARDL
str(data_udara)
## 'data.frame': 72 obs. of 12 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ AQI : num 30.2 28.2 26.6 25 26 26 26 24 23 24 ...
## $ CO : num 199 198 199 202 205 ...
## $ datetime : chr "2022-10-21:18" "2022-10-21:19" "2022-10-21:20" "2022-10-21:21" ...
## $ no2 : num 0.0469 0.0465 0.0469 0.0482 0.0489 ...
## $ o3 : num 55.8 54.9 54.6 55.1 55.8 ...
## $ pm10 : num 10.5 10.7 11.2 11.1 10.4 ...
## $ pm25 : num 5.64 4.62 3.52 2.23 1.98 ...
## $ so2 : num 0.387 0.41 0.402 0.376 0.339 ...
## $ timestamp_local: chr "2022-10-21T23:00:00" "2022-10-22T00:00:00" "2022-10-22T01:00:00" "2022-10-22T02:00:00" ...
## $ timestamp_utc : chr "2022-10-21T18:00:00" "2022-10-21T19:00:00" "2022-10-21T20:00:00" "2022-10-21T21:00:00" ...
## $ ts : int 1666375200 1666378800 1666382400 1666386000 1666389600 1666393200 1666396800 1666400400 1666404000 1666407600 ...
# Buat data subset hanya AQI dan O3
aqi_o3 = data.frame(
datetime = as.POSIXct(data_udara$ts, origin="1970-01-01", tz="UTC"),
AQI = data_udara$AQI,
O3 = data_udara$o3
)
aqi_o3
## datetime AQI O3
## 1 2022-10-21 18:00:00 30.2 55.78995
## 2 2022-10-21 19:00:00 28.2 54.93164
## 3 2022-10-21 20:00:00 26.6 54.64554
## 4 2022-10-21 21:00:00 25.0 55.07469
## 5 2022-10-21 22:00:00 26.0 55.78995
## 6 2022-10-21 23:00:00 26.0 56.50520
## 7 2022-10-22 00:00:00 26.0 55.78995
## 8 2022-10-22 01:00:00 24.0 52.92892
## 9 2022-10-22 02:00:00 23.0 50.06790
## 10 2022-10-22 03:00:00 24.0 51.49841
## 11 2022-10-22 04:00:00 25.0 53.64418
## 12 2022-10-22 05:00:00 26.0 55.78995
## 13 2022-10-22 06:00:00 27.0 59.36623
## 14 2022-10-22 07:00:00 29.0 62.22725
## 15 2022-10-22 08:00:00 29.0 62.94250
## 16 2022-10-22 09:00:00 29.0 62.94250
## 17 2022-10-22 10:00:00 29.0 62.22725
## 18 2022-10-22 11:00:00 28.0 60.79674
## 19 2022-10-22 12:00:00 27.0 59.36623
## 20 2022-10-22 13:00:00 27.0 58.65097
## 21 2022-10-22 14:00:00 27.0 58.65097
## 22 2022-10-22 15:00:00 27.0 57.93572
## 23 2022-10-22 16:00:00 27.0 57.93572
## 24 2022-10-22 17:00:00 27.0 58.65097
## 25 2022-10-22 18:00:00 27.0 59.36623
## 26 2022-10-22 19:00:00 28.0 61.51199
## 27 2022-10-22 20:00:00 29.0 63.65776
## 28 2022-10-22 21:00:00 31.0 66.51878
## 29 2022-10-22 22:00:00 31.0 67.94930
## 30 2022-10-22 23:00:00 32.0 68.66455
## 31 2022-10-23 00:00:00 32.0 68.66455
## 32 2022-10-23 01:00:00 31.0 67.23404
## 33 2022-10-23 02:00:00 31.0 66.51878
## 34 2022-10-23 03:00:00 30.0 65.80353
## 35 2022-10-23 04:00:00 30.0 64.37302
## 36 2022-10-23 05:00:00 29.0 62.22725
## 37 2022-10-23 06:00:00 27.0 59.36623
## 38 2022-10-23 07:00:00 26.0 56.50520
## 39 2022-10-23 08:00:00 25.0 55.07469
## 40 2022-10-23 09:00:00 25.0 53.64418
## 41 2022-10-23 10:00:00 24.0 52.92892
## 42 2022-10-23 11:00:00 24.0 52.92892
## 43 2022-10-23 12:00:00 25.0 53.64418
## 44 2022-10-23 13:00:00 25.0 53.64418
## 45 2022-10-23 14:00:00 25.0 54.35944
## 46 2022-10-23 15:00:00 26.0 55.78995
## 47 2022-10-23 16:00:00 26.0 57.22046
## 48 2022-10-23 17:00:00 27.0 57.93572
## 49 2022-10-23 18:00:00 27.0 58.65097
## 50 2022-10-23 19:00:00 27.0 58.65097
## 51 2022-10-23 20:00:00 27.0 59.36623
## 52 2022-10-23 21:00:00 28.0 60.08148
## 53 2022-10-23 22:00:00 28.0 60.08148
## 54 2022-10-23 23:00:00 27.0 59.36623
## 55 2022-10-24 00:00:00 27.0 58.65097
## 56 2022-10-24 01:00:00 26.0 57.22046
## 57 2022-10-24 02:00:00 25.0 54.35944
## 58 2022-10-24 03:00:00 23.0 50.78316
## 59 2022-10-24 04:00:00 22.0 47.92213
## 60 2022-10-24 05:00:00 21.0 45.41874
## 61 2022-10-24 06:00:00 20.0 43.98823
## 62 2022-10-24 07:00:00 19.0 41.84246
## 63 2022-10-24 08:00:00 19.0 41.48483
## 64 2022-10-24 09:00:00 20.0 43.27297
## 65 2022-10-24 10:00:00 21.0 45.41874
## 66 2022-10-24 11:00:00 21.0 45.41874
## 67 2022-10-24 12:00:00 21.0 45.41874
## 68 2022-10-24 13:00:00 22.0 47.20688
## 69 2022-10-24 14:00:00 24.0 52.21367
## 70 2022-10-24 15:00:00 26.0 56.50520
## 71 2022-10-24 16:00:00 27.0 59.36623
## 72 2022-10-24 17:00:00 28.0 60.79674
Dalam tugas ini akan diambil peubah Y adalah nilai AQI dan peubah x
nya adalah O3 (ozon). Meski ozon berperan sebagai pelindung di atmosfer
bumi, namun ozon akan menjadi berbahaya jika berada di dalam tanah.
Karena ozon di dalam tanah terbentuk dari Nitrogen Oksida (NOx) serta
senyawa organik volatil (VOC) yang berasal dari pembakaran bahan bakar
fosil serta aktivitas manusia. Unsur-unsur ini besifat reakti, mudah
bereaksi dengan bahan lain dan bersifat korosif dan berbau menyengat.
Sehingga akan berbahaya bila terhirup i konsentrasi tinggi terutama di
dekat permukaan, termasuk di dalam tanah.
library(performance)
library(see)
# Fit model linear
model = lm(AQI ~ O3, data = aqi_o3)
# Buat diagnostic plot
check_model(model)
# 1. Plot Time Series AQI
ggplot(aqi_o3, aes(x = datetime, y = AQI)) +
geom_line(color = "blue", linewidth = 1) +
labs(title = "Time Series AQI", x = "Waktu", y = "AQI") +
theme_minimal()
# 2. Plot Time Series O3
ggplot(aqi_o3, aes(x = datetime, y = O3)) +
geom_line(color = "red", linewidth = 1) +
labs(title = "Time Series O3", x = "Waktu", y = "O3") +
theme_minimal()
# 3. Gabungan AQI & O3
ggplot(aqi_o3, aes(x = datetime)) +
geom_line(aes(y = AQI, color = "AQI"), linewidth = 1) +
geom_line(aes(y = O3, color = "O3"), linewidth = 1) +
labs(title = "Time Series AQI dan O3", x = "Waktu", y = "Nilai") +
scale_color_manual(values = c("AQI" = "blue", "O3" = "red")) +
theme_minimal()
# 4. Scatterplot Hubungan AQI vs O3
ggplot(aqi_o3, aes(x = O3, y = AQI)) +
geom_point(color = "black", size = 2, alpha = 0.7) +
geom_smooth(method = "lm", se = FALSE, color = "red") +
labs(title = "Scatterplot Hubungan AQI vs O3",
x = "O3", y = "AQI") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Nilai p-value > 0.05 yaitu, 2.2e-16 artinya ada hubungan yang signifikan antara AQI dengan O3
Dari kedua nilai di atas, menunjukkan bahwa adanya hubungan linear yang cukup kuat dan signifikan antara kadar O3 dengan nilai AQI. Artinya, variasi O3 dapat menjelaskan perubahan dalma nilai AQi.
summary(subset(aqi_o3, select = -datetime))
## AQI O3
## Min. :19.00 Min. :41.48
## 1st Qu.:25.00 1st Qu.:53.64
## Median :27.00 Median :57.22
## Mean :26.18 Mean :56.57
## 3rd Qu.:28.00 3rd Qu.:60.08
## Max. :32.00 Max. :68.66
# Histogram
ggplot(aqi_o3, aes(x = AQI)) +
geom_histogram(bins = 15, fill="blue", alpha=0.6) +
labs(title="Distribusi AQI") +
theme_minimal()
ggplot(aqi_o3, aes(x = O3)) +
geom_histogram(bins = 15, fill="red", alpha=0.6) +
labs(title="Distribusi O3") +
theme_minimal()
# Boxplot (cek outlier)
ggplot(aqi_o3, aes(y = AQI)) +
geom_boxplot(fill="skyblue") +
labs(title="Boxplot AQI") +
theme_minimal()
ggplot(aqi_o3, aes(y = O3)) +
geom_boxplot(fill="orange") +
labs(title="Boxplot O3") +
theme_minimal()
# Cek asumsi autokorelasi dengan ACF
acf(aqi_o3$AQI, main="ACF AQI")
acf(aqi_o3$O3, main="ACF O3")
Baik ACF untuk O3 dan AQI keduanya terindikasi autokorelasi
# misal lag 1 untuk O3
lag1_O3 <- stats::lag(aqi_o3$O3, -1)
plot(lag1_O3, aqi_o3$AQI,
main="AQI vs O3 lag 1",
xlab="O3 (t-1)", ylab="AQI",
pch=19, col="forestgreen")
Berdasarkan scatterplot AQI vs O3 adalah linear
# Boxplot Distribusi AQI (dengan boxplot per jam)
aqi_o3$hour <- format(aqi_o3$datetime, "%H")
ggplot(aqi_o3, aes(x = hour, y = AQI)) +
geom_boxplot(fill="red") +
labs(title="Distribusi AQI per Jam") +
theme_minimal()
# Boxplot Distribusi O3 (dengan boxplot per jam)
ggplot(aqi_o3, aes(x = hour, y = O3)) +
geom_boxplot(fill="blue") +
labs(title="Distribusi O3 per jam") +
theme_minimal()
Data AQI dan O3 yang tercatat perjam cenderung memiliki fluktuasi yang tajam terutama id malam hari. Bisa jadi hal ini disebabkan oleh aktivitas manusia dan dinamika atmosfer harian.
# Ubah data jadi ts (misal datanya harian dengan periode mingguan = 7)
aqi_ts <- ts(aqi_o3$AQI, frequency = 7)
# Decompose
decomp <- decompose(aqi_ts, type = "additive")
plot(decomp)
# Pola musiman AQI
aqi_ts <- ts(aqi_o3$AQI, frequency = 12) # contoh bulanan
ggseasonplot(aqi_ts, year.labels = TRUE, main = "Seasonal Plot AQI")
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • na.rm = TRUE
## ℹ Did you misspell an argument name?
Plot di atas adalah pola musiman bulanan untuk beberapa tahun. Plot tersebut menunjukkan bahwa terindikasi adanya musiman yang di awal tahun nilai AQI meningkat, kemudian di pertengahan tahun nilai AQI memuncak dan mencapa titik tertinggi, dan di akhir tahun menurun kembali.
# Hitung rolling variance (window 12, misal bulanan)
aqi_var <- rollapply(aqi_o3$AQI, width=12, FUN=var, fill=NA, align="right")
o3_var <- rollapply(aqi_o3$O3, width=12, FUN=var, fill=NA, align="right")
# Plot rolling variance
plot(o3_var, type="l", col="red", main="Rolling Variance O3", ylab="Variance", xlab="Index")
# Plot rolling variance
plot(aqi_var, type="l", col="blue", main="Rolling Variance AQI", ylab="Variance", xlab="Index")
H0: Data tidak stationer (ada unit root)
H1: Data stationer
p-value < 0.05 tolak H0
# Regresi linear sederhana
model = lm(AQI ~ O3, data = aqi_o3)
summary(model)
##
## Call:
## lm(formula = AQI ~ O3, data = aqi_o3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5039 -0.4436 -0.1381 0.1810 4.3810
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02128 0.73771 0.029 0.977
## O3 0.46241 0.01296 35.685 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7022 on 70 degrees of freedom
## Multiple R-squared: 0.9479, Adjusted R-squared: 0.9472
## F-statistic: 1273 on 1 and 70 DF, p-value: < 2.2e-16
# Uji stationer
# Konversi ke time series
aqi_ts <- ts(aqi_o3$AQI, frequency = 12)
o3_ts <- ts(aqi_o3$O3, frequency = 12)
# Uji stasioneritas dengan ADF
adf_aqi = adf.test(na.omit(aqi_ts))
adf_o3 = adf.test(na.omit(o3_ts))
adf_aqi
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(aqi_ts)
## Dickey-Fuller = -3.4572, Lag order = 4, p-value = 0.05349
## alternative hypothesis: stationary
adf_o3
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(o3_ts)
## Dickey-Fuller = -3.0518, Lag order = 4, p-value = 0.1471
## alternative hypothesis: stationary
if(adf_aqi$p.value < 0.05){
cat("AQI stasioner (p =", adf_aqi$p.value, ")\n")
} else {
cat("AQI TIDAK stasioner (p =", adf_aqi$p.value, ")\n")
}
## AQI TIDAK stasioner (p = 0.05349047 )
if(adf_o3$p.value < 0.05){
cat("O3 stasioner (p =", adf_o3$p.value, ")\n")
} else {
cat("O3 TIDAK stasioner (p =", adf_o3$p.value, ")\n")
}
## O3 TIDAK stasioner (p = 0.1470529 )
# Uji Heteroskeditas
bp = bptest(model)
bp
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 0.039736, df = 1, p-value = 0.842
if(bp$p.value < 0.05){
cat("Heteroskedastisitas TERDETEKSI (p =", bp$p.value, ")\n")
} else {
cat("Tidak ada heteroskedastisitas (p =", bp$p.value, ")\n")
}
## Tidak ada heteroskedastisitas (p = 0.8419971 )
dw = dwtest(model)
dw
##
## Durbin-Watson test
##
## data: model
## DW = 0.54636, p-value = 3.207e-14
## alternative hypothesis: true autocorrelation is greater than 0
if(dw$p.value < 0.05){
cat("Autokorelasi TERDETEKSI (p =", dw$p.value, ")\n")
} else {
cat("Tidak ada autokorelasi (p =", dw$p.value, ")\n")
}
## Autokorelasi TERDETEKSI (p = 3.20666e-14 )
# Transformasi log
aqi.log = log(aqi_o3$AQI)
o3.log = log(aqi_o3$O3)
# Uji stationer
model.log = lm(aqi.log~ o3.log, data = aqi_o3)
summary(model.log)
##
## Call:
## lm(formula = aqi.log ~ o3.log, data = aqi_o3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.020615 -0.015948 -0.005027 0.006839 0.157162
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.7941 0.1044 -7.606 9.69e-11 ***
## o3.log 1.0058 0.0259 38.829 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02579 on 70 degrees of freedom
## Multiple R-squared: 0.9556, Adjusted R-squared: 0.955
## F-statistic: 1508 on 1 and 70 DF, p-value: < 2.2e-16
# Uji Stationer
adf.aqi2 = adf.test(na.omit(aqi.log))
adf.o32 = adf.test(na.omit(o3.log))
adf.aqi2
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(aqi.log)
## Dickey-Fuller = -3.6232, Lag order = 4, p-value = 0.03763
## alternative hypothesis: stationary
adf.o32
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(o3.log)
## Dickey-Fuller = -3.2523, Lag order = 4, p-value = 0.08638
## alternative hypothesis: stationary
cat("Stasioner logAQI:", ifelse(adf.aqi2$p.value < 0.05, "✅ Stasioner", "❌ Tidak Stasioner"),
"(p =", adf.aqi2$p.value, ")\n")
## Stasioner logAQI: ✅ Stasioner (p = 0.03763491 )
cat("Stasioner logO3 :", ifelse(adf.o32$p.value < 0.05, "✅ Stasioner", "❌ Tidak Stasioner"),
"(p =", adf.o32$p.value, ")\n")
## Stasioner logO3 : ❌ Tidak Stasioner (p = 0.08637699 )
# Uji heteroskeditas
bp2 = bptest(model.log)
cat("Heteroskedastisitas:", ifelse(bp$p.value > 0.05,
"✅ Tidak ada heteroskedastisitas",
"❌ Ada heteroskedastisitas"),
"(p =", bp$p.value, ")\n")
## Heteroskedastisitas: ✅ Tidak ada heteroskedastisitas (p = 0.8419971 )
# Uji Autokorelasi
dw2 = dwtest(model.log)
cat("Autokorelasi:", ifelse(dw$p.value > 0.05,
"✅ Tidak ada autokorelasi",
"❌ Autokorelasi terdeteksi"),
"(p =", dw$p.value, ")\n")
## Autokorelasi: ❌ Autokorelasi terdeteksi (p = 3.20666e-14 )
# Uji Normalitas
shapiro = shapiro.test(residuals(model.log))
cat("Normalitas residual:", ifelse(shapiro$p.value > 0.05,
"✅ Residual berdistribusi normal",
"❌ Residual tidak normal"),
"(p =", shapiro$p.value, ")\n")
## Normalitas residual: ❌ Residual tidak normal (p = 9.870481e-14 )
# Install jika belum ada
# install.packages("orcutt")
library(orcutt)
# Model awal (sudah log transform)
model.log = lm(aqi.log ~ o3.log, data = aqi_o3)
# Terapkan Cochrane-Orcutt
co_model = cochrane.orcutt(model.log)
summary(co_model)
## Call:
## lm(formula = aqi.log ~ o3.log, data = aqi_o3)
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.814069 0.104399 -7.606 9.694e-11 ***
## o3.log 1.009692 0.025902 38.829 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.013 on 69 degrees of freedom
## Multiple R-squared: 0.9661 , Adjusted R-squared: 0.9656
## F-statistic: 1964.8 on 1 and 69 DF, p-value: < 1.959e-52
##
## Durbin-Watson statistic
## (original): 0.55318 , p-value: 4.741e-14
## (transformed): 2.68580 , p-value: 9.98e-01
# Uji lagi autokorelasi
library(lmtest)
dw_co = dwtest(co_model)
cat("Autokorelasi setelah Cochrane-Orcutt:",
ifelse(dw_co$p.value > 0.05, "✅ Tidak ada autokorelasi",
"❌ Autokorelasi masih ada"),
"(p =", dw_co$p.value, ")\n")
## Autokorelasi setelah Cochrane-Orcutt: ✅ Tidak ada autokorelasi (p = 0.9979682 )
library(MASS)
# Box-Cox untuk cari lambda terbaik
boxcox(model.log, lambda = seq(-2, 2, 0.1))
# Misal dari grafik, lambda ≈ 0 (log) atau 0.5 (sqrt)
# Kalau misal λ=0.5 → transformasi sqrt
aqi_o3$AQI_sqrt = sqrt(aqi_o3$AQI)
aqi_o3$O3_sqrt = sqrt(aqi_o3$O3)
model.sqrt = lm(AQI_sqrt ~ O3_sqrt, data = aqi_o3)
summary(model.sqrt)
##
## Call:
## lm(formula = AQI_sqrt ~ O3_sqrt, data = aqi_o3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.05094 -0.04087 -0.01315 0.01834 0.41477
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.01249 0.13801 -0.091 0.928
## O3_sqrt 0.68188 0.01835 37.161 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06724 on 70 degrees of freedom
## Multiple R-squared: 0.9518, Adjusted R-squared: 0.9511
## F-statistic: 1381 on 1 and 70 DF, p-value: < 2.2e-16
# Cek normalitas residual lagi
shapiro = shapiro.test(residuals(model.sqrt))
cat("Normalitas residual setelah transformasi:",
ifelse(shapiro$p.value > 0.05,
"✅ Residual normal",
"❌ Residual tidak normal"),
"(p =", shapiro$p.value, ")\n")
## Normalitas residual setelah transformasi: ❌ Residual tidak normal (p = 6.510119e-14 )
# Differencing pada O3 log
o3.diff = diff(o3.log) # langsung diff vektor o3.log
# Uji ADF pada hasil differencing
library(tseries)
adf_o3_diff = adf.test(na.omit(o3.diff))
cat("Stasioner log(O3) differenced:",
ifelse(adf_o3_diff$p.value < 0.05,
"✅ Stasioner",
"❌ Tidak stasioner"),
"(p =", adf_o3_diff$p.value, ")\n")
## Stasioner log(O3) differenced: ❌ Tidak stasioner (p = 0.1443159 )
par(mfrow=c(2,2))
plot(model.log)
n = (aqi_o3)
n
## datetime AQI O3 hour AQI_sqrt O3_sqrt
## 1 2022-10-21 18:00:00 30.2 55.78995 18 5.495453 7.469267
## 2 2022-10-21 19:00:00 28.2 54.93164 19 5.310367 7.411588
## 3 2022-10-21 20:00:00 26.6 54.64554 20 5.157519 7.392262
## 4 2022-10-21 21:00:00 25.0 55.07469 21 5.000000 7.421232
## 5 2022-10-21 22:00:00 26.0 55.78995 22 5.099020 7.469267
## 6 2022-10-21 23:00:00 26.0 56.50520 23 5.099020 7.516994
## 7 2022-10-22 00:00:00 26.0 55.78995 00 5.099020 7.469267
## 8 2022-10-22 01:00:00 24.0 52.92892 01 4.898979 7.275227
## 9 2022-10-22 02:00:00 23.0 50.06790 02 4.795832 7.075867
## 10 2022-10-22 03:00:00 24.0 51.49841 03 4.898979 7.176239
## 11 2022-10-22 04:00:00 25.0 53.64418 04 5.000000 7.324219
## 12 2022-10-22 05:00:00 26.0 55.78995 05 5.099020 7.469267
## 13 2022-10-22 06:00:00 27.0 59.36623 06 5.196152 7.704948
## 14 2022-10-22 07:00:00 29.0 62.22725 07 5.385165 7.888425
## 15 2022-10-22 08:00:00 29.0 62.94250 08 5.385165 7.933631
## 16 2022-10-22 09:00:00 29.0 62.94250 09 5.385165 7.933631
## 17 2022-10-22 10:00:00 29.0 62.22725 10 5.385165 7.888425
## 18 2022-10-22 11:00:00 28.0 60.79674 11 5.291503 7.797226
## 19 2022-10-22 12:00:00 27.0 59.36623 12 5.196152 7.704948
## 20 2022-10-22 13:00:00 27.0 58.65097 13 5.196152 7.658392
## 21 2022-10-22 14:00:00 27.0 58.65097 14 5.196152 7.658392
## 22 2022-10-22 15:00:00 27.0 57.93572 15 5.196152 7.611551
## 23 2022-10-22 16:00:00 27.0 57.93572 16 5.196152 7.611551
## 24 2022-10-22 17:00:00 27.0 58.65097 17 5.196152 7.658392
## 25 2022-10-22 18:00:00 27.0 59.36623 18 5.196152 7.704948
## 26 2022-10-22 19:00:00 28.0 61.51199 19 5.291503 7.842958
## 27 2022-10-22 20:00:00 29.0 63.65776 20 5.385165 7.978581
## 28 2022-10-22 21:00:00 31.0 66.51878 21 5.567764 8.155905
## 29 2022-10-22 22:00:00 31.0 67.94930 22 5.567764 8.243136
## 30 2022-10-22 23:00:00 32.0 68.66455 23 5.656854 8.286408
## 31 2022-10-23 00:00:00 32.0 68.66455 00 5.656854 8.286408
## 32 2022-10-23 01:00:00 31.0 67.23404 01 5.567764 8.199637
## 33 2022-10-23 02:00:00 31.0 66.51878 02 5.567764 8.155905
## 34 2022-10-23 03:00:00 30.0 65.80353 03 5.477226 8.111937
## 35 2022-10-23 04:00:00 30.0 64.37302 04 5.477226 8.023280
## 36 2022-10-23 05:00:00 29.0 62.22725 05 5.385165 7.888425
## 37 2022-10-23 06:00:00 27.0 59.36623 06 5.196152 7.704948
## 38 2022-10-23 07:00:00 26.0 56.50520 07 5.099020 7.516994
## 39 2022-10-23 08:00:00 25.0 55.07469 08 5.000000 7.421232
## 40 2022-10-23 09:00:00 25.0 53.64418 09 5.000000 7.324219
## 41 2022-10-23 10:00:00 24.0 52.92892 10 4.898979 7.275227
## 42 2022-10-23 11:00:00 24.0 52.92892 11 4.898979 7.275227
## 43 2022-10-23 12:00:00 25.0 53.64418 12 5.000000 7.324219
## 44 2022-10-23 13:00:00 25.0 53.64418 13 5.000000 7.324219
## 45 2022-10-23 14:00:00 25.0 54.35944 14 5.000000 7.372885
## 46 2022-10-23 15:00:00 26.0 55.78995 15 5.099020 7.469267
## 47 2022-10-23 16:00:00 26.0 57.22046 16 5.099020 7.564421
## 48 2022-10-23 17:00:00 27.0 57.93572 17 5.196152 7.611551
## 49 2022-10-23 18:00:00 27.0 58.65097 18 5.196152 7.658392
## 50 2022-10-23 19:00:00 27.0 58.65097 19 5.196152 7.658392
## 51 2022-10-23 20:00:00 27.0 59.36623 20 5.196152 7.704948
## 52 2022-10-23 21:00:00 28.0 60.08148 21 5.291503 7.751225
## 53 2022-10-23 22:00:00 28.0 60.08148 22 5.291503 7.751225
## 54 2022-10-23 23:00:00 27.0 59.36623 23 5.196152 7.704948
## 55 2022-10-24 00:00:00 27.0 58.65097 00 5.196152 7.658392
## 56 2022-10-24 01:00:00 26.0 57.22046 01 5.099020 7.564421
## 57 2022-10-24 02:00:00 25.0 54.35944 02 5.000000 7.372885
## 58 2022-10-24 03:00:00 23.0 50.78316 03 4.795832 7.126230
## 59 2022-10-24 04:00:00 22.0 47.92213 04 4.690416 6.922581
## 60 2022-10-24 05:00:00 21.0 45.41874 05 4.582576 6.739343
## 61 2022-10-24 06:00:00 20.0 43.98823 06 4.472136 6.632362
## 62 2022-10-24 07:00:00 19.0 41.84246 07 4.358899 6.468575
## 63 2022-10-24 08:00:00 19.0 41.48483 08 4.358899 6.440872
## 64 2022-10-24 09:00:00 20.0 43.27297 09 4.472136 6.578220
## 65 2022-10-24 10:00:00 21.0 45.41874 10 4.582576 6.739343
## 66 2022-10-24 11:00:00 21.0 45.41874 11 4.582576 6.739343
## 67 2022-10-24 12:00:00 21.0 45.41874 12 4.582576 6.739343
## 68 2022-10-24 13:00:00 22.0 47.20688 13 4.690416 6.870726
## 69 2022-10-24 14:00:00 24.0 52.21367 14 4.898979 7.225903
## 70 2022-10-24 15:00:00 26.0 56.50520 15 5.099020 7.516994
## 71 2022-10-24 16:00:00 27.0 59.36623 16 5.196152 7.704948
## 72 2022-10-24 17:00:00 28.0 60.79674 17 5.291503 7.797226
library(dynlm)
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lmtest)
library(Metrics)
##
## Attaching package: 'Metrics'
## The following objects are masked from 'package:performance':
##
## mae, mse, rmse
## The following object is masked from 'package:forecast':
##
## accuracy
# Buat dataframe log
df_log = data.frame(
AQI_log = log(aqi_o3$AQI),
O3_log = log(aqi_o3$O3)
)
# split data
n = nrow(df_log)
train_n = floor(0.7 * n)
train = df_log[1:train_n, ]
test = df_log[(train_n+1):n, ]
train_ts <- ts(train, frequency = 12)
test_ts <- ts(test, frequency = 12)
Proporsi 70:30 dipilih karena jumlah data relatif kecil (72 observasi), sehingga 30% uji (22 data) memberikan evaluasi lebih reliabel dibanding hanya 20% uji (14 data). Ini menjaga keseimbangan antara data latih yang cukup untuk membangun model dan data uji yang cukup untuk mengukur kinerja model.
# Data time series
train_ts = ts(train)
test. = ts(test)
data.ts = ts(df_log)
# MODEL KOYCK
model.koyck = koyckDlm(x = train$O3_log, y = train$AQI_log)
summary(model.koyck)
##
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0473770 -0.0098381 0.0005975 0.0094096 0.0374549
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.33478 0.16279 -2.056 0.0454 *
## Y.1 0.42859 0.08665 4.946 1.05e-05 ***
## X.t 0.54519 0.09402 5.798 5.80e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01946 on 46 degrees of freedom
## Multiple R-Squared: 0.9462, Adjusted R-squared: 0.9438
## Wald test: 368.5 on 2 and 46 DF, p-value: < 2.2e-16
##
## Diagnostic tests:
## NULL
##
## alpha beta phi
## Geometric coefficients: -0.5858872 0.5451889 0.4285894
AIC(model.koyck)
## [1] -242.1171
BIC(model.koyck)
## [1] -234.5498
hasil summary didapatkan Y_duga_t = -0.33478 + 0.54519Xt + 0.42859Yt
Koefisien Xt (0.54519) signifikan, artinya X berpengaruh terhaadp U
Koefisien Yt-1 (0.42859) signifikan, ini merupakan estimasi untuk λ. Nilai ini menunjukkan sekitar 42% dari efek di periode sebelumnya masih “terbawa” saat ini
# Peramalan y untuk 5 periode kedepan dengan menggunakan model koyck
library(dLagM)
fore.koyck = dLagM::forecast(model = model.koyck, x = train$O3_log, h = 5) # Membandingkan dengan data testing
fore.koyck
## $forecasts
## [1] 3.270307 3.250912 3.239753 3.239235 3.246048
##
## $call
## forecast.koyckDlm(model = model.koyck, x = train$O3_log, h = 5)
##
## attr(,"class")
## [1] "forecast.koyckDlm" "dLagM"
# Cek nilai MAPE
mape.koyck = MAPE(model.koyck)
mape.koyck #data test
## MAPE
## model.koyck 0.004403018
# Akurasi ata training
GoF(model.koyck)
## n MAE MPE MAPE sMAPE MASE
## model.koyck 49 0.01445862 -4.912283e-05 0.004403018 0.004400766 0.597827
## MSE MRAE GMRAE
## model.koyck 0.0003553854 21602125 1840.982
model.dlm = dlm(x = train$O3_log, y = train$AQI_log, q = 1)
summary(model.dlm)
##
## Call:
## lm(formula = model.formula, data = design)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.021971 -0.012155 -0.002503 0.007252 0.102908
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.888e-01 1.487e-01 -4.631 2.99e-05 ***
## x.t 9.799e-01 1.119e-01 8.758 2.32e-11 ***
## x.1 -4.518e-05 1.115e-01 0.000 1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02033 on 46 degrees of freedom
## Multiple R-squared: 0.9412, Adjusted R-squared: 0.9387
## F-statistic: 368.3 on 2 and 46 DF, p-value: < 2.2e-16
##
## AIC and BIC values for the model:
## AIC BIC
## 1 -237.8021 -230.2349
model.dlm = dlm(x = train$O3_log, y = train$AQI_log, q = 2)
summary(model.dlm)
##
## Call:
## lm(formula = model.formula, data = design)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.01770 -0.01256 -0.00142 0.00917 0.05349
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.767985 0.110632 -6.942 1.39e-08 ***
## x.t 1.035537 0.123678 8.373 1.18e-10 ***
## x.1 -0.028351 0.221844 -0.128 0.899
## x.2 -0.008469 0.122504 -0.069 0.945
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01348 on 44 degrees of freedom
## Multiple R-squared: 0.9752, Adjusted R-squared: 0.9735
## F-statistic: 575.7 on 3 and 44 DF, p-value: < 2.2e-16
##
## AIC and BIC values for the model:
## AIC BIC
## 1 -271.3783 -262.0223
AIC(model.dlm)
## [1] -271.3783
BIC(model.dlm)
## [1] -262.0223
fore.dlm = dLagM::forecast(model = model.dlm, x = train$O3_log, h = 5) # Membandingkan dengan data testing
fore.dlm
## $forecasts
## [1] 3.246604 3.231967 3.227422 3.235802 3.248987
##
## $call
## forecast.dlm(model = model.dlm, x = train$O3_log, h = 5)
##
## attr(,"class")
## [1] "forecast.dlm" "dLagM"
mape.dlm = MAPE(model.dlm)
mape.dlm
## MAPE
## model.dlm 0.003059721
kita dapat meminta R untuk mencari nilai yang optimal secara otomatis berdasarkan kriteria informasi seperti AIC. AIC menyeimbangkan antara kecocokan model dengan kompleksitasnya.
train_sub <- train[, c("AQI_log", "O3_log")]
# Penentuan Lag Optimum
finiteDLMauto(formula = AQI_log ~ O3_log,
data = train_sub, q.min = 1, q.max = 10,
model.type = "dlm", error.type = "AIC", trace = TRUE)
## q - k MASE AIC BIC GMRAE MBRAE R.Adj.Sq Ljung-Box
## 3 3 0.39444 -284.7217 -273.6208 1836.352 0.48939 0.98302 0.14598099
## 4 4 0.39409 -277.6753 -264.8748 2324.129 0.49314 0.98304 0.21155323
## 5 5 0.37104 -272.5562 -258.1029 1977.364 0.48976 0.98401 0.21808396
## 2 2 0.42909 -271.3783 -262.0223 1705.440 0.48224 0.97346 0.09693428
## 6 6 0.36809 -263.2521 -247.1944 1438.333 0.47749 0.98343 0.23443482
## 7 7 0.38346 -255.7600 -238.1480 1753.463 0.48394 0.98349 0.38846689
## 8 8 0.39442 -249.9787 -230.8643 2618.044 0.50855 0.98342 0.48254949
## 10 10 0.35467 -244.0683 -222.1129 3527.973 0.49992 0.98439 0.37850557
## 9 9 0.38981 -242.9655 -222.4027 3023.208 0.50479 0.98187 0.96297827
## 1 1 0.51444 -237.8022 -230.2349 1748.893 0.36813 0.93866 0.14745321
model.dlm2 = dlm(x = train$O3_log, y = train$AQI_log, q = 10)
summary(model.dlm2)
##
## Call:
## lm(formula = model.formula, data = design)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.015661 -0.007345 0.002012 0.006617 0.016341
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.72913 0.15931 -4.577 8.81e-05 ***
## x.t 0.82878 0.16262 5.097 2.13e-05 ***
## x.1 0.42295 0.32872 1.287 0.20875
## x.2 -0.19529 0.28371 -0.688 0.49690
## x.3 -0.23122 0.25681 -0.900 0.37560
## x.4 0.23753 0.26776 0.887 0.38260
## x.5 0.04378 0.26524 0.165 0.87008
## x.6 -0.34406 0.26369 -1.305 0.20259
## x.7 0.63702 0.26096 2.441 0.02122 *
## x.8 -0.81915 0.26185 -3.128 0.00408 **
## x.9 0.63513 0.22146 2.868 0.00777 **
## x.10 -0.22661 0.10327 -2.194 0.03666 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.009888 on 28 degrees of freedom
## Multiple R-squared: 0.9888, Adjusted R-squared: 0.9844
## F-statistic: 224.6 on 11 and 28 DF, p-value: < 2.2e-16
##
## AIC and BIC values for the model:
## AIC BIC
## 1 -244.0683 -222.1129
AIC(model.dlm2)
## [1] -244.0683
BIC(model.dlm2)
## [1] -222.1129
Yt^= -0.7913 + 0.82878Xt + … -0.22661Xt−10 adalah model akhir yang dihasilkan dari q = 10 dan dari model ini didapatkan beberapa peubah yang berpengaruh signifikan pada taraf nyata 5% yairu Xt, Xt-7, Xt-8, Xt-9, Xt-10.
# peramalan 5 model kedepan dan akurasi
fore.dlm2 = dLagM::forecast(model = model.dlm2, x = test$O3_log, h = 5)
fore.dlm2
## $forecasts
## [1] 3.302422 3.326722 3.326823 3.311281 3.292729
##
## $call
## forecast.dlm(model = model.dlm2, x = test$O3_log, h = 5)
##
## attr(,"class")
## [1] "forecast.dlm" "dLagM"
# Akurasi data training
GoF(model.dlm2)
## n MAE MPE MAPE sMAPE MASE
## model.dlm2 40 0.007232035 -5.998011e-06 0.002185186 0.002184896 0.3546706
## MSE MRAE GMRAE
## model.dlm2 6.843785e-05 36070938 3527.973
# MAPE
mape.dlm2 = MAPE(model.dlm2)
mape.dlm2
## MAPE
## model.dlm2 0.002185186
model.ardl = dLagM::ardlDlm(x = train$O3_log, y = train$AQI_log, p = 1, q = 1)
summary(model.ardl)
##
## Time series regression with "ts" data:
## Start = 2, End = 50
##
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.041268 -0.007823 0.003968 0.011199 0.032492
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.42361 0.11442 -3.702 0.000581 ***
## X.t 1.03217 0.08099 12.744 < 2e-16 ***
## X.1 -0.49927 0.11028 -4.527 4.36e-05 ***
## Y.1 0.47048 0.07123 6.605 3.94e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01465 on 45 degrees of freedom
## Multiple R-squared: 0.9702, Adjusted R-squared: 0.9682
## F-statistic: 487.5 on 3 and 45 DF, p-value: < 2.2e-16
AIC(model.ardl)
## [1] -269.0102
BIC(model.ardl)
## [1] -259.5511
# Koefisien hasil model
alpha <- -0.42361
beta0 <- 1.03217 # X_t
beta1 <- -0.49927 # X_(t-1)
phi1 <- 0.47048 # Y_(t-1)
# Simpan hasil forecast
h <- 5
y_fore <- numeric(h)
# Ambil nilai terakhir dari data training
last_y <- tail(train$AQI_log, 1) # Y_(t)
last_y1 <- tail(train$AQI_log, 2)[1] # Y_(t-1)
last_x <- tail(train$O3_log, 1) # X_(t)
last_x1 <- tail(train$O3_log, 2)[1] # X_(t-1)
# Forecast 5 periode ke depan (pakai test$O3_log sebagai X masa depan)
for (i in 1:h) {
x_now <- test$O3_log[i] # X_t (baru)
x_lag <- if (i == 1) last_x else test$O3_log[i-1] # X_(t-1)
y_fore[i] <- alpha + beta0*x_now + beta1*x_lag + phi1*last_y
# update Y untuk periode berikutnya
last_y <- y_fore[i]
}
# Hasil forecast
y_fore
## [1] 3.309284 3.321921 3.321887 3.309509 3.297154
fore.ardl <- list(forecasts = y_fore)
fore.ardl$forecasts
## [1] 3.309284 3.321921 3.321887 3.309509 3.297154
# Hitung AIC dan BIC manual
resid <- residuals(model.ardl)
## Time Series:
## Start = 2
## End = 50
## Frequency = 1
## 2 3 4 5 6
## 0.0324924097 0.0039678287 -0.0412681389 0.0177257721 -0.0074332395
## 7 8 9 10 11
## 0.0120757785 -0.0199896939 0.0061833290 0.0119448160 0.0046729893
## 12 13 14 15 16
## 0.0045864327 -0.0206747415 0.0154668149 -0.0064500604 -0.0007441003
## 17 18 19 20 21
## 0.0110522695 -0.0057399152 -0.0126326046 0.0051009807 -0.0009508119
## 22 23 24 25 26
## 0.0117140235 0.0055879826 -0.0070768528 -0.0134621479 -0.0076917244
## 27 28 29 30 31
## -0.0073754431 0.0145482155 -0.0168411932 0.0147224522 0.0050134066
## 32 33 34 35 36
## -0.0050045704 0.0104606331 -0.0165103845 0.0162049563 0.0063223661
## 37 38 39 40 41
## -0.0175308608 0.0058311467 -0.0138263566 0.0189876035 -0.0211188764
## 42 43 44 45 46
## -0.0086147538 0.0183523866 0.0058482640 -0.0078230958 0.0111993343
## 47 48 49 50
## -0.0204167316 0.0171417700 -0.0070768528 -0.0009508119
n <- length(resid)
SSR <- sum(resid^2)
k <- length(coef(model.ardl)) # jumlah parameter
## (Intercept) X.t X.1 Y.1
## -0.4236062 1.0321722 -0.4992666 0.4704763
AIC_manual <- n * log(SSR/n) + 2*k
BIC_manual <- n * log(SSR/n) + k*log(n)
AIC_manual
## [1] -410.0662
BIC_manual
## [1] -402.4989
# Cek MAPE
# Ambil aktual 5 periode dari test set
y_actual <- test$AQI_log[1:5]
# Prediksi dari hasil manual forecast
y_pred <- y_fore
# Hitung MAPE
mape.ardl <- mean(abs((y_actual - y_pred) / y_actual)) * 100
mape.ardl
## [1] 0.296213
model.ardl.opt = ardlBoundOrders(data = train, ic = "AIC",
formula = AQI_log ~ O3_log)
model.ardl.opt
## $p
## O3_log
## 1 15
##
## $q
## [1] 15
##
## $Stat.table
## q = 1 q = 2 q = 3 q = 4 q = 5 q = 6 q = 7
## p = 1 -275.7390 -284.7719 -275.9041 -267.0183 -261.0469 -253.7952 -246.0026
## p = 2 -285.0219 -283.0314 -273.9358 -265.5602 -260.1754 -253.8573 -246.2720
## p = 3 -273.8276 -273.8276 -271.9382 -263.5603 -258.1755 -255.7508 -247.2979
## p = 4 -268.2262 -266.7780 -266.7780 -267.7959 -258.7916 -253.7901 -245.9096
## p = 5 -260.0049 -258.9449 -258.4394 -258.4394 -257.0981 -252.6690 -243.9543
## p = 6 -253.1012 -251.6493 -254.5898 -252.6202 -252.6202 -250.7113 -241.9989
## p = 7 -245.0969 -244.6364 -247.7682 -246.8492 -245.6697 -245.6697 -248.6396
## p = 8 -243.5679 -242.7303 -242.5931 -241.5319 -240.8883 -240.3514 -240.3514
## p = 9 -238.7282 -237.4394 -238.4595 -236.5916 -235.9478 -236.8380 -234.9054
## p = 10 -231.0538 -229.6288 -231.5062 -230.0122 -233.3532 -234.7091 -233.0446
## p = 11 -223.0758 -223.0994 -223.8181 -221.8228 -224.8143 -226.7739 -224.8411
## p = 12 -214.8635 -214.0487 -214.3586 -213.0192 -215.7648 -218.2223 -216.4459
## p = 13 -221.3814 -222.6847 -220.7045 -223.3387 -230.2926 -228.4019 -230.0788
## p = 14 -216.0280 -216.3867 -214.5065 -213.0725 -216.1957 -217.0531 -221.2528
## p = 15 -210.3730 -212.0891 -211.7275 -211.1672 -210.8344 -215.9220 -219.4113
## q = 8 q = 9 q = 10 q = 11 q = 12 q = 13 q = 14
## p = 1 -236.6298 -229.6171 -224.7879 -217.9685 -209.7507 -201.3572 -200.9019
## p = 2 -237.1516 -229.3079 -222.8042 -216.9680 -208.3625 -199.9496 -199.0294
## p = 3 -238.1459 -231.7460 -225.2639 -216.4849 -207.4047 -200.1997 -198.6616
## p = 4 -236.9393 -229.9563 -223.4111 -214.5347 -205.4499 -200.9169 -197.4795
## p = 5 -235.4326 -228.6579 -223.4742 -216.4748 -207.2902 -200.3631 -195.5100
## p = 6 -233.5833 -230.1508 -225.9635 -222.8015 -214.2202 -205.6979 -202.7809
## p = 7 -242.2727 -233.2961 -225.0926 -221.3101 -212.4654 -203.7303 -202.2846
## p = 8 -240.2829 -231.8621 -224.7498 -238.6583 -228.5829 -228.1909 -217.7993
## p = 9 -234.9054 -233.1397 -224.4303 -237.4801 -235.2153 -239.9404 -232.0455
## p = 10 -231.6375 -231.6375 -237.6965 -252.2616 -244.1164 -238.1409 -231.8288
## p = 11 -229.9796 -248.4595 -248.4595 -252.1382 -246.2793 -239.3288 -230.1098
## p = 12 -229.1448 -247.7917 -246.4877 -246.4877 -257.4849 -272.0086 -260.8402
## p = 13 -239.0941 -248.9423 -259.5639 -261.6608 -261.6608 -270.5157 -260.8955
## p = 14 -221.9679 -234.9639 -238.0353 -261.8054 -272.1681 -272.1681 -271.2612
## p = 15 -217.6369 -229.8101 -255.9484 -254.9184 -253.2774 -404.8238 -404.8238
## q = 15
## p = 1 -210.0048
## p = 2 -209.3416
## p = 3 -209.6315
## p = 4 -207.7036
## p = 5 -206.3965
## p = 6 -217.0534
## p = 7 -235.3812
## p = 8 -233.7247
## p = 9 -249.1436
## p = 10 -273.5476
## p = 11 -272.8086
## p = 12 -302.5317
## p = 13 -317.1075
## p = 14 -315.2294
## p = 15 NA
##
## $min.Stat
## [1] -404.8238
min_p=c()
for(i in 1:10){
min_p[i]=min(model.ardl.opt$Stat.table[[i]])
}
q_opt=which(min_p==min(min_p, na.rm = TRUE))
p_opt=which(model.ardl.opt$Stat.table[[q_opt]] ==
min(model.ardl.opt$Stat.table[[q_opt]], na.rm = TRUE))
data.frame("q_optimum" = q_opt, "p_optimum" = p_opt,
"AIC"=model.ardl.opt$min.Stat)
## q_optimum p_optimum AIC
## 1 1 2 -404.8238
model.ardl2 <- ardlDlm(formula = AQI_log ~ O3_log,
data = train, p = 10, q = 1)
summary(model.ardl2)
##
## Time series regression with "ts" data:
## Start = 11, End = 50
##
## Call:
## dynlm(formula = as.formula(model.text), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.015614 -0.007399 0.002319 0.006541 0.016456
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.74869 0.20089 -3.727 0.000907 ***
## O3_log.t 0.82459 0.16746 4.924 3.73e-05 ***
## O3_log.1 0.45865 0.39847 1.151 0.259814
## O3_log.2 -0.19868 0.28950 -0.686 0.498387
## O3_log.3 -0.22840 0.26195 -0.872 0.390945
## O3_log.4 0.22922 0.27715 0.827 0.415461
## O3_log.5 0.05235 0.27492 0.190 0.850403
## O3_log.6 -0.34505 0.26846 -1.285 0.209617
## O3_log.7 0.62911 0.26991 2.331 0.027480 *
## O3_log.8 -0.80583 0.27848 -2.894 0.007445 **
## O3_log.9 0.62265 0.23777 2.619 0.014297 *
## O3_log.10 -0.22199 0.10877 -2.041 0.051145 .
## AQI_log.1 -0.02835 0.17187 -0.165 0.870205
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01006 on 27 degrees of freedom
## Multiple R-squared: 0.9888, Adjusted R-squared: 0.9838
## F-statistic: 198.7 on 12 and 27 DF, p-value: < 2.2e-16
# Cek AIC
AIC(model.ardl2)
## [1] -242.1086
Berdasarkan hasil di atas terlihat bahwa nilai AIC terendah yaitu -404.8238 didapat ketika nilai optimum q =1 dan p = 2
akurasi <- matrix(c(mape.koyck,
mape.dlm,
mape.dlm2,
mape.ardl))
row.names(akurasi)<- c("Koyck",
"DLM 1",
"DLM 2",
"Autoregressive")
colnames(akurasi) <- c("MAPE")
akurasi
## MAPE
## Koyck 0.004403018
## DLM 1 0.003059721
## DLM 2 0.002185186
## Autoregressive 0.296213
MAPE terbaik adalah model DLM 2 karena memiliki nilai yang paling kecil.
# Ambil subset aktual (5 titik pertama test set)
y_actual <- test$AQI_log[1:5]
x_actual <- test$O3_log[1:5]
# Forecast dari semua model
y_koyck <- fore.koyck$forecasts
y_dlm1 <- fore.dlm$forecasts
y_dlm2 <- fore.dlm2$forecasts
y_ardl <- fore.ardl$forecasts
# Plot aktual
plot(x_actual, y_actual, type="b", col="black", ylim=c(min(y_actual, y_koyck, y_dlm1, y_dlm2, y_ardl)-0.1,
max(y_actual, y_koyck, y_dlm1, y_dlm2, y_ardl)+0.1),
xlab="O3_log", ylab="AQI_log", main="Aktual vs Forecast (5 Periode Test)")
# Tambahkan hasil forecast
lines(x_actual, y_koyck, col="red", type="b", pch=19)
lines(x_actual, y_dlm1, col="blue", type="b", pch=19)
lines(x_actual, y_dlm2, col="orange", type="b", pch=19)
lines(x_actual, y_ardl, col="green", type="b", pch=19)
# Legend
legend("topleft", c("Aktual", "Koyck", "DLM1", "DLM2", "ARDL"),
col=c("black","red","blue","orange","green"), lty=1, pch=19, cex=0.8)
Berdasarkan data plot dari perbandingan model, model DLM terlihat cenderung mirip dengan data aktual. Sehingga dapat disimpulkan bahwa DLM adalah model terbaik karena juga memiliki nilai MAPE terkecil.