library(dLagM)
## Warning: package 'dLagM' was built under R version 4.4.3
## Loading required package: nardl
## Warning: package 'nardl' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: dynlm
## Warning: package 'dynlm' was built under R version 4.4.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(dynlm)
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.4.3
## 
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:dLagM':
## 
##     MAPE
## The following object is masked from 'package:base':
## 
##     Recall
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.2
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
#library read file csv
library(readr)
## Warning: package 'readr' was built under R version 4.4.2
data <- read_csv("C:/Users/LENOVO/Downloads/NewDelhi_Air_quality.csv")
## New names:
## Rows: 72 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): datetime dbl (9): ...1, AQI, CO, no2, o3, pm10, pm25, so2, ts dttm (2):
## timestamp_local, timestamp_utc
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
data
## # A tibble: 72 × 12
##     ...1   AQI    CO datetime    no2    o3  pm10  pm25   so2 timestamp_local    
##    <dbl> <dbl> <dbl> <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dttm>             
##  1     0  30.2  199. 2022-10… 0.0469  55.8 10.5   5.64 0.387 2022-10-21 23:00:00
##  2     1  28.2  198. 2022-10… 0.0465  54.9 10.7   4.62 0.410 2022-10-22 00:00:00
##  3     2  26.6  199. 2022-10… 0.0469  54.6 11.2   3.52 0.402 2022-10-22 01:00:00
##  4     3  25    202. 2022-10… 0.0482  55.1 11.1   2.23 0.376 2022-10-22 02:00:00
##  5     4  26    205. 2022-10… 0.0489  55.8 10.4   1.98 0.339 2022-10-22 03:00:00
##  6     5  26    209. 2022-10… 0.0502  56.5  9.40  1.79 0.313 2022-10-22 04:00:00
##  7     6  26    209. 2022-10… 0.0502  55.8  8.48  1.75 0.313 2022-10-22 05:00:00
##  8     7  24    205. 2022-10… 0.0502  52.9  7.59  1.86 0.343 2022-10-22 06:00:00
##  9     8  23    200. 2022-10… 0.0469  50.1  7.64  2.21 0.395 2022-10-22 07:00:00
## 10     9  24    197. 2022-10… 0.0442  51.5  8.49  2.48 0.428 2022-10-22 08:00:00
## # ℹ 62 more rows
## # ℹ 2 more variables: timestamp_utc <dttm>, ts <dbl>
#eksplorasi data
str(data)
## spc_tbl_ [72 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ ...1           : num [1:72] 0 1 2 3 4 5 6 7 8 9 ...
##  $ AQI            : num [1:72] 30.2 28.2 26.6 25 26 26 26 24 23 24 ...
##  $ CO             : num [1:72] 199 198 199 202 205 ...
##  $ datetime       : chr [1:72] "2022-10-21:18" "2022-10-21:19" "2022-10-21:20" "2022-10-21:21" ...
##  $ no2            : num [1:72] 0.0469 0.0465 0.0469 0.0482 0.0489 ...
##  $ o3             : num [1:72] 55.8 54.9 54.6 55.1 55.8 ...
##  $ pm10           : num [1:72] 10.5 10.7 11.2 11.1 10.4 ...
##  $ pm25           : num [1:72] 5.64 4.62 3.52 2.23 1.98 ...
##  $ so2            : num [1:72] 0.387 0.41 0.402 0.376 0.339 ...
##  $ timestamp_local: POSIXct[1:72], format: "2022-10-21 23:00:00" "2022-10-22 00:00:00" ...
##  $ timestamp_utc  : POSIXct[1:72], format: "2022-10-21 18:00:00" "2022-10-21 19:00:00" ...
##  $ ts             : num [1:72] 1.67e+09 1.67e+09 1.67e+09 1.67e+09 1.67e+09 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   ...1 = col_double(),
##   ..   AQI = col_double(),
##   ..   CO = col_double(),
##   ..   datetime = col_character(),
##   ..   no2 = col_double(),
##   ..   o3 = col_double(),
##   ..   pm10 = col_double(),
##   ..   pm25 = col_double(),
##   ..   so2 = col_double(),
##   ..   timestamp_local = col_datetime(format = ""),
##   ..   timestamp_utc = col_datetime(format = ""),
##   ..   ts = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
summary(data)
##       ...1            AQI              CO          datetime        
##  Min.   : 0.00   Min.   :19.00   Min.   :191.9   Length:72         
##  1st Qu.:17.75   1st Qu.:25.00   1st Qu.:197.4   Class :character  
##  Median :35.50   Median :27.00   Median :200.3   Mode  :character  
##  Mean   :35.50   Mean   :26.18   Mean   :200.1                     
##  3rd Qu.:53.25   3rd Qu.:28.00   3rd Qu.:203.6                     
##  Max.   :71.00   Max.   :32.00   Max.   :208.6                     
##       no2                o3             pm10             pm25      
##  Min.   :0.01556   Min.   :41.48   Min.   : 6.687   Min.   :1.633  
##  1st Qu.:0.03799   1st Qu.:53.64   1st Qu.: 7.865   1st Qu.:1.829  
##  Median :0.04552   Median :57.22   Median : 8.914   Median :2.202  
##  Mean   :0.04200   Mean   :56.57   Mean   : 9.115   Mean   :2.295  
##  3rd Qu.:0.05020   3rd Qu.:60.08   3rd Qu.:10.275   3rd Qu.:2.522  
##  Max.   :0.06091   Max.   :68.66   Max.   :12.846   Max.   :5.637  
##       so2         timestamp_local               timestamp_utc                
##  Min.   :0.2831   Min.   :2022-10-21 23:00:00   Min.   :2022-10-21 18:00:00  
##  1st Qu.:0.3204   1st Qu.:2022-10-22 16:45:00   1st Qu.:2022-10-22 11:45:00  
##  Median :0.3725   Median :2022-10-23 10:30:00   Median :2022-10-23 05:30:00  
##  Mean   :0.3634   Mean   :2022-10-23 10:30:00   Mean   :2022-10-23 05:30:00  
##  3rd Qu.:0.3958   3rd Qu.:2022-10-24 04:15:00   3rd Qu.:2022-10-23 23:15:00  
##  Max.   :0.4545   Max.   :2022-10-24 22:00:00   Max.   :2022-10-24 17:00:00  
##        ts           
##  Min.   :1.666e+09  
##  1st Qu.:1.666e+09  
##  Median :1.667e+09  
##  Mean   :1.667e+09  
##  3rd Qu.:1.667e+09  
##  Max.   :1.667e+09
#Membuat matriks korelasi
cor_matrix <- cor(data[, c("AQI", "CO", "no2", "o3", "pm10", "pm25", "so2")], use = "complete.obs")
cor_matrix
##              AQI          CO        no2          o3        pm10       pm25
## AQI   1.00000000  0.79763320  0.7376547  0.97359901  0.08257568 -0.3466552
## CO    0.79763320  1.00000000  0.6520228  0.82919809 -0.09598578 -0.5829151
## no2   0.73765472  0.65202279  1.0000000  0.73204188  0.25652368 -0.3588091
## o3    0.97359901  0.82919809  0.7320419  1.00000000  0.03381079 -0.5219779
## pm10  0.08257568 -0.09598578  0.2565237  0.03381079  1.00000000  0.2684827
## pm25 -0.34665524 -0.58291511 -0.3588091 -0.52197786  0.26848271  1.0000000
## so2  -0.74247813 -0.79872002 -0.5859130 -0.78562485 -0.04779446  0.6743428
##              so2
## AQI  -0.74247813
## CO   -0.79872002
## no2  -0.58591304
## o3   -0.78562485
## pm10 -0.04779446
## pm25  0.67434283
## so2   1.00000000
#Visualisasi matriks korelasi
corrplot(cor_matrix, method = "circle", type = "upper", tl.col = "black", tl.srt = 45)

##Interpretasi matriks korelasi Dari matriks korelasi diatas, dapat dilihat bahwa terdapat korelasi yang cukup kuat antara beberapa variabel. Misalnya, AQI memiliki korelasi positif yang cukup kuat dengan CO (0.798) dan O3 (0.974), yang menunjukkan bahwa peningkatan konsentrasi CO dan O3 cenderung diikuti oleh peningkatan AQI. Sehingga variabel yang dapat digunakan sebagai variabel untuk menjadi regresi lag adalah CO dan O3 karena memiliki korelasi yang cukup kuat dengan AQI.

#menghapus data selain AQI dan o3
data <- data[, c("AQI", "o3")]
colnames(data) <- c("Yt", "Xt")
data
## # A tibble: 72 × 2
##       Yt    Xt
##    <dbl> <dbl>
##  1  30.2  55.8
##  2  28.2  54.9
##  3  26.6  54.6
##  4  25    55.1
##  5  26    55.8
##  6  26    56.5
##  7  26    55.8
##  8  24    52.9
##  9  23    50.1
## 10  24    51.5
## # ℹ 62 more rows

##Pembagian Data

#Split Data
train <- data[1:58,]
test <- data[59:72,]
#data time series
train.ts<-ts(train)
test.ts<-ts(test)
data.ts<-ts(data)

##Model koyck

#MODEL KOYCK
model.koyck <- koyckDlm(x = train$Xt, y = train$Yt)
summary(model.koyck)
## 
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -1.112282 -0.366467 -0.002877  0.258311  1.106529 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.26260    0.93950   0.280    0.781    
## Y.1          0.42943    0.08641   4.969 7.14e-06 ***
## X.t          0.25823    0.04259   6.064 1.35e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5239 on 54 degrees of freedom
## Multiple R-Squared: 0.9449,  Adjusted R-squared: 0.9429 
## Wald test: 417.1 on 2 and 54 DF,  p-value: < 2.2e-16 
## 
## Diagnostic tests:
## NULL
## 
##                             alpha      beta       phi
## Geometric coefficients:  0.460243 0.2582336 0.4294305
AIC(model.koyck)
## [1] 92.98106
BIC(model.koyck)
## [1] 101.1533

##Interpretasi Model Koyck Dari hasil model Koyck di atas, dapat dilihat bahwa koefisien regresi untuk variabel Xt (O3) adalah positif dan signifikan, yang menunjukkan bahwa peningkatan konsentrasi O3 cenderung diikuti oleh peningkatan AQI. Koefisien autokorelasi (rho) juga positif, menunjukkan adanya efek lag pada hubungan antara O3 dan AQI. Nilai AIC dan BIC yang relatif rendah menunjukkan bahwa model ini memiliki kecocokan yang baik dengan data pelatihan.

fore.koyck <- forecast(model = model.koyck, x=test$Xt, h=14)
fore.koyck
## $forecasts
##  [1] 22.51461 21.65971 20.92318 20.05278 19.58666 19.84824 20.51469 20.80088
##  [9] 20.92378 21.43831 22.95219 24.71052 26.20441 27.21533
## 
## $call
## forecast.koyckDlm(model = model.koyck, x = test$Xt, h = 14)
## 
## attr(,"class")
## [1] "forecast.koyckDlm" "dLagM"
mape.koyck <- MAPE(fore.koyck$forecasts, test$Yt)
mape.koyck #data test
## [1] 0.02909543
#akurasi data training
GoF(model.koyck)
##              n     MAE           MPE       MAPE      sMAPE      MASE      MSE
## model.koyck 57 0.39765 -0.0005611615 0.01494044 0.01491151 0.5986129 0.260024
##                  MRAE    GMRAE
## model.koyck 545222624 5097.033

##Regresi dengan Distributed Lag

model.dlm <- dlm(x = train$Xt,y = train$Yt , q = 2)
summary(model.dlm)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48041 -0.34171 -0.01844  0.25829  1.41892 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.170522   0.676995  -0.252    0.802    
## x.t          0.487583   0.052341   9.315 1.13e-12 ***
## x.1         -0.017540   0.098118  -0.179    0.859    
## x.2         -0.005899   0.055262  -0.107    0.915    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3496 on 52 degrees of freedom
## Multiple R-squared:  0.9763, Adjusted R-squared:  0.9749 
## F-statistic: 713.6 on 3 and 52 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##        AIC     BIC
## 1 47.04815 57.1749
AIC(model.dlm)
## [1] 47.04815
BIC(model.dlm)
## [1] 57.1749

Dari hasil model Distributed Lag di atas, dapat dilihat bahwa koefisien regresi untuk variabel Xt (O3) pada lag 0, lag 1, dan lag 2 semuanya positif dan signifikan, serta nilai P-Value dari intercept dan \(x_{t-1}<0.05\) yang intercept dan \(x_{t-1}\) berpengaruh signifikan terhadap AQI. Nilai AIC dan BIC yang relatif rendah menunjukkan bahwa model ini memiliki kecocokan yang baik dengan data pelatihan.

Peramalan dan Akurasi

fore.dlm <- forecast(model = model.dlm, x=test$Xt, h=14)
fore.dlm
## $forecasts
##  [1] 21.98407 20.83473 20.19803 19.19165 19.06335 19.95415 20.97114 20.92295
##  [9] 20.91029 21.78216 24.19202 26.18613 27.47631 28.09830
## 
## $call
## forecast.dlm(model = model.dlm, x = test$Xt, h = 14)
## 
## attr(,"class")
## [1] "forecast.dlm" "dLagM"
mape.dlm <- MAPE(fore.dlm$forecasts, test$Yt)
mape.dlm
## [1] 0.006409836
#akurasi data training
GoF(model.dlm)
##            n       MAE          MPE        MAPE       sMAPE      MASE       MSE
## model.dlm 56 0.2659115 -0.000153498 0.009883436 0.009895147 0.4108184 0.1134602
##                 MRAE   GMRAE
## model.dlm 1020290187 5489.74

###Lag Optimum

#penentuan lag optimum 
finiteDLMauto(formula = Yt ~ Xt,
              data = data.frame(train), q.min = 1, q.max = 20,
              model.type = "dlm", error.type = "AIC", trace = TRUE)
##    q - k    MASE      AIC      BIC     GMRAE   MBRAE R.Adj.Sq   Ljung-Box
## 20    20 0.19768  7.37876 45.04324  5794.456 0.49242  0.99072 0.004907684
## 19    19 0.22518 11.63823 48.23659  8872.570 0.51006  0.98952 0.008978678
## 14    14 0.29922 12.98760 43.31883 16384.653 0.51592  0.98825 0.254876195
## 18    18 0.25240 13.28125 48.74771 12656.558 0.52254  0.98891 0.001782351
## 15    15 0.28853 15.37352 47.07512 12079.709 0.50452  0.98781 0.246870375
## 16    16 0.28898 18.08564 51.10137 10365.391 0.49271  0.98718 0.266860265
## 17    17 0.27844 18.59912 52.87057 11228.549 0.51302  0.98713 0.059660051
## 10    10 0.33647 24.10757 48.43318 10779.390 0.48008  0.98393 0.411222122
## 12    12 0.34265 24.68572 52.11534 19408.016 0.51678  0.98404 0.370606458
## 11    11 0.33724 26.25501 52.15708 13097.266 0.49613  0.98321 0.454299817
## 5      5 0.36542 27.02479 42.78712  6410.480 0.48192  0.98352 0.189196553
## 13    13 0.36574 27.07270 55.97930 25489.522 0.52237  0.98360 0.412694470
## 3      3 0.38312 27.10177 39.14577  5879.350 0.48128  0.98290 0.111471296
## 4      4 0.38276 27.96420 41.88709  7635.896 0.48443  0.98283 0.173995415
## 6      6 0.36302 29.64290 47.20409  4742.320 0.47166  0.98307 0.197590728
## 9      9 0.36482 30.09649 52.79833  8867.244 0.48588  0.98222 0.763000495
## 7      7 0.37392 30.57737 49.89563  5848.238 0.47755  0.98314 0.277645440
## 8      8 0.38196 31.33341 52.36567  8039.153 0.49778  0.98265 0.417315437
## 2      2 0.41082 47.04815 57.17490  5489.740 0.47425  0.97492 0.087865341
## 1      1 0.48746 91.76306 99.93527  5751.269 0.38586  0.94411 0.162422786

Berdasarkan hasil di atas, lag optimum yang dipilih adalah lag ke-20 dengan nilai AIC terendah yaitu 7.378. Hal ini menunjukkan bahwa model Distributed Lag dengan lag ke-20 memiliki kecocokan terbaik dengan data pelatihan dibandingkan dengan model dengan lag lainnya dalam rentang yang diuji (1 hingga 20). Selanjutnya akan dilakukan permodelan untuk lag ke 20

#model dlm dengan lag optimum
model.dlm2 <- dlm(x = train$Xt,y = train$Yt , q = 20)
summary(model.dlm2)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30810 -0.09114  0.00787  0.09984  0.32321 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.63320    1.73395  -0.942  0.36025    
## x.t          0.45147    0.05794   7.792 7.79e-07 ***
## x.1          0.13360    0.12926   1.034  0.31668    
## x.2         -0.09471    0.14036  -0.675  0.50948    
## x.3         -0.10610    0.14071  -0.754  0.46179    
## x.4          0.02698    0.14027   0.192  0.84988    
## x.5          0.11985    0.14221   0.843  0.41179    
## x.6         -0.10800    0.14088  -0.767  0.45446    
## x.7          0.24917    0.14003   1.779  0.09418 .  
## x.8         -0.49515    0.13971  -3.544  0.00270 ** 
## x.9          0.42529    0.14468   2.940  0.00962 ** 
## x.10         0.09799    0.14124   0.694  0.49775    
## x.11        -0.42495    0.14292  -2.973  0.00897 ** 
## x.12         0.30033    0.12577   2.388  0.02963 *  
## x.13        -0.24379    0.11608  -2.100  0.05192 .  
## x.14         0.21499    0.11829   1.818  0.08791 .  
## x.15        -0.08719    0.11930  -0.731  0.47544    
## x.16         0.03590    0.12092   0.297  0.77039    
## x.17        -0.06223    0.11973  -0.520  0.61033    
## x.18         0.17404    0.11688   1.489  0.15590    
## x.19        -0.15534    0.09706  -1.600  0.12908    
## x.20         0.03578    0.04547   0.787  0.44279    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2243 on 16 degrees of freedom
## Multiple R-squared:  0.996,  Adjusted R-squared:  0.9907 
## F-statistic:   189 on 21 and 16 DF,  p-value: 2.141e-15
## 
## AIC and BIC values for the model:
##        AIC      BIC
## 1 7.378756 45.04324
AIC(model.dlm2)
## [1] 7.378756
BIC(model.dlm2)
## [1] 45.04324

Dari hasil model Distributed Lag dengan lag optimum (lag ke-20) di atas, dapat dilihat bahwa koefisien regresi untuk variabel Xt (O3) pada lag 0 hingga lag 20 semuanya positif dan signifikan, serta nilai P-Value dari intercept \(x_{t-1}<0.05\) dan \(x_{t-1}\) berpengaruh signifikan terhadap AQI. Nilai AIC dan BIC yang relatif rendah menunjukkan bahwa model ini memiliki kecocokan yang baik dengan data pelatihan.

Peramalan dan Akurasi

#peramalan dan akurasi
fore.dlm2 <- forecast(model = model.dlm2, x=test$Xt, h=14)

#akurasi data test
mape.dlm2<- MAPE(fore.dlm2$forecasts, test$Yt)
mape.dlm2
## [1] 0.02298732
#akurasi data training
GoF(model.dlm2)
##             n       MAE           MPE       MAPE       sMAPE      MASE
## model.dlm2 38 0.1175398 -2.864864e-05 0.00431907 0.004318774 0.1976806
##                   MSE      MRAE    GMRAE
## model.dlm2 0.02119001 538582405 5794.456

Model diatas merupakan model yang sangat baik karena nilai MAPE kuranng dari 10%.

Model Autoregressive

model.ardl <- ardlDlm(x = train$Xt, y = train$Yt, p = 1 , q = 1)
summary(model.ardl)
## 
## Time series regression with "ts" data:
## Start = 2, End = 58
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0233 -0.2713  0.1217  0.2984  0.8689 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.24530    0.66489  -0.369    0.714    
## X.t          0.48660    0.03236  15.036  < 2e-16 ***
## X.1         -0.23184    0.04540  -5.106 4.56e-06 ***
## Y.1          0.45632    0.06613   6.901 6.51e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3797 on 53 degrees of freedom
## Multiple R-squared:  0.9716, Adjusted R-squared:   0.97 
## F-statistic: 604.7 on 3 and 53 DF,  p-value: < 2.2e-16
AIC(model.ardl)
## [1] 57.2222
BIC(model.ardl)
## [1] 67.43746
model.ardl <- ardlDlm(formula = Yt ~ Xt, 
                         data = train,p = 1 , q = 1)
summary(model.ardl)
## 
## Time series regression with "ts" data:
## Start = 2, End = 58
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0233 -0.2713  0.1217  0.2984  0.8689 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.24530    0.66489  -0.369    0.714    
## Xt.t         0.48660    0.03236  15.036  < 2e-16 ***
## Xt.1        -0.23184    0.04540  -5.106 4.56e-06 ***
## Yt.1         0.45632    0.06613   6.901 6.51e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3797 on 53 degrees of freedom
## Multiple R-squared:  0.9716, Adjusted R-squared:   0.97 
## F-statistic: 604.7 on 3 and 53 DF,  p-value: < 2.2e-16
AIC(model.ardl)
## [1] 57.2222
BIC(model.ardl)
## [1] 67.43746

Hasil di atas menunjukkan bahwa selain peubah \(x_{t-1}\), hasil uji t menunjukkan nilai-p pada peubah \(\ge0.05\) Hal ini menunjukkan bahwa peubah \(x_{t-1}\) berpengaruh signifikan terhadap \(y_t\), sementara \(x_t\) dan \(y_{t-1}\) berpengaruh signifikan terhadap \(y_t\).”

Peramalan dan Akurasi

fore.ardl <- forecast(model = model.ardl, x=test$Xt, h=14)
fore.ardl
## $forecasts
##  [1] 21.79557 20.69111 20.07141 19.07614 18.94543 19.83881 20.87605 20.85189
##  [9] 20.84087 21.70595 24.12246 26.15266 27.47632 28.11312
## 
## $call
## forecast.ardlDlm(model = model.ardl, x = test$Xt, h = 14)
## 
## attr(,"class")
## [1] "forecast.ardlDlm" "dLagM"

Data di atas merupakan hasil peramalan untuk 14 periode kedepan dari model autoregressive dengan nilai \(p=1\) dan \(q=1\).

mape.ardl <- MAPE(fore.ardl$forecasts, test$Yt)
mape.ardl
## [1] 0.00779035
#akurasi data training
GoF(model.ardl)
##             n       MAE           MPE       MAPE      sMAPE     MASE       MSE
## model.ardl 57 0.3138843 -0.0001781471 0.01165753 0.01164777 0.472514 0.1340668
##                  MRAE    GMRAE
## model.ardl 1108253474 6593.929

Berdasarkan akurasi di atas, terlihat bahwa nilai MAPE keduanya tidak jauh berbeda. Artinya, model regresi dengan distribusi lag ini tidakoverfitted atau underfitted

Lag Optimum

#penentuan lag optimum
model.ardl.opt <- ardlBoundOrders(data = data.frame(data), ic = "AIC", 
                                  formula = Yt ~ Xt )
model.ardl.opt
## $p
##   Xt
## 1 13
## 
## $q
## [1] 2
## 
## $Stat.table
##           q = 1    q = 2    q = 3    q = 4    q = 5    q = 6    q = 7    q = 8
## p = 1  38.64746 23.22848 25.42302 27.72031 28.70926 26.39158 27.98723 30.39158
## p = 2  22.86594 24.77455 27.31646 28.69429 28.81115 25.64963 27.33061 30.08845
## p = 3  27.18085 27.18085 28.84077 30.37243 30.71504 25.55412 27.74318 30.55048
## p = 4  27.27918 28.33392 28.33392 27.22651 29.94853 27.52594 29.31474 32.19458
## p = 5  28.22371 28.68011 29.37749 29.37749 30.12962 28.60210 31.06991 33.54724
## p = 6  27.11336 28.13965 27.00217 28.68077 28.68077 30.47844 33.06989 35.19285
## p = 7  30.19026 30.92504 29.24074 30.23773 30.81010 30.81010 29.56882 27.97012
## p = 8  26.81548 26.88306 26.96639 27.84677 26.80343 28.40155 28.40155 29.27146
## p = 9  25.15641 25.64777 25.75789 27.48094 26.12645 27.83358 29.58776 29.58776
## p = 10 28.59615 28.86775 29.29778 31.09265 28.82957 30.63888 32.57903 34.34994
## p = 11 24.53224 23.37847 24.80335 26.48648 23.35632 25.20746 26.60810 26.81938
## p = 12 27.14281 25.70395 27.06234 28.84787 26.03408 27.92602 28.91965 28.33033
## p = 13 19.25858 16.55044 18.53543 20.33824 17.61125 19.60705 21.03414 21.71506
## p = 14 20.31747 18.36045 20.36004 22.34912 19.16521 21.06710 22.41853 23.18530
## p = 15 22.41646 20.32505 22.22270 24.14701 21.64522 23.25026 24.97916 26.17790
##           q = 9   q = 10   q = 11   q = 12   q = 13   q = 14   q = 15
## p = 1  33.09252 33.18190 32.84415 33.19542 35.85663 30.71580 27.67083
## p = 2  32.80513 33.98671 32.34936 32.42167 35.04537 31.25200 27.25097
## p = 3  33.31925 34.28268 33.42041 33.94715 36.44309 32.48438 28.87461
## p = 4  34.95803 36.14844 35.27999 35.94335 38.39874 34.28870 30.56428
## p = 5  36.11311 37.41170 36.41377 37.18234 39.82841 36.17273 32.18350
## p = 6  35.88842 36.25737 35.13490 36.94569 39.67535 32.69633 29.29501
## p = 7  30.90878 33.89102 34.52801 36.37243 38.74818 31.23844 29.15791
## p = 8  31.44233 34.39275 26.01225 26.78247 27.92580 24.10080 23.75594
## p = 9  31.57509 33.98987 27.47278 26.92081 26.62574 21.91896 21.71187
## p = 10 34.34994 33.75383 24.46154 25.13892 26.98653 23.27262 23.65748
## p = 11 23.68447 23.68447 24.90968 27.13805 28.98636 25.18996 25.61717
## p = 12 25.59091 27.59091 27.59091 29.08999 29.84364 23.51211 22.55179
## p = 13 19.56880 19.27526 21.04212 21.04212 22.98266 23.87236 23.91841
## p = 14 22.81463 22.84285 24.84274 22.27955 22.27955 24.27951 25.89778
## p = 15 25.36264 24.86236 26.81668 26.73848 27.78117 27.78117 27.75060
## 
## $min.Stat
## [1] 16.55044
min_p=c()
for(i in 1:6){
  min_p[i]=min(model.ardl.opt$Stat.table[[i]])
}
q_opt=which(min_p==min(min_p, na.rm = TRUE))
p_opt=which(model.ardl.opt$Stat.table[[q_opt]] == 
              min(model.ardl.opt$Stat.table[[q_opt]], na.rm = TRUE))
data.frame("q_optimum" = q_opt, "p_optimum" = p_opt, 
           "AIC"=model.ardl.opt$min.Stat)
##   q_optimum p_optimum      AIC
## 1         2        13 16.55044
model.ardl2 <- ardlDlm(formula = Yt ~ Xt, 
                         data = train,p = 13, q = 2)
summary(model.ardl2)
## 
## Time series regression with "ts" data:
## Start = 14, End = 58
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41430 -0.19394  0.02836  0.16087  0.47584 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.04456    1.06711  -0.979   0.3360    
## Xt.t         0.40658    0.06938   5.860 2.67e-06 ***
## Xt.1         0.20835    0.18174   1.146   0.2613    
## Xt.2        -0.09590    0.20134  -0.476   0.6376    
## Xt.3        -0.13900    0.15910  -0.874   0.3897    
## Xt.4         0.07385    0.15906   0.464   0.6460    
## Xt.5         0.05911    0.14382   0.411   0.6842    
## Xt.6        -0.15248    0.13442  -1.134   0.2663    
## Xt.7         0.25536    0.13453   1.898   0.0680 .  
## Xt.8        -0.28691    0.14185  -2.023   0.0528 .  
## Xt.9         0.12287    0.15600   0.788   0.4375    
## Xt.10        0.13887    0.15975   0.869   0.3921    
## Xt.11       -0.20179    0.14670  -1.376   0.1799    
## Xt.12        0.05814    0.12013   0.484   0.6322    
## Xt.13        0.01800    0.05426   0.332   0.7426    
## Yt.1        -0.11385    0.19128  -0.595   0.5565    
## Yt.2         0.14225    0.18763   0.758   0.4547    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2851 on 28 degrees of freedom
## Multiple R-squared:  0.9892, Adjusted R-squared:  0.9831 
## F-statistic: 160.6 on 16 and 28 DF,  p-value: < 2.2e-16

Dari tabel di atas, dapat terlihat bahwa nilai AIC terendah didapat ketika \(p=13\) dan \(q=2\), yaitu sebesar 16,5504. Artinya, model autoregressive optimum didapat ketika \(p=13\) dan \(q=2\).

Selanjutnya dapat dilakukan pemodelan dengan nilai \(p\) dan \(q\) optimum seperti inisialisasi di langkah sebelumnya.”

model.ardl2 <- ardlDlm(formula = Yt ~ Xt, 
                         data = train,p = 13 , q = 2)
summary(model.ardl2)
## 
## Time series regression with "ts" data:
## Start = 14, End = 58
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41430 -0.19394  0.02836  0.16087  0.47584 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.04456    1.06711  -0.979   0.3360    
## Xt.t         0.40658    0.06938   5.860 2.67e-06 ***
## Xt.1         0.20835    0.18174   1.146   0.2613    
## Xt.2        -0.09590    0.20134  -0.476   0.6376    
## Xt.3        -0.13900    0.15910  -0.874   0.3897    
## Xt.4         0.07385    0.15906   0.464   0.6460    
## Xt.5         0.05911    0.14382   0.411   0.6842    
## Xt.6        -0.15248    0.13442  -1.134   0.2663    
## Xt.7         0.25536    0.13453   1.898   0.0680 .  
## Xt.8        -0.28691    0.14185  -2.023   0.0528 .  
## Xt.9         0.12287    0.15600   0.788   0.4375    
## Xt.10        0.13887    0.15975   0.869   0.3921    
## Xt.11       -0.20179    0.14670  -1.376   0.1799    
## Xt.12        0.05814    0.12013   0.484   0.6322    
## Xt.13        0.01800    0.05426   0.332   0.7426    
## Yt.1        -0.11385    0.19128  -0.595   0.5565    
## Yt.2         0.14225    0.18763   0.758   0.4547    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2851 on 28 degrees of freedom
## Multiple R-squared:  0.9892, Adjusted R-squared:  0.9831 
## F-statistic: 160.6 on 16 and 28 DF,  p-value: < 2.2e-16
AIC(model.ardl2)
## [1] 29.42385
BIC(model.ardl2)
## [1] 61.94377

Hasil di atas menunjukkan bahwa selain peubah \(x_{t-1}\), hasil uji t menunjukkan nilai-p pada peubah \(\ge0.05\) Hal ini menunjukkan bahwa peubah \(x_{t-1}\) berpengaruh signifikan terhadap \(y_t\), sementara \(x_t\) dan \(y_{t-1}\) berpengaruh signifikan terhadap \(y_t\).”

fore.ardl2 <- forecast(model = model.ardl2, x=test$Xt, h=14)
fore.ardl2
## $forecasts
##  [1] 21.77229 20.52108 20.02939 19.14999 18.73570 19.45331 20.86257 21.11623
##  [9] 20.47743 21.43916 23.65285 26.32716 27.71046 27.92030
## 
## $call
## forecast.ardlDlm(model = model.ardl2, x = test$Xt, h = 14)
## 
## attr(,"class")
## [1] "forecast.ardlDlm" "dLagM"

Data di atas merupakan hasil peramalan untuk 14 periode kedepan dari model autoregressive dengan nilai \(p=13\) dan \(q=2\).

mape.ardl2 <- MAPE(fore.ardl2$forecasts, test$Yt)
mape.ardl2
## [1] 0.01445912
#akurasi data training
GoF(model.ardl2)
##              n       MAE           MPE       MAPE       sMAPE      MASE
## model.ardl2 45 0.1934879 -6.779947e-05 0.00711318 0.007110808 0.3547279
##                    MSE      MRAE    GMRAE
## model.ardl2 0.05058937 932166881 19290.28

Berdasarkan akurasi di atas, terlihat bahwa nilai MAPE keduanya tidak jauh berbeda. Artinya, model regresi dengan distribusi lag ini tidakoverfitted atau underfitted

Pemodelan DLM & ARDL dengan Library dynlm

#sama dengan model dlm q=1
cons_lm1 <- dynlm(Yt ~ Xt+L(Xt),data = train.ts)
#sama dengan model ardl p=1 q=0
cons_lm2 <- dynlm(Yt ~ Xt+L(Yt),data = train.ts)
#sama dengan ardl p=1 q=1
cons_lm3 <- dynlm(Yt ~ Xt+L(Xt)+L(Yt),data = train.ts)
#sama dengan dlm p=2
cons_lm4 <- dynlm(Yt ~ Xt+L(Xt)+L(Xt,2),data = train.ts)

Ringkasan Model

summary(cons_lm1)
## 
## Time series regression with "ts" data:
## Start = 2, End = 58
## 
## Call:
## dynlm(formula = Yt ~ Xt + L(Xt), data = train.ts)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48930 -0.40406 -0.07391  0.21622  2.81151 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.31531    0.90081   0.350    0.728    
## Xt           0.47373    0.04410  10.741 5.12e-15 ***
## L(Xt)       -0.01702    0.04511  -0.377    0.707    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5183 on 54 degrees of freedom
## Multiple R-squared:  0.9461, Adjusted R-squared:  0.9441 
## F-statistic:   474 on 2 and 54 DF,  p-value: < 2.2e-16
summary(cons_lm2)
## 
## Time series regression with "ts" data:
## Start = 2, End = 58
## 
## Call:
## dynlm(formula = Yt ~ Xt + L(Yt), data = train.ts)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76190 -0.29995 -0.04978  0.23740  1.79589 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.54580    0.80143  -0.681 0.498762    
## Xt           0.36702    0.02703  13.579  < 2e-16 ***
## L(Yt)        0.22481    0.05825   3.859 0.000306 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4595 on 54 degrees of freedom
## Multiple R-squared:  0.9576, Adjusted R-squared:  0.9561 
## F-statistic: 610.5 on 2 and 54 DF,  p-value: < 2.2e-16
summary(cons_lm3)
## 
## Time series regression with "ts" data:
## Start = 2, End = 58
## 
## Call:
## dynlm(formula = Yt ~ Xt + L(Xt) + L(Yt), data = train.ts)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0233 -0.2713  0.1217  0.2984  0.8689 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.24530    0.66489  -0.369    0.714    
## Xt           0.48660    0.03236  15.036  < 2e-16 ***
## L(Xt)       -0.23184    0.04540  -5.106 4.56e-06 ***
## L(Yt)        0.45632    0.06613   6.901 6.51e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3797 on 53 degrees of freedom
## Multiple R-squared:  0.9716, Adjusted R-squared:   0.97 
## F-statistic: 604.7 on 3 and 53 DF,  p-value: < 2.2e-16
summary(cons_lm4)
## 
## Time series regression with "ts" data:
## Start = 3, End = 58
## 
## Call:
## dynlm(formula = Yt ~ Xt + L(Xt) + L(Xt, 2), data = train.ts)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48041 -0.34171 -0.01844  0.25829  1.41892 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.170522   0.676995  -0.252    0.802    
## Xt           0.487583   0.052341   9.315 1.13e-12 ***
## L(Xt)       -0.017540   0.098118  -0.179    0.859    
## L(Xt, 2)    -0.005899   0.055262  -0.107    0.915    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3496 on 52 degrees of freedom
## Multiple R-squared:  0.9763, Adjusted R-squared:  0.9749 
## F-statistic: 713.6 on 3 and 52 DF,  p-value: < 2.2e-16

SSE

deviance(cons_lm1)
## [1] 14.50802
deviance(cons_lm2)
## [1] 11.40143
deviance(cons_lm3)
## [1] 7.641805
deviance(cons_lm4)
## [1] 6.353772

Uji Diagnostik

#uji model
if(require("lmtest")) encomptest(cons_lm1, cons_lm2)
## Encompassing test
## 
## Model 1: Yt ~ Xt + L(Xt)
## Model 2: Yt ~ Xt + L(Yt)
## Model E: Yt ~ Xt + L(Xt) + L(Yt)
##           Res.Df Df      F    Pr(>F)    
## M1 vs. ME     53 -1 47.621 6.514e-09 ***
## M2 vs. ME     53 -1 26.075 4.562e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Autokorelasi

#durbin watson
dwtest(cons_lm1)
## 
##  Durbin-Watson test
## 
## data:  cons_lm1
## DW = 1.0808, p-value = 4.224e-05
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm2)
## 
##  Durbin-Watson test
## 
## data:  cons_lm2
## DW = 1.5997, p-value = 0.03841
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm3)
## 
##  Durbin-Watson test
## 
## data:  cons_lm3
## DW = 2.7648, p-value = 0.9961
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm4)
## 
##  Durbin-Watson test
## 
## data:  cons_lm4
## DW = 2.1132, p-value = 0.5554
## alternative hypothesis: true autocorrelation is greater than 0

Heterogenitas

bptest(cons_lm1)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm1
## BP = 1.0373, df = 2, p-value = 0.5953
bptest(cons_lm2)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm2
## BP = 23.613, df = 2, p-value = 7.456e-06
bptest(cons_lm3)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm3
## BP = 13.789, df = 3, p-value = 0.003207
bptest(cons_lm4)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm4
## BP = 0.94346, df = 3, p-value = 0.8149

Kenormalan

shapiro.test(residuals(cons_lm1))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm1)
## W = 0.68131, p-value = 7.651e-10
shapiro.test(residuals(cons_lm2))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm2)
## W = 0.93215, p-value = 0.003296
shapiro.test(residuals(cons_lm3))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm3)
## W = 0.96815, p-value = 0.1375
shapiro.test(residuals(cons_lm4))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm4)
## W = 0.85348, p-value = 7.243e-06

Perbandingan Model

akurasi <- matrix(c(mape.koyck, mape.dlm, mape.dlm2, mape.ardl))
row.names(akurasi)<- c("Koyck","DLM 1","DLM 2","Autoregressive")
colnames(akurasi) <- c("MAPE")
akurasi
##                       MAPE
## Koyck          0.029095435
## DLM 1          0.006409836
## DLM 2          0.022987324
## Autoregressive 0.007790350

Berdasarkan nilai MAPE, model paling optimum didapat pada Model DLM 1 karena memiliki nilai MAPE yang terkecil.”

Plot

par(mfrow=c(1,1))
plot(test$Xt, test$Yt, type="b", col="black", ylim=c(20,30))
points(test$Xt, fore.koyck$forecasts,col="red")
lines(test$Xt, fore.koyck$forecasts,col="red")
points(test$Xt, fore.dlm$forecasts,col="blue")
lines(test$Xt, fore.dlm$forecasts,col="blue")
points(test$Xt, fore.dlm2$forecasts,col="orange")
lines(test$Xt, fore.dlm2$forecasts,col="orange")
points(test$Xt, fore.ardl$forecasts,col="green")
lines(test$Xt, fore.ardl2$forecasts,col="green")
legend("topleft",c("aktual", "koyck","DLM 1","DLM 2", "autoregressive"), lty=1, col=c("black","red","blue","orange","green"), cex=0.8)

Berdasarkan plot tersebut, terlihat bahwa plot yang paling mendekati data aktualnya adalah Model DLM 1, sehingga dapat disimpulkan model terbaik dalam hal ini adalah model regresi DLM 1”