## Warning: package 'dLagM' was built under R version 4.3.1
## Loading required package: nardl
## Warning: package 'nardl' was built under R version 4.3.1
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Loading required package: dynlm
## Warning: package 'dynlm' was built under R version 4.3.1
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Warning: package 'MLmetrics' was built under R version 4.3.1
##
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:dLagM':
##
## MAPE
## The following object is masked from 'package:base':
##
## Recall
## Loading required package: carData
## Warning: package 'rio' was built under R version 4.3.1
data <- import("https://raw.githubusercontent.com/divanm/mpdw/main/pertemuan%203/newdelhip3.csv")
str(data)
## 'data.frame': 72 obs. of 2 variables:
## $ Yt: num 30.2 28.2 26.6 25 26 26 26 24 23 24 ...
## $ Xt: num 199 198 199 202 205 ...
data
## Yt Xt
## 1 30.2 198.6027
## 2 28.2 197.6013
## 3 26.6 198.6027
## 4 25.0 201.9405
## 5 26.0 205.2784
## 6 26.0 208.6163
## 7 26.0 208.6163
## 8 24.0 205.2784
## 9 23.0 200.2716
## 10 24.0 196.9338
## 11 25.0 198.6027
## 12 26.0 200.2716
## 13 27.0 203.6095
## 14 29.0 205.2784
## 15 29.0 205.2784
## 16 29.0 205.2784
## 17 29.0 203.6095
## 18 28.0 201.9405
## 19 27.0 200.2716
## 20 27.0 201.9405
## 21 27.0 201.9405
## 22 27.0 201.9405
## 23 27.0 200.2716
## 24 27.0 200.2716
## 25 27.0 200.2716
## 26 28.0 201.9405
## 27 29.0 203.6095
## 28 31.0 205.2784
## 29 31.0 206.9473
## 30 32.0 206.9473
## 31 32.0 205.2784
## 32 31.0 206.9473
## 33 31.0 206.9473
## 34 30.0 208.6163
## 35 30.0 206.9473
## 36 29.0 205.2784
## 37 27.0 203.6095
## 38 26.0 200.2716
## 39 25.0 200.2716
## 40 25.0 198.6027
## 41 24.0 196.9338
## 42 24.0 195.2648
## 43 25.0 195.2648
## 44 25.0 195.2648
## 45 25.0 196.9338
## 46 26.0 198.6027
## 47 26.0 198.6027
## 48 27.0 198.6027
## 49 27.0 198.6027
## 50 27.0 198.6027
## 51 27.0 200.2716
## 52 28.0 200.2716
## 53 28.0 200.2716
## 54 27.0 200.2716
## 55 27.0 198.6027
## 56 26.0 198.6027
## 57 25.0 198.6027
## 58 23.0 198.6027
## 59 22.0 196.9338
## 60 21.0 193.5959
## 61 20.0 193.5959
## 62 19.0 191.9270
## 63 19.0 191.9270
## 64 20.0 191.9270
## 65 21.0 191.9270
## 66 21.0 191.9270
## 67 21.0 191.9270
## 68 22.0 193.5959
## 69 24.0 195.2648
## 70 26.0 196.9338
## 71 27.0 198.6027
## 72 28.0 198.6027
Membagi 80% data menjadi data train dan 20% data menjadi data test
#SPLIT DATA
train<-data[1:57,]
test<-data[58:72,]
test
## Yt Xt
## 58 23 198.6027
## 59 22 196.9338
## 60 21 193.5959
## 61 20 193.5959
## 62 19 191.9270
## 63 19 191.9270
## 64 20 191.9270
## 65 21 191.9270
## 66 21 191.9270
## 67 21 191.9270
## 68 22 193.5959
## 69 24 195.2648
## 70 26 196.9338
## 71 27 198.6027
## 72 28 198.6027
Mengubah format data menjadi time series
#data time series
train.ts<-ts(train)
test.ts<-ts(test)
data.ts<-ts(data)
\[ y_t=a(1-\lambda)+\beta_0X_t+\beta_1Z_t+\lambda Y_{t-1}+V_t \]
dengan \[V_t=u_t-\lambda u_{t-1}\]
#MODEL KOYCK
model.koyck <- koyckDlm(x = train$Xt, y = train$Yt)
summary(model.koyck)
##
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0320 -0.6498 0.0663 0.5993 2.2743
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.8427209 9.6005572 0.296 0.768
## Y.1 0.8979076 0.0828445 10.838 4.67e-15 ***
## X.t -0.0007616 0.0549301 -0.014 0.989
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9314 on 53 degrees of freedom
## Multiple R-Squared: 0.8177, Adjusted R-squared: 0.8108
## Wald test: 118.9 on 2 and 53 DF, p-value: < 2.2e-16
##
## Diagnostic tests:
## NULL
##
## alpha beta phi
## Geometric coefficients: 27.8446 -0.0007616174 0.8979076
AIC(model.koyck)
## [1] 155.8773
BIC(model.koyck)
## [1] 163.9787
Dari hasil tersebut, didapat bahwa peubah \(y_{t-1}\) memiliki nilai \(P-Value<0.05\). Hal ini menunjukkan bahwa peubah\(y_{t-1}\) berpengaruh signifikan terhadap \(y\). Adapun model keseluruhannya adalah sebagai berikut
\[ \hat{Y_t}=2.8427209+0.8979076Y_{t-1}-0.0007616X_t \]
Berikut adalah hasil peramalan y untuk 15 periode kedepan menggunakan model koyck
fore.koyck <- forecast(model = model.koyck, x=test$Xt, h=15)
fore.koyck
## $forecasts
## [1] 25.13915 25.26537 25.38124 25.48529 25.57998 25.66500 25.74135 25.80990
## [9] 25.87145 25.92672 25.97507 26.01722 26.05379 26.08536 26.11371
##
## $call
## forecast.koyckDlm(model = model.koyck, x = test$Xt, h = 15)
##
## attr(,"class")
## [1] "forecast.koyckDlm" "dLagM"
mape.koyck <- MAPE(fore.koyck$forecasts, test$Yt)
mape.koyck
## [1] 0.1848113
#akurasi data training
mape.koyck.training<-GoF(model.koyck)["MAPE"]
# akurasi data testing
mape.koyck.testing <- MAPE(fore.koyck$forecasts, test$Yt)
mape.koyck.testing
## [1] 0.1848113
c("MAPE Testing"= mape.koyck.testing,"MAPE Training"=mape.koyck.training)
## $`MAPE Testing`
## [1] 0.1848113
##
## $`MAPE Training.MAPE`
## [1] 0.0256365
Berdasarkan akurasi di atas, terlihat bahwa nilai MAPE keduanya tidak jauh berbeda. Artinya, model regresi dengan distribusi lag ini tidak overfitted atau underfitted
#penentuan lag optimum
finiteDLMauto(formula = Yt~ Xt,
data = data.frame(train),
model.type = "dlm", error.type = "AIC", trace = FALSE)
## q - k MASE AIC BIC GMRAE MBRAE R.Adj.Sq Ljung-Box
## 10 10 0.82953 114.1996 138.2515 31256.63 0.50605 0.88174 0.0003339483
Berdasarkan output tersebut, lag optimum didapatkan ketika lag=10. Selanjutnya dilakukan pemodelan untuk lag=10
model.dlm <- dlm(x = train$Xt,y = train$Yt, q = 10)
summary(model.dlm)
##
## Call:
## lm(formula = model.formula, data = design)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.35115 -0.35642 -0.03291 0.29053 1.96454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -51.33432 9.90300 -5.184 9.20e-06 ***
## x.t 0.44298 0.09418 4.703 3.92e-05 ***
## x.1 0.21228 0.14746 1.440 0.159
## x.2 -0.13492 0.14681 -0.919 0.364
## x.3 0.05548 0.15486 0.358 0.722
## x.4 0.09490 0.15591 0.609 0.547
## x.5 -0.28676 0.15906 -1.803 0.080 .
## x.6 0.05577 0.15655 0.356 0.724
## x.7 0.06729 0.15663 0.430 0.670
## x.8 -0.14244 0.14833 -0.960 0.344
## x.9 -0.04177 0.14606 -0.286 0.777
## x.10 0.06903 0.08409 0.821 0.417
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7166 on 35 degrees of freedom
## Multiple R-squared: 0.91, Adjusted R-squared: 0.8817
## F-statistic: 32.18 on 11 and 35 DF, p-value: 4.785e-15
##
## AIC and BIC values for the model:
## AIC BIC
## 1 114.1996 138.2515
AIC(model.dlm)
## [1] 114.1996
BIC(model.dlm)
## [1] 138.2515
Dari hasil diatas, didapat bahwa \(P-value\) dari intercept,\(X_t\), dan \(X-{t-5}<0.05\). Hal ini menunjukkan bahwa intercept,\(X_t\), dan \(X-{t-5}<0.05\) berpengaruh signifikan terhadap \(y\). Adapun model keseluruhan yang terbentuk adalah sebagai berikut
\[ \hat{Y_t}=-51.33432+0.44298X_t+0.21228X_{t-1}-0.13492X_{t-2}+0.05548X_{t-3}+0.09490X_{t-4}-0.28676X_{t-5}+0.05577X_{t-6}+0.06729X_{t-7}-0.14244X_{t-8} -0.04177X_{t-9}+0.06903X_{t-10} \]
Berikut merupakan hasil peramalan \(y\) untuk 15 periode kedepan
fore.dlm <- forecast(model = model.dlm, x=test$Yt, h=15)
fore.dlm
## $forecasts
## [1] -51.41792 -89.53392 -66.08825 -76.32966 -93.68292 -43.31686 -52.32573
## [8] -63.52062 -38.36148 -30.79943 -42.26667 -41.31238 -40.25259 -39.50361
## [15] -39.05650
##
## $call
## forecast.dlm(model = model.dlm, x = test$Yt, h = 15)
##
## attr(,"class")
## [1] "forecast.dlm" "dLagM"
mape.dlm <- MAPE(fore.dlm$forecasts, test$Xt)
akurasinya
#akurasi data testing
mape.dlm<- MAPE(fore.dlm$forecasts, test$Yt)
#akurasi data training
mape.dlm.train = GoF(model.dlm)["MAPE"]
c("MAPE Testing"=mape.dlm,"MAPE Training"=mape.dlm.train)
## $`MAPE Testing`
## [1] 3.4975
##
## $`MAPE Training.MAPE`
## [1] 0.01694742
Model tersebut merupakan model yang sangat baik dengan nilai MAPE yang Berdasarkan akurasi di atas, terlihat bahwa nilai MAPE keduanya tidak jauh berbeda. Artinya, model regresi dengan distribusi lag ini tidak overfitted atau underfitted
#penentuan lag optimum
model.ardl.opt <- ardlBoundOrders(data = data.frame(data), ic = "AIC",
formula = Yt ~ Xt )
min_p=c()
for(i in 1:15){
min_p[i]=min(model.ardl.opt$Stat.table[[i]])
}
q_opt=which(min_p==min(min_p, na.rm = TRUE))
p_opt=which(model.ardl.opt$Stat.table[[q_opt]] ==
min(model.ardl.opt$Stat.table[[q_opt]], na.rm = TRUE))
data.frame("q_optimum" = q_opt, "p_optimum" = p_opt,
"AIC"=model.ardl.opt$min.Stat)
## q_optimum p_optimum AIC
## 1 6 15 121.7055
Dari tabel di atas, dapat terlihat bahwa nilai AIC terendah didapat
ketika \(p=15\) dan \(q=6\), yaitu sebesar 121.7055.
Artinya, model autoregressive optimum didapat ketika \(p=15\) dan \(q=6\).
model.ardl <- ardlDlm(x = train$Xt, y = train$Yt, p =15 , q = 6)
summary(model.ardl)
##
## Time series regression with "ts" data:
## Start = 16, End = 57
##
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71571 -0.29034 -0.00811 0.26973 0.99533
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -31.48686 23.68714 -1.329 0.19950
## X.t 0.16013 0.11731 1.365 0.18820
## X.1 0.16148 0.14829 1.089 0.28980
## X.2 -0.22167 0.14169 -1.564 0.13421
## X.3 0.04328 0.15719 0.275 0.78603
## X.4 0.05855 0.15933 0.367 0.71735
## X.5 -0.23618 0.15895 -1.486 0.15371
## X.6 0.33729 0.17283 1.952 0.06588 .
## X.7 0.02778 0.19314 0.144 0.88716
## X.8 -0.18545 0.18227 -1.017 0.32173
## X.9 0.07553 0.18743 0.403 0.69146
## X.10 0.07361 0.19528 0.377 0.71041
## X.11 -0.05438 0.17383 -0.313 0.75780
## X.12 -0.07336 0.15529 -0.472 0.64203
## X.13 0.02964 0.14166 0.209 0.83647
## X.14 0.10040 0.13625 0.737 0.47022
## X.15 -0.07181 0.08273 -0.868 0.39626
## Y.1 0.79038 0.25414 3.110 0.00577 **
## Y.2 0.17418 0.30843 0.565 0.57888
## Y.3 -0.03007 0.29864 -0.101 0.92086
## Y.4 -0.19852 0.31726 -0.626 0.53894
## Y.5 -0.18070 0.32944 -0.549 0.58973
## Y.6 -0.05696 0.23148 -0.246 0.80826
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5964 on 19 degrees of freedom
## Multiple R-squared: 0.9638, Adjusted R-squared: 0.9218
## F-statistic: 22.97 on 22 and 19 DF, p-value: 1.934e-09
AIC(model.ardl)
## [1] 90.45717
BIC(model.ardl)
## [1] 132.1612
Hasil di atas menunjukkan bahwa selain peubah \(X_{t-6}\) dan \(Y_{t-1}\), hasil uji t menunjukkan nilai-p pada peubah \(\ > 0.05\) Hal ini menunjukkan bahwa peubah \(X_{t-6}\) dan \(Y_{t-1}\) berpengaruh signifikan terhadap \(Y_t\), sementara peubah lain tidak berpengaruh signifikan terhadap \(Y_t\).
\[\hat{Y_t}=-31.48686 +0.16013X_t+0.16148X_{t-1}-0.22167X_{t-2}+0.04328X_{t-3}+0.05855X_{t-4}-0.23618X_{t-5}+0.3372X_{t-6}+0.02778 X_{t-7}-0.18545X_{t-8}+0.07553X_{t-9}+0.07361X_{t-10}-0.05438X_{t-11}-0.07336X_{t-12}+0.02964X_{t-13}+0.10040X_{t-14}-0.07181X_{t-15}+0.79038Y_{t-1}+0.17418Y_{t-2}-0.03007Y_{t-3}-0.19852Y_{t-4}-0.18070Y_{t-5}-0.05696Y_{t-6}\]
fore.ardl <- forecast(model = model.ardl, x=test$Xt, h=15)
fore.ardl
## $forecasts
## [1] 24.79388 24.21014 23.76236 22.96324 22.81308 22.45418 22.86662 23.60531
## [9] 23.17252 23.82399 24.38065 24.69424 25.38962 26.30082 27.07115
##
## $call
## forecast.ardlDlm(model = model.ardl, x = test$Xt, h = 15)
##
## attr(,"class")
## [1] "forecast.ardlDlm" "dLagM"
Data di atas merupakan hasil peramalan untuk 15 periode ke depan menggunakan Model Autoregressive
mape.ardl <- MAPE(fore.ardl$forecasts, test$Yt)
mape.ardl.train <- GoF(model.ardl)["MAPE"]
c("MAPE Testing"=mape.ardl,"MAPE Training"=mape.ardl.train)
## $`MAPE Testing`
## [1] 0.1043768
##
## $`MAPE Training.MAPE`
## [1] 0.01193032
Berdasarkan akurasi di atas, terlihat bahwa nilai MAPE keduanya tidak jauh berbeda. Artinya, model regresi dengan distribusi lag ini tidak overfitted atau underfitted
akurasi <- matrix(c(mape.koyck, mape.dlm, mape.ardl))
row.names(akurasi)<- c("Koyck","DLM","Autoregressive")
colnames(akurasi) <- c("MAPE")
akurasi
## MAPE
## Koyck 0.1848113
## DLM 3.4975004
## Autoregressive 0.1043768
Berdasarkan nilai MAPE, model paling optimum didapat pada Model Autoregressive karena memiliki nilai MAPE yang terkecil.
par(mfrow=c(1,1))
plot(test$Xt, test$Yt, type="b", col="black")
points(test$Xt, fore.koyck$forecasts,col="red")
lines(test$Xt, fore.koyck$forecasts,col="red")
points(test$Xt, fore.dlm$forecasts,col="blue")
lines(test$Xt, fore.dlm$forecasts,col="blue")
points(test$Xt, fore.ardl$forecasts,col="green")
lines(test$Xt, fore.ardl$forecasts,col="green")
legend("topleft",c("aktual", "koyck","DLM", "autoregressive"), lty=1, col=c("black","red","blue","green"), cex=0.8)
Berdasarkan plot tersebut, terlihat bahwa plot yang paling mendekati data aktualnya adalah Model autoregressive, sehingga dapat disimpulkan model terbaik dalam hal ini adalah model regresi autoregressive
Dari ketiga model yang dicobakan terhadap pengaruh kadar \(CO\) terhadap \(AQI\) di kota New Delhi, diperoleh kesimpulan bahwa Model Autoregressive Distributed Lag (ARDL) adalah yang paling baik dalam peramalan data tersebut.