Data and Preparations

Packages

library(dLagM)
library(dynlm) 
library(MLmetrics)
library(lmtest)
library(car)
library(readxl)

Data

The data used are flu-trend data sourced from kaggle.com, available at https://www.kaggle.com/code/ryanholbrook/exercise-linear-regression-with-time-series/mpdw_flue.

The data are weekly observations covering 2009-2016, with 366 observations in total. The variables used are the weekly period (t), Bronchitis (Xt), and BodyTemperature (Yt).

data <- mpdw_flue <- read.csv2("C://Users//asus//OneDrive//Documents//MK Semester 5//Metode Peramalan Deret Waktu//mpdw_flue.csv",sep=",")
str(data)
## 'data.frame':    366 obs. of  3 variables:
##  $ t : chr  "2009-06-29/2009-07-05" "2009-07-06/2009-07-12" "2009-07-13/2009-07-19" "2009-07-20/2009-07-26" ...
##  $ Xt: int  43 40 45 40 44 39 41 43 52 59 ...
##  $ Yt: int  22 21 20 19 19 18 17 20 22 25 ...
head(data)
##                       t Xt Yt
## 1 2009-06-29/2009-07-05 43 22
## 2 2009-07-06/2009-07-12 40 21
## 3 2009-07-13/2009-07-19 45 20
## 4 2009-07-20/2009-07-26 40 19
## 5 2009-07-27/2009-08-02 44 19
## 6 2009-08-03/2009-08-09 39 18
t<-data$t
Xt<-data$Xt
Yt<-data$Yt

# combine into a data frame (cbind would coerce every column to character)
datareg <- data.frame(t, Xt, Yt)

Objective of the Analysis

The analysis aims to predict the movement of BodyTemperature (Yt) as influenced by Bronchitis (Xt).

Data Splitting

The data are split into a training set containing roughly 80% of the observations (the first 290 weeks) and a test set containing the remaining 20% (76 observations).

train<-data[1:290,]
test<-data[291:366,]
# convert to time series objects
train.ts<-ts(train)
test.ts<-ts(test)
data.ts<-ts(data)

Data Exploration

Time Series Plot

Time series plot of BodyTemperature (Yt):

data.ts1<-ts(Yt)
plot(data.ts1, main = "Time Series Plot of Flu Trends BodyTemperature", xlab = "Period", ylab="Celsius")
points(data.ts1)

Based on the plot above, the BodyTemperature variable appears to follow a stationary pattern. This variable will be used as the response variable in the regression analysis.
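
As a rough check on this visual impression, a formal unit-root test could be run. The sketch below is only illustrative and assumes the tseries package (not loaded above) is installed.

# sketch only: Augmented Dickey-Fuller test on Yt (tseries package assumed available)
library(tseries)
adf.test(Yt)  # H0: the series has a unit root (is non-stationary)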

Correlation between Variables

cor(Xt,Yt)
## [1] 0.7701919

Xt and Yt have a fairly strong positive linear correlation (r of about 0.77).

Scatter Plot of Xt and Yt

plot(Xt, Yt, pch = 20, col = "red", main = "Scatter Plot Xt dan Yt")

Koyck Model
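
The Koyck model assumes an infinite distributed lag of Xt with geometrically decaying weights. After the Koyck transformation it is estimated as a regression of Yt on its own first lag and the current Xt, i.e. yt = alpha*(1-phi) + phi*yt-1 + beta*xt + vt with 0 < phi < 1; the "Geometric coefficients" row in the summary below reports alpha, beta, and phi on this scale.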

model.koyck = dLagM::koyckDlm(x=train$Xt , y=train$Yt)
summary(model.koyck)
## 
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.6331  -2.2115  -0.4483   2.0861  25.7575 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.292530   1.508153   0.857    0.392    
## Y.1         0.961816   0.027331  35.191   <2e-16 ***
## X.t         0.007701   0.038354   0.201    0.841    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.967 on 286 degrees of freedom
## Multiple R-Squared: 0.9298,  Adjusted R-squared: 0.9294 
## Wald test:  1894 on 2 and 286 DF,  p-value: < 2.2e-16 
## 
## Diagnostic tests:
## NULL
## 
##                             alpha        beta       phi
## Geometric coefficients:  33.85021 0.007701024 0.9618162
AIC(model.koyck)
## [1] 1621.661
BIC(model.koyck)
## [1] 1636.327

Koyck Model Forecast

(fore.koyck <- forecast(model = model.koyck, x=test$Xt, h=76))
## $forecasts
##  [1] 67.34292 66.69552 66.04204 65.41352 64.79359 64.17423 63.55541 62.96793
##  [9] 62.37977 61.80637 61.21636 60.67199 60.15610 59.66761 59.18237 58.67715
## [17] 58.16812 57.67083 57.18482 56.70967 56.22956 55.74468 55.30142 54.85198
## [25] 54.43511 54.02645 53.64109 53.26275 52.89886 52.54115 52.21251 51.95803
## [33] 51.74407 51.59218 51.46150 51.32811 51.19981 51.08410 50.95742 50.82017
## [41] 50.65736 50.54697 50.44079 50.33097 50.16373 50.04909 49.97733 49.86210
## [49] 49.65886 49.48648 49.42850 49.34193 49.27406 49.23960 49.18334 49.15234
## [57] 49.12252 49.11694 49.08077 49.06139 48.98113 48.91164 48.81400 48.76630
## [65] 48.72041 48.64548 48.57341 48.45788 48.35446 48.24729 48.10571 47.94643
## [73] 47.78554 47.62308 47.48993 47.34647
## 
## $call
## forecast.koyckDlm(model = model.koyck, x = test$Xt, h = 76)
## 
## attr(,"class")
## [1] "forecast.koyckDlm" "dLagM"

Test MAPE and Training Accuracy

# MAPE on the test set
mape.koyck <- MAPE(fore.koyck$forecasts, test$Yt)

# MAPE on the training set
mape_train <- dLagM::GoF(model.koyck)["MAPE"]

c("MAPE_testing" = mape.koyck, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.2982558
## 
## $MAPE_training.MAPE
## [1] 0.06462996
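
For reference, MAPE here is the mean absolute percentage error reported as a fraction; the short check below reproduces MLmetrics::MAPE by hand for the Koyck forecasts and should match the testing value above.

# MAPE = mean(|actual - forecast| / |actual|)
mean(abs(test$Yt - fore.koyck$forecasts) / abs(test$Yt))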

Regression with Distributed Lag

Lag = 2
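
With q = 2 the model regresses Yt on the current Xt and its first two lags, yt = alpha + beta0*xt + beta1*xt-1 + beta2*xt-2 + et, which correspond to the x.t, x.1, and x.2 coefficients in the output below.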

model.dlm = dLagM::dlm(x = train$Xt,y = train$Yt , q = 2)
summary(model.dlm)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.893  -5.128  -1.045   3.895  42.554 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -19.89567    2.99809  -6.636 1.63e-10 ***
## x.t           0.64908    0.11058   5.870 1.22e-08 ***
## x.1          -0.04453    0.14522  -0.307    0.759    
## x.2           0.43945    0.11112   3.955 9.68e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.372 on 284 degrees of freedom
## Multiple R-squared:  0.6086, Adjusted R-squared:  0.6044 
## F-statistic: 147.2 on 3 and 284 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##        AIC      BIC
## 1 2112.211 2130.526
AIC(model.dlm)
## [1] 2112.211
BIC(model.dlm)
## [1] 2130.526

DLM Forecast (Lag 2)

(fore.dlm <- forecast(model = model.dlm, x=test$Xt, h=76)) 
## $forecasts
##  [1] 68.46197 64.30449 63.99446 63.29369 60.23775 58.37957 55.68703 55.15137
##  [9] 51.84126 51.76522 47.24601 48.97646 47.29472 49.21761 48.31436 45.59747
## [17] 42.99399 40.28128 38.35839 37.31440 34.97224 32.71915 33.48165 30.08248
## [25] 32.83257 30.77609 32.34859 31.21553 31.69951 30.61098 31.95367 36.61781
## [33] 39.73677 47.61777 50.36200 52.69997 53.62340 53.83303 52.49034 51.72069
## [41] 48.33454 51.52825 49.50328 51.49088 46.34277 50.15405 49.61670 48.13624
## [49] 42.81169 42.65663 46.33681 44.43539 52.06391 52.81339 51.56692 55.40553
## [57] 53.95360 57.21918 54.48927 57.28389 50.24440 52.12862 45.97221 50.48426
## [65] 48.45929 48.49964 48.67777 43.02550 43.94177 40.61148 37.85006 35.68603
## [73] 32.97332 31.05043 32.60275 30.73156
## 
## $call
## forecast.dlm(model = model.dlm, x = test$Xt, h = 76)
## 
## attr(,"class")
## [1] "forecast.dlm" "dLagM"

Test MAPE and Training Accuracy

# MAPE on the test set
mape.dlm <- MAPE(fore.dlm$forecasts, test$Yt)

# MAPE on the training set
mape_train <- GoF(model.dlm)["MAPE"]

c("MAPE_testing" = mape.dlm, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.1162803
## 
## $MAPE_training.MAPE
## [1] 0.1568112

Optimum Lag

finiteDLMauto(formula = Yt ~ Xt,
              data = data.frame(train), q.min = 1, q.max = 10 ,
              model.type = "dlm", error.type = "AIC", trace = TRUE) # q.max is the maximum lag considered
##    q - k    MASE      AIC      BIC    GMRAE    MBRAE R.Adj.Sq Ljung-Box
## 10    10 2.18034 2013.397 2060.650 35.10024  0.68038  0.65459         0
## 9      9 2.17259 2024.329 2067.989 33.81861 -1.92414  0.64924         0
## 8      8 2.15492 2037.121 2077.182 31.99670  1.05009  0.64213         0
## 7      7 2.16516 2047.282 2083.736 32.71268  0.88002  0.63879         0
## 6      6 2.15392 2058.390 2091.231 31.41291  0.95837  0.63503         0
## 5      5 2.18317 2070.318 2099.538 30.47528  0.95297  0.62986         0
## 4      4 2.22415 2080.129 2105.721 31.27892  0.16691  0.62709         0
## 3      3 2.27964 2092.017 2113.974 35.06167  1.63898  0.62156         0
## 2      2 2.34634 2112.211 2130.526 36.64824  0.09393  0.60444         0
## 1      1 2.42652 2132.075 2146.741 36.94918  1.31179  0.58683         0
# DLM with the optimum lag: q = 10 gives the smallest AIC in the table above
model.dlm2 = dLagM::dlm(x=train$Xt,y = train$Yt , q = 10) # several lags are not significant, so the number of lags could still be reduced
summary(model.dlm2)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.240  -5.326  -1.671   3.594  39.626 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -35.87378    3.64047  -9.854  < 2e-16 ***
## x.t           0.79144    0.10597   7.469 1.14e-12 ***
## x.1          -0.06073    0.13815  -0.440   0.6606    
## x.2           0.02354    0.13875   0.170   0.8654    
## x.3           0.15943    0.13835   1.152   0.2502    
## x.4           0.07378    0.13923   0.530   0.5966    
## x.5          -0.01943    0.14008  -0.139   0.8898    
## x.6           0.02921    0.13985   0.209   0.8347    
## x.7           0.03096    0.14103   0.220   0.8264    
## x.8          -0.05090    0.14498  -0.351   0.7258    
## x.9           0.07816    0.14613   0.535   0.5932    
## x.10          0.26477    0.10991   2.409   0.0167 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.601 on 268 degrees of freedom
## Multiple R-squared:  0.6682, Adjusted R-squared:  0.6546 
## F-statistic: 49.07 on 11 and 268 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##        AIC     BIC
## 1 2013.397 2060.65
AIC(model.dlm2)
## [1] 2013.397
BIC(model.dlm2)
## [1] 2060.65

Optimum-Lag DLM Forecast

(fore.dlm2 <- forecast(model = model.dlm2,x=test$Xt, h=76))
## $forecasts
##  [1] 69.82700 67.81075 61.81989 65.87774 64.95969 62.48400 57.99780 61.88302
##  [9] 60.23227 57.76587 55.10788 56.42436 55.37791 55.29928 53.14005 48.60402
## [17] 45.92072 44.94325 42.68185 40.37118 36.58270 34.80573 37.28011 34.36119
## [25] 34.68884 32.45298 32.21342 30.90440 30.83958 29.73010 30.09154 35.81041
## [33] 38.85372 43.99894 47.06288 47.33897 48.91654 50.52374 48.91682 47.17895
## [41] 45.49280 52.29942 52.82848 53.52166 48.37515 53.67047 56.80016 50.80152
## [49] 41.87559 45.05595 54.11240 49.28110 51.80676 56.22767 51.55451 55.95770
## [57] 58.53536 58.39570 51.50623 55.95313 53.15462 52.95094 50.58933 55.31377
## [65] 53.80179 50.94497 52.09625 47.93528 47.75529 46.37153 39.96908 37.12154
## [73] 35.71154 35.63680 36.75133 33.92716
## 
## $call
## forecast.dlm(model = model.dlm2, x = test$Xt, h = 76)
## 
## attr(,"class")
## [1] "forecast.dlm" "dLagM"

Test MAPE and Training Accuracy

mape.dlm2 <- MAPE(fore.dlm2$forecasts, test$Yt)

mape_train <- GoF(model.dlm2)["MAPE"]

c("MAPE_testing" = mape.dlm2, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.1089075
## 
## $MAPE_training.MAPE
## [1] 0.1433475

Autoregressive / Dynamic Regression Model

model.ardl = ardlDlm(x = train$Xt, y = train$Yt, p = 1 , q = 1) # p: lag of x, q: lag of y
# model for p=1, q=1: yt = b0 + b1*yt-1 + b2*xt + b3*xt-1
# model for p=2, q=2: yt = b0 + b1*yt-1 + b2*yt-2 + b3*xt + b4*xt-1 + b5*xt-2

summary(model.ardl)
## 
## Time series regression with "ts" data:
## Start = 2, End = 290
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.7261  -1.7662  -0.2586   1.4176  26.1876 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.90397    1.22124  -0.740     0.46    
## X.t          0.30511    0.04440   6.871 4.00e-11 ***
## X.1         -0.23862    0.04572  -5.219 3.47e-07 ***
## Y.1          0.93027    0.02294  40.548  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.694 on 285 degrees of freedom
## Multiple R-squared:  0.9394, Adjusted R-squared:  0.9387 
## F-statistic:  1472 on 3 and 285 DF,  p-value: < 2.2e-16
AIC(model.ardl)
## [1] 1581.413
BIC(model.ardl)
## [1] 1599.745

Autoregressive (ARDL) Forecast

(fore.ardl <- forecast(model = model.ardl, x=test$Xt, h=76))
## $forecasts
##  [1] 69.13256 68.38285 66.94222 66.55652 65.58749 64.24795 62.80234 62.47850
##  [9] 61.02329 60.08031 57.91616 58.01133 57.68912 57.45587 56.39005 54.35025
## [17] 52.73045 51.63434 50.54818 49.47126 47.79274 46.03179 46.02481 44.38714
## [25] 44.18974 43.22376 42.86886 41.99499 41.42068 40.58130 40.64929 42.67617
## [33] 43.87321 46.16807 47.24280 47.46024 47.90114 48.61640 48.43295 48.12931
## [41] 47.10364 48.93462 49.20621 49.15376 46.90271 48.54822 50.17283 48.66041
## [49] 45.02386 45.41962 49.34345 48.43260 49.14994 50.56046 50.00283 51.11526
## [57] 51.43426 52.64635 51.83763 52.64998 50.48760 50.69004 49.41931 51.02232
## [65] 51.08184 49.91678 49.78742 47.83644 47.75832 47.14191 45.28157 43.82871
## [73] 42.88791 41.94622 42.22414 41.15661
## 
## $call
## forecast.ardlDlm(model = model.ardl, x = test$Xt, h = 76)
## 
## attr(,"class")
## [1] "forecast.ardlDlm" "dLagM"

Test MAPE and Training Accuracy

mape.ardl <- MAPE(fore.ardl$forecasts, test$Yt) # MAPE on the test set

mape_train <- GoF(model.ardl)["MAPE"] # MAPE on the training set

c("MAPE_testing" = mape.ardl, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.2104407
## 
## $MAPE_training.MAPE
## [1] 0.05403695

Determining the Optimum Lag

datareg <- data.frame(data$t,data$Xt,data$Yt)
colnames(datareg)[1] <- "t"
colnames(datareg)[2] <- "Xt"
colnames(datareg)[3] <- "Yt"
pq <- ardlBoundOrders(data = data.frame(datareg), formula = Yt ~ Xt ) 
c(p=pq$p$Xt,q=pq$q)
##  p  q 
## 15  1
# refit the ARDL model with the orders selected above (p = 15, q = 1)
model.ardl2 = ardlDlm(x = train$Xt, y = train$Yt, p = 15 , q = 1) # p: lag of x, q: lag of y

summary(model.ardl2)
## 
## Time series regression with "ts" data:
## Start = 16, End = 290
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.9380  -1.6350  -0.1325   1.5921  23.5285 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.694986   2.077032   1.298  0.19562    
## X.t          0.265197   0.047686   5.561 6.70e-08 ***
## X.1         -0.377619   0.059210  -6.378 8.31e-10 ***
## X.2          0.177929   0.058986   3.016  0.00281 ** 
## X.3          0.105326   0.058700   1.794  0.07394 .  
## X.4         -0.045903   0.059194  -0.775  0.43877    
## X.5         -0.149707   0.059248  -2.527  0.01211 *  
## X.6          0.015757   0.059376   0.265  0.79093    
## X.7          0.033446   0.059451   0.563  0.57422    
## X.8         -0.032874   0.061095  -0.538  0.59098    
## X.9          0.002448   0.062087   0.039  0.96858    
## X.10         0.110200   0.062038   1.776  0.07686 .  
## X.11        -0.071679   0.062319  -1.150  0.25113    
## X.12        -0.087902   0.062396  -1.409  0.16011    
## X.13         0.105083   0.062382   1.685  0.09330 .  
## X.14         0.014046   0.062404   0.225  0.82209    
## X.15        -0.080817   0.046730  -1.729  0.08493 .  
## Y.1          0.961664   0.027517  34.948  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.578 on 257 degrees of freedom
## Multiple R-squared:  0.9446, Adjusted R-squared:  0.941 
## F-statistic: 257.9 on 17 and 257 DF,  p-value: < 2.2e-16

AIC and BIC

AIC(model.ardl2)
## [1] 1500.94
BIC(model.ardl2)
## [1] 1569.658
# DLM and ARDL modelling with the dynlm package (loaded above)

# equivalent to a DLM with lag 1
cons_lm1 <- dynlm(Yt ~ Xt+L(Xt),data = train.ts)

# equivalent to an ARDL model with p = 0, q = 1
cons_lm2 <- dynlm(Yt ~ Xt+L(Yt),data = train.ts)

# equivalent to the ARDL model with p = 1, q = 1
cons_lm3 <- dynlm(Yt ~ Xt+L(Xt)+L(Yt),data = train.ts)

# equivalent to a DLM with lag 2 (q = 2)
cons_lm4 <- dynlm(Yt ~ Xt+L(Xt)+L(Xt,2),data = train.ts)
summary(cons_lm1)
## 
## Time series regression with "ts" data:
## Start = 2, End = 290
## 
## Call:
## dynlm(formula = Yt ~ Xt + L(Xt), data = train.ts)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.038  -5.250  -0.599   4.414  43.353 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -17.5280     2.9877  -5.867 1.23e-08 ***
## Xt            0.6747     0.1129   5.978 6.70e-09 ***
## L(Xt)         0.3279     0.1131   2.900  0.00402 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.594 on 286 degrees of freedom
## Multiple R-squared:  0.5897, Adjusted R-squared:  0.5868 
## F-statistic: 205.5 on 2 and 286 DF,  p-value: < 2.2e-16
summary(cons_lm2)
## 
## Time series regression with "ts" data:
## Start = 2, End = 290
## 
## Call:
## dynlm(formula = Yt ~ Xt + L(Yt), data = train.ts)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.1887  -1.9159  -0.4293   1.4001  26.6915 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.78615    1.21914  -2.285    0.023 *  
## Xt           0.12452    0.02908   4.282 2.53e-05 ***
## L(Yt)        0.89367    0.02283  39.153  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.86 on 286 degrees of freedom
## Multiple R-squared:  0.9336, Adjusted R-squared:  0.9331 
## F-statistic:  2010 on 2 and 286 DF,  p-value: < 2.2e-16
summary(cons_lm3)
## 
## Time series regression with "ts" data:
## Start = 2, End = 290
## 
## Call:
## dynlm(formula = Yt ~ Xt + L(Xt) + L(Yt), data = train.ts)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.7261  -1.7662  -0.2586   1.4176  26.1876 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.90397    1.22124  -0.740     0.46    
## Xt           0.30511    0.04440   6.871 4.00e-11 ***
## L(Xt)       -0.23862    0.04572  -5.219 3.47e-07 ***
## L(Yt)        0.93027    0.02294  40.548  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.694 on 285 degrees of freedom
## Multiple R-squared:  0.9394, Adjusted R-squared:  0.9387 
## F-statistic:  1472 on 3 and 285 DF,  p-value: < 2.2e-16
summary(cons_lm4)
## 
## Time series regression with "ts" data:
## Start = 3, End = 290
## 
## Call:
## dynlm(formula = Yt ~ Xt + L(Xt) + L(Xt, 2), data = train.ts)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.893  -5.128  -1.045   3.895  42.554 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -19.89567    2.99809  -6.636 1.63e-10 ***
## Xt            0.64908    0.11058   5.870 1.22e-08 ***
## L(Xt)        -0.04453    0.14522  -0.307    0.759    
## L(Xt, 2)      0.43945    0.11112   3.955 9.68e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.372 on 284 degrees of freedom
## Multiple R-squared:  0.6086, Adjusted R-squared:  0.6044 
## F-statistic: 147.2 on 3 and 284 DF,  p-value: < 2.2e-16

SSE

deviance(cons_lm1)
## [1] 26325.44
deviance(cons_lm2)
## [1] 4260.957
deviance(cons_lm3)
## [1] 3889.22
deviance(cons_lm4)
## [1] 24944.99

Model Diagnostics

Autocorrelation Test (Durbin-Watson)

dwtest(cons_lm1)
## 
##  Durbin-Watson test
## 
## data:  cons_lm1
## DW = 0.24539, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm2)
## 
##  Durbin-Watson test
## 
## data:  cons_lm2
## DW = 1.3612, p-value = 1.39e-08
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm3)
## 
##  Durbin-Watson test
## 
## data:  cons_lm3
## DW = 1.3777, p-value = 3.093e-08
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm4)
## 
##  Durbin-Watson test
## 
## data:  cons_lm4
## DW = 0.24643, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0

Heteroscedasticity Test (Breusch-Pagan)

bptest(cons_lm1)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm1
## BP = 38.541, df = 2, p-value = 4.276e-09
bptest(cons_lm2)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm2
## BP = 45.019, df = 2, p-value = 1.676e-10
bptest(cons_lm3)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm3
## BP = 48.526, df = 3, p-value = 1.645e-10
bptest(cons_lm4)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm4
## BP = 40.196, df = 3, p-value = 9.683e-09

Normality Test (Shapiro-Wilk)

shapiro.test(residuals(cons_lm1))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm1)
## W = 0.93012, p-value = 2.05e-10
shapiro.test(residuals(cons_lm2))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm2)
## W = 0.86007, p-value = 1.676e-15
shapiro.test(residuals(cons_lm3))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm3)
## W = 0.85807, p-value = 1.283e-15
shapiro.test(residuals(cons_lm4))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(cons_lm4)
## W = 0.91921, p-value = 2.323e-11
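
Taken together, all four models fail the residual diagnostics at the 5% level: the Durbin-Watson tests indicate remaining autocorrelation, the Breusch-Pagan tests indicate heteroscedasticity, and the Shapiro-Wilk tests reject normality of the residuals.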

Forecast Accuracy Comparison

# comparison of test-set MAPE across the four models
akurasi <- matrix(c(mape.koyck, mape.dlm, mape.dlm2, mape.ardl))
row.names(akurasi)<- c("Koyck","DLM 1","DLM 2","Autoregressive")
colnames(akurasi) <- c("MAPE")
akurasi
##                     MAPE
## Koyck          0.2982558
## DLM 1          0.1162803
## DLM 2          0.1089075
## Autoregressive 0.2104407
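
Based on test-set MAPE, the optimum-lag DLM (DLM 2, MAPE of about 0.109) gives the most accurate forecasts, followed by DLM 1, the autoregressive model, and the Koyck model.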

Plot

# forecasts vs actual values on the test set
par(mfrow=c(1,1))
plot(test$Xt, test$Yt, col="black")
points(test$Xt, fore.koyck$forecasts,col="purple")
points(test$Xt, fore.dlm$forecasts,col="blue")
points(test$Xt, fore.dlm2$forecasts,col="yellow")
points(test$Xt, fore.ardl$forecasts,col="green")
legend("topleft",c("actual", "koyck","DLM 1","DLM 2", "autoregressive"), pch=1, col=c("black","purple","blue","yellow","green"), cex=0.5)

References

https://www.kaggle.com/code/ryanholbrook/exercise-linear-regression-with-time-series/data