library(dLagM)
## Warning: package 'dLagM' was built under R version 4.1.3
## Loading required package: nardl
## Warning: package 'nardl' was built under R version 4.1.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: dynlm
## Warning: package 'dynlm' was built under R version 4.1.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.1.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(dynlm)
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.1.3
## 
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:dLagM':
## 
##     MAPE
## The following object is masked from 'package:base':
## 
##     Recall
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.1.3
library(car)
## Warning: package 'car' was built under R version 4.1.2
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.1.2
library(knitr)
## Warning: package 'knitr' was built under R version 4.1.2
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2

Data

setwd("C:/Users/Nabil Izzany/Documents/Kuylah/Semester 5/MPDW/Individu")

data <-read_excel("Walmart.xlsx")
str(data)
## tibble [6,435 x 7] (S3: tbl_df/tbl/data.frame)
##  $ Date        : num [1:6435] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Weekly_Sales: num [1:6435] 1643691 1641957 1611968 1409728 1554807 ...
##  $ Holiday_Flag: num [1:6435] 0 1 0 0 0 0 0 0 0 0 ...
##  $ Temperature : num [1:6435] 42.3 38.5 39.9 46.6 46.5 ...
##  $ Fuel_Price  : num [1:6435] 2.57 2.55 2.51 2.56 2.62 ...
##  $ CPI         : num [1:6435] 211 211 211 211 211 ...
##  $ Unemployment: num [1:6435] 8.11 8.11 8.11 8.11 8.11 ...
knitr::kable(head(data), align="l")
Date  Weekly_Sales  Holiday_Flag  Temperature  Fuel_Price  CPI       Unemployment
1     1643691       0             42.31        2.572       211.0964  8.106
2     1641957       1             38.51        2.548       211.2422  8.106
3     1611968       0             39.93        2.514       211.2891  8.106
4     1409728       0             46.63        2.561       211.3196  8.106
5     1554807       0             46.50        2.625       211.3501  8.106
6     1439542       0             57.79        2.667       211.3806  8.106
train<-data[1:4505,]
test<-data[4506:6435,]

train.ts<-ts(train)
test.ts<-ts(test)
data.ts<-ts(data)
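
Before fitting anything, it can help to eyeball the series. A minimal sketch, plotting the response and the CPI predictor from the ts object created above:

# quick visual check of the response and the main predictor (sketch)
plot(data.ts[, "Weekly_Sales"], ylab = "Weekly_Sales", xlab = "Period")
plot(data.ts[, "CPI"], ylab = "CPI", xlab = "Period")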

KOYCK

KOYCK Model

model.koyck = dLagM::koyckDlm(x = train$CPI, y = train$Weekly_Sales)
summary(model.koyck)
## 
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2017120   -56246   -18636    50272  1668758 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.134e+05  1.648e+04   6.879 6.84e-12 ***
## Y.1          9.333e-01  5.355e-03 174.304  < 2e-16 ***
## X.t         -2.009e+02  7.824e+01  -2.568   0.0103 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 202600 on 4501 degrees of freedom
## Multiple R-Squared: 0.8772,  Adjusted R-squared: 0.8772 
## Wald test: 1.608e+04 on 2 and 4501 DF,  p-value: < 2.2e-16 
## 
## Diagnostic tests:
## NULL
## 
##                            alpha      beta       phi
## Geometric coefficients:  1700951 -200.9237 0.9333405
AIC(model.koyck)
## [1] 122854
BIC(model.koyck)
## [1] 122879.7
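
The geometric coefficients in the summary describe an infinite distributed lag in which the effect of CPI on Weekly_Sales decays geometrically: the weight at lag s is beta * phi^s. A small sketch, plugging in the estimates printed above:

# implied Koyck distributed-lag weights: effect of CPI at lag s is beta * phi^s
phi  <- 0.9333405   # geometric decay parameter from the summary above
beta <- -200.9237   # immediate (lag-0) effect of CPI from the summary above
beta * phi^(0:10)   # weights for lags 0 through 10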

KOYCK Model Forecast

fore.koyck <- forecast(model = model.koyck, x=test$CPI, h=12)

Testing MAPE and Training Data Accuracy

test12 <- test[1:12,]

# MAPE on the testing data
mape.koyck <- MAPE(fore.koyck$forecasts, test12$Weekly_Sales)

# accuracy (MAPE) on the training data
mape_train <- dLagM::GoF(model.koyck)["MAPE"]

c("MAPE_testing" = mape.koyck, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.02526593
## 
## $MAPE_training.MAPE
## [1] 0.09564814
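
The values above are proportions (0.025 is about 2.5%). As a sanity check, the testing MAPE can be recomputed by hand from the same objects (MLmetrics::MAPE takes the predictions first and the actuals second); a minimal sketch:

# manual check of the testing MAPE: mean(|actual - forecast| / |actual|)
mean(abs((test12$Weekly_Sales - fore.koyck$forecasts) / test12$Weekly_Sales))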

Regression with Distributed Lag

Regression with Distributed Lag (lag=2)

Model Regression with Distributed Lag (lag=2)

model.dlm = dLagM::dlm(x = train$CPI, y = train$Weekly_Sales , q = 2)
summary(model.dlm)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -901370 -469864 -114482  443121 2687962 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1740041.3    37723.7  46.126   <2e-16 ***
## x.t           -3056.0     1877.0  -1.628    0.104    
## x.1            -863.2     2650.8  -0.326    0.745    
## x.2             687.3     1876.9   0.366    0.714    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 563600 on 4499 degrees of freedom
## Multiple R-squared:  0.04968,    Adjusted R-squared:  0.04905 
## F-statistic:  78.4 on 3 and 4499 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##        AIC      BIC
## 1 132044.1 132076.1
AIC(model.dlm)
## [1] 132044.1
BIC(model.dlm)
## [1] 132076.1

Forecast for the Lag-2 Model

(fore.dlm <- forecast(model = model.dlm, x=test$CPI, h=12))
## $forecasts
##  [1] 1116281 1115948 1115642 1115244 1114592 1113896 1113251 1112606 1112145
## [10] 1111765 1111353 1110935
## 
## $call
## forecast.dlm(model = model.dlm, x = test$CPI, h = 12)
## 
## attr(,"class")
## [1] "forecast.dlm" "dLagM"

Testing MAPE and Training Data Accuracy

# MAPE on the testing data
mape.dlm <- MAPE(fore.dlm$forecasts, test12$Weekly_Sales)

# accuracy (MAPE) on the training data
mape_train <- GoF(model.dlm)["MAPE"]

c("MAPE_testing" = mape.dlm, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.05012342
## 
## $MAPE_training.MAPE
## [1] 0.5722005

Regression with Distributed Lag (lag=optimum)

Model Regression with Distributed Lag (lag=optimum)

finiteDLMauto(formula = Weekly_Sales ~ CPI,
              data = data.frame(train), q.max = 5, q.min = 1,
              model.type = "dlm", error.type = "AIC", trace = TRUE)
##   q - k    MASE      AIC      BIC   GMRAE   MBRAE R.Adj.Sq Ljung-Box
## 5     5 4.73783 131962.9 132014.2 8.57961 0.91074  0.04863         0
## 4     4 4.73792 131990.0 132034.9 8.58206 0.90780  0.04876         0
## 3     3 4.73737 132016.8 132055.2 8.58116 0.90631  0.04892         0
## 2     2 4.73662 132044.1 132076.1 8.57870 0.89643  0.04905         0
## 1     1 4.73760 132071.6 132097.2 8.57712 0.88510  0.04914         0
model.dlm2 = dLagM::dlm(x = train$CPI, y = train$Weekly_Sales, q = 3) # some lags are not significant, so the number of lags can be reduced
summary(model.dlm2)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -901353 -469915 -114428  443014 2688174 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.740e+06  3.779e+04  46.059   <2e-16 ***
## x.t         -3.058e+03  1.877e+03  -1.629    0.103    
## x.1         -8.638e+02  2.651e+03  -0.326    0.745    
## x.2          6.812e+02  2.651e+03   0.257    0.797    
## x.3          5.582e+00  1.877e+03   0.003    0.998    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 563700 on 4497 degrees of freedom
## Multiple R-squared:  0.04977,    Adjusted R-squared:  0.04892 
## F-statistic: 58.88 on 4 and 4497 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##        AIC      BIC
## 1 132016.8 132055.2
AIC(model.dlm2)
## [1] 132016.8
BIC(model.dlm2)
## [1] 132055.2

Forecast for the Optimum-Lag Model

(fore.dlm2 <- forecast(model = model.dlm2, x=test$CPI, h=12))
## $forecasts
##  [1] 1116102 1115768 1115462 1115063 1114411 1113714 1113068 1112423 1111961
## [10] 1111581 1111169 1110750
## 
## $call
## forecast.dlm(model = model.dlm2, x = test$CPI, h = 12)
## 
## attr(,"class")
## [1] "forecast.dlm" "dLagM"

Testing MAPE and Training Data Accuracy

# MAPE on the testing data
mape.dlm2 <- MAPE(fore.dlm2$forecasts, test12$Weekly_Sales)

# accuracy (MAPE) on the training data
mape_train <- GoF(model.dlm2)["MAPE"]

c("MAPE_testing" = mape.dlm2, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.05025164
## 
## $MAPE_training.MAPE
## [1] 0.5721527

Autoregressive

Autoregressive Model

# AUTOREGRESSIVE (ARDL) MODEL
model.ardl = ardlDlm(x = train$CPI, y = train$Weekly_Sales, p = 1, q = 1) # p: lag order of x, q: lag order of y
summary(model.ardl)
## 
## Time series regression with "ts" data:
## Start = 2, End = 4505
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2018141   -55433   -18182    50184  1393965 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.175e+05  1.635e+04   7.190 7.53e-13 ***
## X.t         -4.779e+03  6.713e+02  -7.119 1.26e-12 ***
## X.1          4.550e+03  6.717e+02   6.774 1.41e-11 ***
## Y.1          9.339e-01  5.330e-03 175.198  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 201600 on 4500 degrees of freedom
## Multiple R-squared:  0.8785, Adjusted R-squared:  0.8784 
## F-statistic: 1.084e+04 on 3 and 4500 DF,  p-value: < 2.2e-16
AIC(model.ardl)
## [1] 122809.7
BIC(model.ardl)
## [1] 122841.8

Autoregressive Model Forecast

(fore.ardl <- forecast(model = model.ardl, x=test$CPI, h=12))
## $forecasts
##  [1] 1186204 1180739 1175615 1170665 1165656 1160933 1156477 1152269 1148581
## [10] 1145153 1141921 1138874
## 
## $call
## forecast.ardlDlm(model = model.ardl, x = test$CPI, h = 12)
## 
## attr(,"class")
## [1] "forecast.ardlDlm" "dLagM"

Testing MAPE and Training Data Accuracy

# accuracy (MAPE) on the testing data
mape.ardl <- MAPE(fore.ardl$forecasts, test12$Weekly_Sales)

# accuracy (MAPE) on the training data
mape_train <- GoF(model.ardl)["MAPE"]

c("MAPE_testing" = mape.ardl, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.02791166
## 
## $MAPE_training.MAPE
## [1] 0.09488061

Determining the Optimum Lag

# determine the optimum ARDL lag orders
ardlBoundOrders(data = data.frame(train), ic="AIC", formula = Weekly_Sales ~ CPI)
## $p
##   CPI
## 1  15
## 
## $q
## [1] 15
## 
## $Stat.table
##           q = 1    q = 2    q = 3    q = 4    q = 5    q = 6    q = 7    q = 8
## p = 1  122262.7 122172.4 121891.2 121711.3 121600.8 121448.1 121423.6 121369.1
## p = 2  122238.2 122174.3 121891.8 121712.7 121601.9 121449.3 121424.7 121370.0
## p = 3  122148.5 122148.5 121892.4 121714.4 121603.2 121450.1 121425.5 121370.4
## p = 4  121867.3 121868.2 121868.2 121714.0 121603.9 121451.4 121426.9 121371.8
## p = 5  122164.4 122095.1 121689.0 121689.0 121605.8 121452.7 121428.2 121372.9
## p = 6  122039.0 122015.7 121819.7 121581.7 121581.7 121452.7 121428.1 121372.2
## p = 7  121898.2 121897.9 121735.7 121430.0 121428.7 121428.7 121430.1 121374.2
## p = 8  121904.3 121881.2 121740.5 121518.7 121491.5 121405.8 121405.8 121375.7
## p = 9  121962.0 121929.0 121720.0 121449.6 121418.3 121354.6 121351.7 121351.7
## p = 10 121938.0 121907.7 121696.5 121504.3 121466.9 121356.7 121358.5 121322.5
## p = 11 121884.4 121860.3 121661.6 121465.1 121411.4 121328.0 121330.0 121304.6
## p = 12 121901.2 121868.2 121660.0 121473.0 121412.2 121301.5 121303.3 121279.2
## p = 13 121869.5 121835.8 121626.8 121426.4 121371.6 121266.4 121268.4 121251.5
## p = 14 121849.6 121816.9 121603.6 121411.9 121357.7 121250.0 121251.9 121227.0
## p = 15 121774.4 121745.5 121550.9 121346.9 121297.5 121205.2 121207.1 121186.9
##           q = 9   q = 10   q = 11   q = 12   q = 13   q = 14   q = 15
## p = 1  121338.6 121312.5 121284.9 121258.7 121230.5 121192.3 121167.9
## p = 2  121339.5 121313.4 121285.8 121259.6 121231.3 121193.1 121168.7
## p = 3  121340.1 121314.0 121286.4 121260.2 121231.9 121193.5 121169.2
## p = 4  121341.4 121315.3 121287.8 121261.5 121233.2 121194.9 121170.6
## p = 5  121342.4 121316.2 121288.6 121262.3 121234.0 121195.7 121171.4
## p = 6  121341.9 121315.8 121288.0 121261.6 121233.2 121194.9 121170.5
## p = 7  121343.9 121317.8 121290.0 121263.5 121235.1 121196.8 121172.4
## p = 8  121345.6 121319.4 121291.5 121265.1 121236.6 121198.1 121173.8
## p = 9  121347.4 121321.2 121293.4 121267.0 121238.6 121200.1 121175.8
## p = 10 121322.5 121322.8 121295.1 121268.7 121240.3 121201.9 121177.5
## p = 11 121298.6 121298.6 121296.9 121270.4 121242.0 121203.4 121179.1
## p = 12 121270.6 121272.6 121272.6 121272.3 121243.9 121205.4 121181.0
## p = 13 121243.4 121245.4 121246.7 121246.7 121244.9 121206.0 121181.7
## p = 14 121215.6 121217.4 121218.9 121220.9 121220.9 121208.0 121183.7
## p = 15 121177.0 121178.2 121179.9 121181.8 121183.7 121183.7 121185.7
## 
## $min.Stat
## [1] 121167.9
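
The grid search favours a much longer lag structure than the ARDL(1,1) fitted above. If a larger model were of interest, it could be refit directly; a sketch (not run here), with the orders taken from the $p and $q components reported above:

# sketch: refit the ARDL with the longer lag orders suggested by the grid search
model.ardl.opt <- ardlDlm(x = train$CPI, y = train$Weekly_Sales, p = 15, q = 15)
summary(model.ardl.opt)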

DLM and ARDL Modeling with the dynlm Library

# equivalent to a DLM with one lag of CPI
cons_lm1 <- dynlm(Weekly_Sales ~ CPI+L(CPI),data = train.ts)

# equivalent to an ARDL model with p = 0, q = 1
cons_lm2 <- dynlm(Weekly_Sales ~ CPI+L(Weekly_Sales),data = train.ts)

# equivalent to an ARDL model with p = 1, q = 1
cons_lm3 <- dynlm(Weekly_Sales ~ CPI+L(CPI)+L(Weekly_Sales),data = train.ts)

# equivalent to a DLM with two lags of CPI
cons_lm4 <- dynlm(Weekly_Sales ~ CPI+L(CPI)+L(CPI,2),data = train.ts)

deviance(cons_lm1)
## [1] 1.429709e+15
deviance(cons_lm2)
## [1] 1.846681e+14
deviance(cons_lm3)
## [1] 1.828038e+14
deviance(cons_lm4)
## [1] 1.429324e+15
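
Since all four dynlm fits inherit from lm, their information criteria can also be compared directly; a quick sketch:

# compare the four dynlm specifications by AIC (sketch; note the effective
# sample sizes differ slightly because the lagged terms drop observations)
AIC(cons_lm1, cons_lm2, cons_lm3, cons_lm4)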

Model Diagnostic Tests

Test for Autocorrelation

dwtest(cons_lm1)
## 
##  Durbin-Watson test
## 
## data:  cons_lm1
## DW = 0.13243, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm2)
## 
##  Durbin-Watson test
## 
## data:  cons_lm2
## DW = 2.6055, p-value = 1
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm3)
## 
##  Durbin-Watson test
## 
## data:  cons_lm3
## DW = 2.6184, p-value = 1
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm4)
## 
##  Durbin-Watson test
## 
## data:  cons_lm4
## DW = 0.13241, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
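
For cons_lm2 and cons_lm3 the lagged response appears on the right-hand side, and the Durbin-Watson test is known to be unreliable in that setting. A Breusch-Godfrey test (also provided by lmtest) is a common alternative; a minimal sketch:

# Breusch-Godfrey test as an alternative check for serial correlation (sketch)
bgtest(cons_lm2, order = 1)
bgtest(cons_lm3, order = 1)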

Test for Heteroscedasticity

bptest(cons_lm1)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm1
## BP = 52.218, df = 2, p-value = 4.58e-12
bptest(cons_lm2)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm2
## BP = 440.78, df = 2, p-value < 2.2e-16
bptest(cons_lm3)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm3
## BP = 494.28, df = 3, p-value < 2.2e-16
bptest(cons_lm4)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm4
## BP = 52.211, df = 3, p-value = 2.7e-11
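
All four Breusch-Pagan tests reject homoscedasticity, so the usual OLS standard errors are questionable. One common remedy is heteroscedasticity-consistent (HC) standard errors; a sketch, assuming the sandwich package is installed (it is not loaded above):

# recompute the coefficient tests with robust (HC) standard errors (sketch)
library(sandwich)
coeftest(cons_lm3, vcov = vcovHC(cons_lm3, type = "HC1"))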

Model Accuracy Comparison

akurasi <- matrix(c(mape.koyck, mape.dlm, mape.dlm2, mape.ardl))
row.names(akurasi)<- c("Koyck","DLM 1","DLM Optimum","Autoregressive")
colnames(akurasi) <- c("MAPE")
akurasi
##                      MAPE
## Koyck          0.02526593
## DLM 1          0.05012342
## DLM Optimum    0.05025164
## Autoregressive 0.02791166
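
A plot of the four sets of forecasts against the first 12 test observations makes the comparison easier to see; a minimal sketch using base graphics:

# sketch: compare forecasts from the four models against the actual test values
plot(test12$CPI, test12$Weekly_Sales, type = "b", col = "black",
     xlab = "CPI", ylab = "Weekly_Sales")
points(test12$CPI, fore.koyck$forecasts, col = "red")
lines(test12$CPI, fore.koyck$forecasts, col = "red")
points(test12$CPI, fore.dlm$forecasts, col = "blue")
lines(test12$CPI, fore.dlm$forecasts, col = "blue")
points(test12$CPI, fore.dlm2$forecasts, col = "orange")
lines(test12$CPI, fore.dlm2$forecasts, col = "orange")
points(test12$CPI, fore.ardl$forecasts, col = "green")
lines(test12$CPI, fore.ardl$forecasts, col = "green")
legend("topleft", c("actual", "Koyck", "DLM 1", "DLM Optimum", "Autoregressive"),
       lty = 1, col = c("black", "red", "blue", "orange", "green"), cex = 0.8)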

Because the KOYCK model has the smallest testing MAPE of the four candidates, the Koyck approach can be regarded as the best model for the Walmart weekly sales data.

Data source: https://www.kaggle.com/datasets/yasserh/walmart-dataset