library(dLagM)
## Warning: package 'dLagM' was built under R version 4.1.3
## Loading required package: nardl
## Warning: package 'nardl' was built under R version 4.1.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: dynlm
## Warning: package 'dynlm' was built under R version 4.1.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.1.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(dynlm)
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.1.3
## 
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:dLagM':
## 
##     MAPE
## The following object is masked from 'package:base':
## 
##     Recall
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.1.3
library(car)
## Warning: package 'car' was built under R version 4.1.2
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.1.2
library(knitr)
## Warning: package 'knitr' was built under R version 4.1.2
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2

Data

setwd("C:/Users/Nabil Izzany/Documents/Kuylah/Semester 5/MPDW/Individu")

data <-read_excel("Walmart.xlsx")
str(data)
## tibble [6,435 x 7] (S3: tbl_df/tbl/data.frame)
##  $ Date        : num [1:6435] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Weekly_Sales: num [1:6435] 1643691 1641957 1611968 1409728 1554807 ...
##  $ Holiday_Flag: num [1:6435] 0 1 0 0 0 0 0 0 0 0 ...
##  $ Temperature : num [1:6435] 42.3 38.5 39.9 46.6 46.5 ...
##  $ Fuel_Price  : num [1:6435] 2.57 2.55 2.51 2.56 2.62 ...
##  $ CPI         : num [1:6435] 211 211 211 211 211 ...
##  $ Unemployment: num [1:6435] 8.11 8.11 8.11 8.11 8.11 ...
knitr::kable(head(data), align="l")
Date  Weekly_Sales  Holiday_Flag  Temperature  Fuel_Price  CPI       Unemployment
1     1643691       0             42.31        2.572       211.0964  8.106
2     1641957       1             38.51        2.548       211.2422  8.106
3     1611968       0             39.93        2.514       211.2891  8.106
4     1409728       0             46.63        2.561       211.3196  8.106
5     1554807       0             46.50        2.625       211.3501  8.106
6     1439542       0             57.79        2.667       211.3806  8.106
train<-data[1:4505,]
test<-data[4506:6435,]

train.ts<-ts(train)
test.ts<-ts(test)
data.ts<-ts(data)
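
Before fitting anything, it can help to eyeball the series. A minimal sketch, plotting the response and the CPI predictor from the ts object created above:

# quick visual check of the response and the main predictor (sketch)
plot(data.ts[, "Weekly_Sales"], ylab = "Weekly_Sales", xlab = "Period")
plot(data.ts[, "CPI"], ylab = "CPI", xlab = "Period")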

KOYCK

KOYCK Model

model.koyck = dLagM::koyckDlm(x = train$CPI, y = train$Weekly_Sales)
summary(model.koyck)
## 
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2017120   -56246   -18636    50272  1668758 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.134e+05  1.648e+04   6.879 6.84e-12 ***
## Y.1          9.333e-01  5.355e-03 174.304  < 2e-16 ***
## X.t         -2.009e+02  7.824e+01  -2.568   0.0103 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 202600 on 4501 degrees of freedom
## Multiple R-Squared: 0.8772,  Adjusted R-squared: 0.8772 
## Wald test: 1.608e+04 on 2 and 4501 DF,  p-value: < 2.2e-16 
## 
## Diagnostic tests:
## NULL
## 
##                            alpha      beta       phi
## Geometric coefficients:  1700951 -200.9237 0.9333405
AIC(model.koyck)
## [1] 122854
BIC(model.koyck)
## [1] 122879.7
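
The geometric coefficients in the summary describe an infinite distributed lag in which the effect of CPI on Weekly_Sales decays geometrically: the weight at lag s is beta * phi^s. A small sketch, plugging in the estimates printed above:

# implied Koyck distributed-lag weights: effect of CPI at lag s is beta * phi^s
phi  <- 0.9333405   # geometric decay parameter from the summary above
beta <- -200.9237   # immediate (lag-0) effect of CPI from the summary above
beta * phi^(0:10)   # weights for lags 0 through 10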

KOYCK Model Forecast

fore.koyck <- forecast(model = model.koyck, x=test$CPI, h=12)

Testing MAPE and Training Data Accuracy

test12 <- test[1:12,]

# MAPE on the testing data
mape.koyck <- MAPE(fore.koyck$forecasts, test12$Weekly_Sales)

# accuracy (MAPE) on the training data
mape_train <- dLagM::GoF(model.koyck)["MAPE"]

c("MAPE_testing" = mape.koyck, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.02526593
## 
## $MAPE_training.MAPE
## [1] 0.09564814
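
The values above are proportions (0.025 is about 2.5%). As a sanity check, the testing MAPE can be recomputed by hand from the same objects (MLmetrics::MAPE takes the predictions first and the actuals second); a minimal sketch:

# manual check of the testing MAPE: mean(|actual - forecast| / |actual|)
mean(abs((test12$Weekly_Sales - fore.koyck$forecasts) / test12$Weekly_Sales))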

Regression with Distributed Lag

Regression with Distributed Lag (lag=2)

Model Regression with Distributed Lag (lag=2)

model.dlm = dLagM::dlm(x = train$CPI, y = train$Weekly_Sales , q = 2)
summary(model.dlm)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -901370 -469864 -114482  443121 2687962 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1740041.3    37723.7  46.126   <2e-16 ***
## x.t           -3056.0     1877.0  -1.628    0.104    
## x.1            -863.2     2650.8  -0.326    0.745    
## x.2             687.3     1876.9   0.366    0.714    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 563600 on 4499 degrees of freedom
## Multiple R-squared:  0.04968,    Adjusted R-squared:  0.04905 
## F-statistic:  78.4 on 3 and 4499 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##        AIC      BIC
## 1 132044.1 132076.1
AIC(model.dlm)
## [1] 132044.1
BIC(model.dlm)
## [1] 132076.1

Forecast for the Lag-2 Model

(fore.dlm <- forecast(model = model.dlm, x=test$CPI, h=12))
## $forecasts
##  [1] 1116281 1115948 1115642 1115244 1114592 1113896 1113251 1112606 1112145
## [10] 1111765 1111353 1110935
## 
## $call
## forecast.dlm(model = model.dlm, x = test$CPI, h = 12)
## 
## attr(,"class")
## [1] "forecast.dlm" "dLagM"

Testing MAPE and Training Data Accuracy

# MAPE on the testing data
mape.dlm <- MAPE(fore.dlm$forecasts, test12$Weekly_Sales)

# accuracy (MAPE) on the training data
mape_train <- GoF(model.dlm)["MAPE"]

c("MAPE_testing" = mape.dlm, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.05012342
## 
## $MAPE_training.MAPE
## [1] 0.5722005

Regression with Distributed Lag (lag=optimum)

Model Regression with Distributed Lag (lag=optimum)

finiteDLMauto(formula = Weekly_Sales ~ CPI,
              data = data.frame(train), q.max = 5, q.min = 1,
              model.type = "dlm", error.type = "AIC", trace = TRUE)
##   q - k    MASE      AIC      BIC   GMRAE   MBRAE R.Adj.Sq Ljung-Box
## 5     5 4.73783 131962.9 132014.2 8.57961 0.91074  0.04863         0
## 4     4 4.73792 131990.0 132034.9 8.58206 0.90780  0.04876         0
## 3     3 4.73737 132016.8 132055.2 8.58116 0.90631  0.04892         0
## 2     2 4.73662 132044.1 132076.1 8.57870 0.89643  0.04905         0
## 1     1 4.73760 132071.6 132097.2 8.57712 0.88510  0.04914         0
model.dlm2 = dLagM::dlm(x = train$CPI, y = train$Weekly_Sales, q = 3) # some lags are not significant, so the number of lags can be reduced
summary(model.dlm2)
## 
## Call:
## lm(formula = model.formula, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -901353 -469915 -114428  443014 2688174 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.740e+06  3.779e+04  46.059   <2e-16 ***
## x.t         -3.058e+03  1.877e+03  -1.629    0.103    
## x.1         -8.638e+02  2.651e+03  -0.326    0.745    
## x.2          6.812e+02  2.651e+03   0.257    0.797    
## x.3          5.582e+00  1.877e+03   0.003    0.998    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 563700 on 4497 degrees of freedom
## Multiple R-squared:  0.04977,    Adjusted R-squared:  0.04892 
## F-statistic: 58.88 on 4 and 4497 DF,  p-value: < 2.2e-16
## 
## AIC and BIC values for the model:
##        AIC      BIC
## 1 132016.8 132055.2
AIC(model.dlm2)
## [1] 132016.8
BIC(model.dlm2)
## [1] 132055.2

Forecast for the Optimum-Lag Model

(fore.dlm2 <- forecast(model = model.dlm2, x=test$CPI, h=12))
## $forecasts
##  [1] 1116102 1115768 1115462 1115063 1114411 1113714 1113068 1112423 1111961
## [10] 1111581 1111169 1110750
## 
## $call
## forecast.dlm(model = model.dlm2, x = test$CPI, h = 12)
## 
## attr(,"class")
## [1] "forecast.dlm" "dLagM"

Testing MAPE and Training Data Accuracy

# MAPE on the testing data
mape.dlm2 <- MAPE(fore.dlm2$forecasts, test12$Weekly_Sales)

# accuracy (MAPE) on the training data
mape_train <- GoF(model.dlm2)["MAPE"]

c("MAPE_testing" = mape.dlm2, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.05025164
## 
## $MAPE_training.MAPE
## [1] 0.5721527

Autoregressive

Autoregressive Model

# AUTOREGRESSIVE (ARDL) MODEL
model.ardl = ardlDlm(x = train$CPI, y = train$Weekly_Sales, p = 1, q = 1) # p: lag order of x, q: lag order of y
summary(model.ardl)
## 
## Time series regression with "ts" data:
## Start = 2, End = 4505
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2018141   -55433   -18182    50184  1393965 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.175e+05  1.635e+04   7.190 7.53e-13 ***
## X.t         -4.779e+03  6.713e+02  -7.119 1.26e-12 ***
## X.1          4.550e+03  6.717e+02   6.774 1.41e-11 ***
## Y.1          9.339e-01  5.330e-03 175.198  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 201600 on 4500 degrees of freedom
## Multiple R-squared:  0.8785, Adjusted R-squared:  0.8784 
## F-statistic: 1.084e+04 on 3 and 4500 DF,  p-value: < 2.2e-16
AIC(model.ardl)
## [1] 122809.7
BIC(model.ardl)
## [1] 122841.8

Autoregressive Model Forecast

(fore.ardl <- forecast(model = model.ardl, x=test$CPI, h=12))
## $forecasts
##  [1] 1186204 1180739 1175615 1170665 1165656 1160933 1156477 1152269 1148581
## [10] 1145153 1141921 1138874
## 
## $call
## forecast.ardlDlm(model = model.ardl, x = test$CPI, h = 12)
## 
## attr(,"class")
## [1] "forecast.ardlDlm" "dLagM"

Testing MAPE and Training Data Accuracy

# accuracy (MAPE) on the testing data
mape.ardl <- MAPE(fore.ardl$forecasts, test12$Weekly_Sales)

# accuracy (MAPE) on the training data
mape_train <- GoF(model.ardl)["MAPE"]

c("MAPE_testing" = mape.ardl, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.02791166
## 
## $MAPE_training.MAPE
## [1] 0.09488061

Determining the Optimum Lag

# determine the optimum ARDL lag orders
ardlBoundOrders(data = data.frame(train), ic="AIC", formula = Weekly_Sales ~ CPI)
## $p
##   CPI
## 1  15
## 
## $q
## [1] 15
## 
## $Stat.table
##           q = 1    q = 2    q = 3    q = 4    q = 5    q = 6    q = 7    q = 8
## p = 1  122262.7 122172.4 121891.2 121711.3 121600.8 121448.1 121423.6 121369.1
## p = 2  122238.2 122174.3 121891.8 121712.7 121601.9 121449.3 121424.7 121370.0
## p = 3  122148.5 122148.5 121892.4 121714.4 121603.2 121450.1 121425.5 121370.4
## p = 4  121867.3 121868.2 121868.2 121714.0 121603.9 121451.4 121426.9 121371.8
## p = 5  122164.4 122095.1 121689.0 121689.0 121605.8 121452.7 121428.2 121372.9
## p = 6  122039.0 122015.7 121819.7 121581.7 121581.7 121452.7 121428.1 121372.2
## p = 7  121898.2 121897.9 121735.7 121430.0 121428.7 121428.7 121430.1 121374.2
## p = 8  121904.3 121881.2 121740.5 121518.7 121491.5 121405.8 121405.8 121375.7
## p = 9  121962.0 121929.0 121720.0 121449.6 121418.3 121354.6 121351.7 121351.7
## p = 10 121938.0 121907.7 121696.5 121504.3 121466.9 121356.7 121358.5 121322.5
## p = 11 121884.4 121860.3 121661.6 121465.1 121411.4 121328.0 121330.0 121304.6
## p = 12 121901.2 121868.2 121660.0 121473.0 121412.2 121301.5 121303.3 121279.2
## p = 13 121869.5 121835.8 121626.8 121426.4 121371.6 121266.4 121268.4 121251.5
## p = 14 121849.6 121816.9 121603.6 121411.9 121357.7 121250.0 121251.9 121227.0
## p = 15 121774.4 121745.5 121550.9 121346.9 121297.5 121205.2 121207.1 121186.9
##           q = 9   q = 10   q = 11   q = 12   q = 13   q = 14   q = 15
## p = 1  121338.6 121312.5 121284.9 121258.7 121230.5 121192.3 121167.9
## p = 2  121339.5 121313.4 121285.8 121259.6 121231.3 121193.1 121168.7
## p = 3  121340.1 121314.0 121286.4 121260.2 121231.9 121193.5 121169.2
## p = 4  121341.4 121315.3 121287.8 121261.5 121233.2 121194.9 121170.6
## p = 5  121342.4 121316.2 121288.6 121262.3 121234.0 121195.7 121171.4
## p = 6  121341.9 121315.8 121288.0 121261.6 121233.2 121194.9 121170.5
## p = 7  121343.9 121317.8 121290.0 121263.5 121235.1 121196.8 121172.4
## p = 8  121345.6 121319.4 121291.5 121265.1 121236.6 121198.1 121173.8
## p = 9  121347.4 121321.2 121293.4 121267.0 121238.6 121200.1 121175.8
## p = 10 121322.5 121322.8 121295.1 121268.7 121240.3 121201.9 121177.5
## p = 11 121298.6 121298.6 121296.9 121270.4 121242.0 121203.4 121179.1
## p = 12 121270.6 121272.6 121272.6 121272.3 121243.9 121205.4 121181.0
## p = 13 121243.4 121245.4 121246.7 121246.7 121244.9 121206.0 121181.7
## p = 14 121215.6 121217.4 121218.9 121220.9 121220.9 121208.0 121183.7
## p = 15 121177.0 121178.2 121179.9 121181.8 121183.7 121183.7 121185.7
## 
## $min.Stat
## [1] 121167.9
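
The grid search favours a much longer lag structure than the ARDL(1,1) fitted above. If a larger model were of interest, it could be refit directly; a sketch (not run here), with the orders taken from the $p and $q components reported above:

# sketch: refit the ARDL with the longer lag orders suggested by the grid search
model.ardl.opt <- ardlDlm(x = train$CPI, y = train$Weekly_Sales, p = 15, q = 15)
summary(model.ardl.opt)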

DLM and ARDL Modeling with the dynlm Library

# equivalent to a DLM with one lag of CPI
cons_lm1 <- dynlm(Weekly_Sales ~ CPI+L(CPI),data = train.ts)

# equivalent to an ARDL model with p = 0, q = 1
cons_lm2 <- dynlm(Weekly_Sales ~ CPI+L(Weekly_Sales),data = train.ts)

# equivalent to an ARDL model with p = 1, q = 1
cons_lm3 <- dynlm(Weekly_Sales ~ CPI+L(CPI)+L(Weekly_Sales),data = train.ts)

# equivalent to a DLM with two lags of CPI
cons_lm4 <- dynlm(Weekly_Sales ~ CPI+L(CPI)+L(CPI,2),data = train.ts)

deviance(cons_lm1)
## [1] 1.429709e+15
deviance(cons_lm2)
## [1] 1.846681e+14
deviance(cons_lm3)
## [1] 1.828038e+14
deviance(cons_lm4)
## [1] 1.429324e+15
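
Since all four dynlm fits inherit from lm, their information criteria can also be compared directly; a quick sketch:

# compare the four dynlm specifications by AIC (sketch; note the effective
# sample sizes differ slightly because the lagged terms drop observations)
AIC(cons_lm1, cons_lm2, cons_lm3, cons_lm4)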

Model Diagnostic Tests

Test for Autocorrelation

dwtest(cons_lm1)
## 
##  Durbin-Watson test
## 
## data:  cons_lm1
## DW = 0.13243, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm2)
## 
##  Durbin-Watson test
## 
## data:  cons_lm2
## DW = 2.6055, p-value = 1
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm3)
## 
##  Durbin-Watson test
## 
## data:  cons_lm3
## DW = 2.6184, p-value = 1
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm4)
## 
##  Durbin-Watson test
## 
## data:  cons_lm4
## DW = 0.13241, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
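
For cons_lm2 and cons_lm3 the lagged response appears on the right-hand side, and the Durbin-Watson test is known to be unreliable in that setting. A Breusch-Godfrey test (also provided by lmtest) is a common alternative; a minimal sketch:

# Breusch-Godfrey test as an alternative check for serial correlation (sketch)
bgtest(cons_lm2, order = 1)
bgtest(cons_lm3, order = 1)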

Test for Heteroscedasticity

bptest(cons_lm1)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm1
## BP = 52.218, df = 2, p-value = 4.58e-12
bptest(cons_lm2)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm2
## BP = 440.78, df = 2, p-value < 2.2e-16
bptest(cons_lm3)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm3
## BP = 494.28, df = 3, p-value < 2.2e-16
bptest(cons_lm4)
## 
##  studentized Breusch-Pagan test
## 
## data:  cons_lm4
## BP = 52.211, df = 3, p-value = 2.7e-11
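
All four Breusch-Pagan tests reject homoscedasticity, so the usual OLS standard errors are questionable. One common remedy is heteroscedasticity-consistent (HC) standard errors; a sketch, assuming the sandwich package is installed (it is not loaded above):

# recompute the coefficient tests with robust (HC) standard errors (sketch)
library(sandwich)
coeftest(cons_lm3, vcov = vcovHC(cons_lm3, type = "HC1"))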

Model Accuracy Comparison

akurasi <- matrix(c(mape.koyck, mape.dlm, mape.dlm2, mape.ardl))
row.names(akurasi)<- c("Koyck","DLM 1","DLM Optimum","Autoregressive")
colnames(akurasi) <- c("MAPE")
akurasi
##                      MAPE
## Koyck          0.02526593
## DLM 1          0.05012342
## DLM Optimum    0.05025164
## Autoregressive 0.02791166
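
A plot of the four sets of forecasts against the first 12 test observations makes the comparison easier to see; a minimal sketch using base graphics:

# sketch: compare forecasts from the four models against the actual test values
plot(test12$CPI, test12$Weekly_Sales, type = "b", col = "black",
     xlab = "CPI", ylab = "Weekly_Sales")
points(test12$CPI, fore.koyck$forecasts, col = "red")
lines(test12$CPI, fore.koyck$forecasts, col = "red")
points(test12$CPI, fore.dlm$forecasts, col = "blue")
lines(test12$CPI, fore.dlm$forecasts, col = "blue")
points(test12$CPI, fore.dlm2$forecasts, col = "orange")
lines(test12$CPI, fore.dlm2$forecasts, col = "orange")
points(test12$CPI, fore.ardl$forecasts, col = "green")
lines(test12$CPI, fore.ardl$forecasts, col = "green")
legend("topleft", c("actual", "Koyck", "DLM 1", "DLM Optimum", "Autoregressive"),
       lty = 1, col = c("black", "red", "blue", "orange", "green"), cex = 0.8)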

Because the KOYCK model has the smallest testing MAPE of the four candidates, the Koyck approach can be regarded as the best model for the Walmart weekly sales data.

Data source: https://www.kaggle.com/datasets/yasserh/walmart-dataset