library(dLagM)
## Warning: package 'dLagM' was built under R version 4.1.3
## Loading required package: nardl
## Warning: package 'nardl' was built under R version 4.1.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Loading required package: dynlm
## Warning: package 'dynlm' was built under R version 4.1.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.1.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(dynlm)
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.1.3
##
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:dLagM':
##
## MAPE
## The following object is masked from 'package:base':
##
## Recall
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.1.3
library(car)
## Warning: package 'car' was built under R version 4.1.2
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.1.2
library(knitr)
## Warning: package 'knitr' was built under R version 4.1.2
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
setwd("C:/Users/Nabil Izzany/Documents/Kuylah/Semester 5/MPDW/Individu")
data <-read_excel("Walmart.xlsx")
str(data)
## tibble [6,435 x 7] (S3: tbl_df/tbl/data.frame)
## $ Date : num [1:6435] 1 2 3 4 5 6 7 8 9 10 ...
## $ Weekly_Sales: num [1:6435] 1643691 1641957 1611968 1409728 1554807 ...
## $ Holiday_Flag: num [1:6435] 0 1 0 0 0 0 0 0 0 0 ...
## $ Temperature : num [1:6435] 42.3 38.5 39.9 46.6 46.5 ...
## $ Fuel_Price : num [1:6435] 2.57 2.55 2.51 2.56 2.62 ...
## $ CPI : num [1:6435] 211 211 211 211 211 ...
## $ Unemployment: num [1:6435] 8.11 8.11 8.11 8.11 8.11 ...
knitr::kable(head(data), align="l")
| Date | Weekly_Sales | Holiday_Flag | Temperature | Fuel_Price | CPI | Unemployment |
|---|---|---|---|---|---|---|
| 1 | 1643691 | 0 | 42.31 | 2.572 | 211.0964 | 8.106 |
| 2 | 1641957 | 1 | 38.51 | 2.548 | 211.2422 | 8.106 |
| 3 | 1611968 | 0 | 39.93 | 2.514 | 211.2891 | 8.106 |
| 4 | 1409728 | 0 | 46.63 | 2.561 | 211.3196 | 8.106 |
| 5 | 1554807 | 0 | 46.50 | 2.625 | 211.3501 | 8.106 |
| 6 | 1439542 | 0 | 57.79 | 2.667 | 211.3806 | 8.106 |
train<-data[1:4505,]
test<-data[4506:6435,]
train.ts<-ts(train)
test.ts<-ts(test)
data.ts<-ts(data)
model.koyck = dLagM::koyckDlm(x = train$CPI, y = train$Weekly_Sales)
summary(model.koyck)
##
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
##
## Residuals:
## Min 1Q Median 3Q Max
## -2017120 -56246 -18636 50272 1668758
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.134e+05 1.648e+04 6.879 6.84e-12 ***
## Y.1 9.333e-01 5.355e-03 174.304 < 2e-16 ***
## X.t -2.009e+02 7.824e+01 -2.568 0.0103 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 202600 on 4501 degrees of freedom
## Multiple R-Squared: 0.8772, Adjusted R-squared: 0.8772
## Wald test: 1.608e+04 on 2 and 4501 DF, p-value: < 2.2e-16
##
## Diagnostic tests:
## NULL
##
## alpha beta phi
## Geometric coefficients: 1700951 -200.9237 0.9333405
AIC(model.koyck)
## [1] 122854
BIC(model.koyck)
## [1] 122879.7
fore.koyck <- forecast(model = model.koyck, x=test$CPI, h=12)
test12 <- test[1:12,]
#mape data testing
mape.koyck <- MAPE(fore.koyck$forecasts, test12$Weekly_Sales)
#akurasi data training
mape_train <- dLagM::GoF(model.koyck)["MAPE"]
c("MAPE_testing" = mape.koyck, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.02526593
##
## $MAPE_training.MAPE
## [1] 0.09564814
model.dlm = dLagM::dlm(x = train$CPI, y = train$Weekly_Sales , q = 2)
summary(model.dlm)
##
## Call:
## lm(formula = model.formula, data = design)
##
## Residuals:
## Min 1Q Median 3Q Max
## -901370 -469864 -114482 443121 2687962
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1740041.3 37723.7 46.126 <2e-16 ***
## x.t -3056.0 1877.0 -1.628 0.104
## x.1 -863.2 2650.8 -0.326 0.745
## x.2 687.3 1876.9 0.366 0.714
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 563600 on 4499 degrees of freedom
## Multiple R-squared: 0.04968, Adjusted R-squared: 0.04905
## F-statistic: 78.4 on 3 and 4499 DF, p-value: < 2.2e-16
##
## AIC and BIC values for the model:
## AIC BIC
## 1 132044.1 132076.1
AIC(model.dlm)
## [1] 132044.1
BIC(model.dlm)
## [1] 132076.1
(fore.dlm <- forecast(model = model.dlm, x=test$CPI, h=12))
## $forecasts
## [1] 1116281 1115948 1115642 1115244 1114592 1113896 1113251 1112606 1112145
## [10] 1111765 1111353 1110935
##
## $call
## forecast.dlm(model = model.dlm, x = test$CPI, h = 12)
##
## attr(,"class")
## [1] "forecast.dlm" "dLagM"
mape.dlm <- MAPE(fore.dlm$forecasts, test12$Weekly_Sales)
#akurasi data training
mape_train <- GoF(model.dlm)["MAPE"]
c("MAPE_testing" = mape.dlm, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.05012342
##
## $MAPE_training.MAPE
## [1] 0.5722005
finiteDLMauto(formula = Weekly_Sales ~ CPI,
data = data.frame(train), q.max = 5, q.min = 1,
model.type = "dlm", error.type = "AIC", trace = TRUE)
## q - k MASE AIC BIC GMRAE MBRAE R.Adj.Sq Ljung-Box
## 5 5 4.73783 131962.9 132014.2 8.57961 0.91074 0.04863 0
## 4 4 4.73792 131990.0 132034.9 8.58206 0.90780 0.04876 0
## 3 3 4.73737 132016.8 132055.2 8.58116 0.90631 0.04892 0
## 2 2 4.73662 132044.1 132076.1 8.57870 0.89643 0.04905 0
## 1 1 4.73760 132071.6 132097.2 8.57712 0.88510 0.04914 0
model.dlm2 = dLagM::dlm(x = train$CPI,y = train$Weekly_Sales , q = 3) #terdapat lag yang tidak signifikan sehingga dapat dikurangi jumlah lagnya
summary(model.dlm2)
##
## Call:
## lm(formula = model.formula, data = design)
##
## Residuals:
## Min 1Q Median 3Q Max
## -901353 -469915 -114428 443014 2688174
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.740e+06 3.779e+04 46.059 <2e-16 ***
## x.t -3.058e+03 1.877e+03 -1.629 0.103
## x.1 -8.638e+02 2.651e+03 -0.326 0.745
## x.2 6.812e+02 2.651e+03 0.257 0.797
## x.3 5.582e+00 1.877e+03 0.003 0.998
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 563700 on 4497 degrees of freedom
## Multiple R-squared: 0.04977, Adjusted R-squared: 0.04892
## F-statistic: 58.88 on 4 and 4497 DF, p-value: < 2.2e-16
##
## AIC and BIC values for the model:
## AIC BIC
## 1 132016.8 132055.2
AIC(model.dlm2)
## [1] 132016.8
BIC(model.dlm2)
## [1] 132055.2
(fore.dlm2 <- forecast(model = model.dlm2, x=test$CPI, h=12))
## $forecasts
## [1] 1116102 1115768 1115462 1115063 1114411 1113714 1113068 1112423 1111961
## [10] 1111581 1111169 1110750
##
## $call
## forecast.dlm(model = model.dlm2, x = test$CPI, h = 12)
##
## attr(,"class")
## [1] "forecast.dlm" "dLagM"
#mape data testing
mape.dlm2 <- MAPE(fore.dlm2$forecasts, test12$Weekly_Sales)
#akurasi data training
mape_train <- GoF(model.dlm2)["MAPE"]
c("MAPE_testing" = mape.dlm2, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.05025164
##
## $MAPE_training.MAPE
## [1] 0.5721527
#MODEL AUTOREGRESSIVE
model.ardl = ardlDlm(x = train$CPI, y = train$Weekly_Sales, p = 1 , q = 1) #p:lag x, q:lag y
summary(model.ardl)
##
## Time series regression with "ts" data:
## Start = 2, End = 4505
##
## Call:
## dynlm(formula = as.formula(model.text), data = data, start = 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2018141 -55433 -18182 50184 1393965
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.175e+05 1.635e+04 7.190 7.53e-13 ***
## X.t -4.779e+03 6.713e+02 -7.119 1.26e-12 ***
## X.1 4.550e+03 6.717e+02 6.774 1.41e-11 ***
## Y.1 9.339e-01 5.330e-03 175.198 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 201600 on 4500 degrees of freedom
## Multiple R-squared: 0.8785, Adjusted R-squared: 0.8784
## F-statistic: 1.084e+04 on 3 and 4500 DF, p-value: < 2.2e-16
AIC(model.ardl)
## [1] 122809.7
BIC(model.ardl)
## [1] 122841.8
(fore.ardl <- forecast(model = model.ardl, x=test$CPI, h=12))
## $forecasts
## [1] 1186204 1180739 1175615 1170665 1165656 1160933 1156477 1152269 1148581
## [10] 1145153 1141921 1138874
##
## $call
## forecast.ardlDlm(model = model.ardl, x = test$CPI, h = 12)
##
## attr(,"class")
## [1] "forecast.ardlDlm" "dLagM"
#akurasi testing
mape.ardl <- MAPE(fore.ardl$forecasts, test12$Weekly_Sales) #data testing
#akurasi data training
mape_train <- GoF(model.ardl)["MAPE"]
c("MAPE_testing" = mape.ardl, "MAPE_training" = mape_train)
## $MAPE_testing
## [1] 0.02791166
##
## $MAPE_training.MAPE
## [1] 0.09488061
#penentuan lag optimum
ardlBoundOrders(data = data.frame(train), ic="AIC", formula = Weekly_Sales ~ CPI)
## $p
## CPI
## 1 15
##
## $q
## [1] 15
##
## $Stat.table
## q = 1 q = 2 q = 3 q = 4 q = 5 q = 6 q = 7 q = 8
## p = 1 122262.7 122172.4 121891.2 121711.3 121600.8 121448.1 121423.6 121369.1
## p = 2 122238.2 122174.3 121891.8 121712.7 121601.9 121449.3 121424.7 121370.0
## p = 3 122148.5 122148.5 121892.4 121714.4 121603.2 121450.1 121425.5 121370.4
## p = 4 121867.3 121868.2 121868.2 121714.0 121603.9 121451.4 121426.9 121371.8
## p = 5 122164.4 122095.1 121689.0 121689.0 121605.8 121452.7 121428.2 121372.9
## p = 6 122039.0 122015.7 121819.7 121581.7 121581.7 121452.7 121428.1 121372.2
## p = 7 121898.2 121897.9 121735.7 121430.0 121428.7 121428.7 121430.1 121374.2
## p = 8 121904.3 121881.2 121740.5 121518.7 121491.5 121405.8 121405.8 121375.7
## p = 9 121962.0 121929.0 121720.0 121449.6 121418.3 121354.6 121351.7 121351.7
## p = 10 121938.0 121907.7 121696.5 121504.3 121466.9 121356.7 121358.5 121322.5
## p = 11 121884.4 121860.3 121661.6 121465.1 121411.4 121328.0 121330.0 121304.6
## p = 12 121901.2 121868.2 121660.0 121473.0 121412.2 121301.5 121303.3 121279.2
## p = 13 121869.5 121835.8 121626.8 121426.4 121371.6 121266.4 121268.4 121251.5
## p = 14 121849.6 121816.9 121603.6 121411.9 121357.7 121250.0 121251.9 121227.0
## p = 15 121774.4 121745.5 121550.9 121346.9 121297.5 121205.2 121207.1 121186.9
## q = 9 q = 10 q = 11 q = 12 q = 13 q = 14 q = 15
## p = 1 121338.6 121312.5 121284.9 121258.7 121230.5 121192.3 121167.9
## p = 2 121339.5 121313.4 121285.8 121259.6 121231.3 121193.1 121168.7
## p = 3 121340.1 121314.0 121286.4 121260.2 121231.9 121193.5 121169.2
## p = 4 121341.4 121315.3 121287.8 121261.5 121233.2 121194.9 121170.6
## p = 5 121342.4 121316.2 121288.6 121262.3 121234.0 121195.7 121171.4
## p = 6 121341.9 121315.8 121288.0 121261.6 121233.2 121194.9 121170.5
## p = 7 121343.9 121317.8 121290.0 121263.5 121235.1 121196.8 121172.4
## p = 8 121345.6 121319.4 121291.5 121265.1 121236.6 121198.1 121173.8
## p = 9 121347.4 121321.2 121293.4 121267.0 121238.6 121200.1 121175.8
## p = 10 121322.5 121322.8 121295.1 121268.7 121240.3 121201.9 121177.5
## p = 11 121298.6 121298.6 121296.9 121270.4 121242.0 121203.4 121179.1
## p = 12 121270.6 121272.6 121272.6 121272.3 121243.9 121205.4 121181.0
## p = 13 121243.4 121245.4 121246.7 121246.7 121244.9 121206.0 121181.7
## p = 14 121215.6 121217.4 121218.9 121220.9 121220.9 121208.0 121183.7
## p = 15 121177.0 121178.2 121179.9 121181.8 121183.7 121183.7 121185.7
##
## $min.Stat
## [1] 121167.9
#sama dengan model dlm p=1
cons_lm1 <- dynlm(Weekly_Sales ~ CPI+L(CPI),data = train.ts)
#sama dengan model ardl p=0 q=1
cons_lm2 <- dynlm(Weekly_Sales ~ CPI+L(Weekly_Sales),data = train.ts)
#sama dengan ardl p=1 q=1
cons_lm3 <- dynlm(Weekly_Sales ~ CPI+L(CPI)+L(Weekly_Sales),data = train.ts)
#sama dengan dlm p=2
cons_lm4 <- dynlm(Weekly_Sales ~ CPI+L(CPI)+L(CPI,2),data = train.ts)
deviance(cons_lm1)
## [1] 1.429709e+15
deviance(cons_lm2)
## [1] 1.846681e+14
deviance(cons_lm3)
## [1] 1.828038e+14
deviance(cons_lm4)
## [1] 1.429324e+15
dwtest(cons_lm1)
##
## Durbin-Watson test
##
## data: cons_lm1
## DW = 0.13243, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm2)
##
## Durbin-Watson test
##
## data: cons_lm2
## DW = 2.6055, p-value = 1
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm3)
##
## Durbin-Watson test
##
## data: cons_lm3
## DW = 2.6184, p-value = 1
## alternative hypothesis: true autocorrelation is greater than 0
dwtest(cons_lm4)
##
## Durbin-Watson test
##
## data: cons_lm4
## DW = 0.13241, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
bptest(cons_lm1)
##
## studentized Breusch-Pagan test
##
## data: cons_lm1
## BP = 52.218, df = 2, p-value = 4.58e-12
bptest(cons_lm2)
##
## studentized Breusch-Pagan test
##
## data: cons_lm2
## BP = 440.78, df = 2, p-value < 2.2e-16
bptest(cons_lm3)
##
## studentized Breusch-Pagan test
##
## data: cons_lm3
## BP = 494.28, df = 3, p-value < 2.2e-16
bptest(cons_lm4)
##
## studentized Breusch-Pagan test
##
## data: cons_lm4
## BP = 52.211, df = 3, p-value = 2.7e-11
akurasi <- matrix(c(mape.koyck, mape.dlm, mape.dlm2, mape.ardl))
row.names(akurasi)<- c("Koyck","DLM 1","DLM Optimum","Autoregressive")
colnames(akurasi) <- c("MAPE")
akurasi
## MAPE
## Koyck 0.02526593
## DLM 1 0.05012342
## DLM Optimum 0.05025164
## Autoregressive 0.02791166
Karena model KOYCK memiliki MAPE paling kecil maka dapat dikatakan penggunaan pendekatan KOYCK adalah pemodelan terbaik untuk memodelkan data penjualan mingguan walmart.
Sumber data: https://www.kaggle.com/datasets/yasserh/walmart-dataset