2026-05-22

Import Library

library(readr)

## Warning: package 'readr' was built under R version 4.5.2

library(tseries)

## Warning: package 'tseries' was built under R version 4.5.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(forecast)

## Warning: package 'forecast' was built under R version 4.5.3

library(ggplot2)
library(TSA)

## Warning: package 'TSA' was built under R version 4.5.3

## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast

## 
## Attaching package: 'TSA'

## The following object is masked from 'package:readr':
## 
##     spec

## The following objects are masked from 'package:stats':
## 
##     acf, arima

## The following object is masked from 'package:utils':
## 
##     tar

Membaca Data

data <- read_csv("~/Belajar kuliah lukas/Basis Data/Data Matkul Basis Data/Walmart.csv")

## Rows: 6435 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Date
## dbl (7): Store, Weekly_Sales, Holiday_Flag, Temperature, Fuel_Price, CPI, Un...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

head(data)

## # A tibble: 6 × 8
##   Store Date       Weekly_Sales Holiday_Flag Temperature Fuel_Price   CPI
##   <dbl> <chr>             <dbl>        <dbl>       <dbl>      <dbl> <dbl>
## 1     1 05-02-2010     1643691.            0        42.3       2.57  211.
## 2     1 12-02-2010     1641957.            1        38.5       2.55  211.
## 3     1 19-02-2010     1611968.            0        39.9       2.51  211.
## 4     1 26-02-2010     1409728.            0        46.6       2.56  211.
## 5     1 05-03-2010     1554807.            0        46.5       2.62  211.
## 6     1 12-03-2010     1439542.            0        57.8       2.67  211.
## # ℹ 1 more variable: Unemployment <dbl>

Forming Data Time Series

data.ts <- ts(data, start=c(2010,1), frequency = 12)

Plot

ts.plot(data.ts[,"Weekly_Sales"], type="l", ylab="Weekly Sales", col="blue")
title(main = "Time Series Plot of Weekly Sales", cex.sub = 0.8)
points(data.ts, pch = 20, col = "blue")

monthplot(data.ts[,"Weekly_Sales"], ylab="Weekly_Sales", col="blue")

ggplot(data , aes(x = Date, y = Weekly_Sales)) +
  geom_line(color = "blue") +
  labs(title = "Plot Data Time Series Weekly Sales",
       x = "Date",
       y = "Weekly Sales")

seasonplot(data.ts,5,main="Seasonal Plot of Weekly Sales", ylab="times",year.labels = TRUE, 
col=rainbow(20))

Splitting Data

fligner.test(Weekly_Sales ~ Date, data=data)

## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  Weekly_Sales by Date
## Fligner-Killeen:med chi-squared = 266.01, df = 142, p-value = 1.4e-09

train.ts <- data.ts[1:5149, "Weekly_Sales"]
test.ts  <- data.ts[5150:6435, "Weekly_Sales"]

Train Plot

ts.plot(train.ts, ylab="Weekly Sales", xlab="Date", col="blue")
title(main = "Time Series Plot of Weekly Sales", cex.sub = 0.8)
points(train.ts, pch = 20, col = "blue")

Test Plot

ts.plot(test.ts, ylab="Weekly Sales", xlab="Date", col="blue")
title(main = "Time Series Plot of Weekly Sales", cex.sub = 0.8)
points(test.ts, pch = 20, col = "blue")

Differencing Data

D1 <- diff(train.ts, 12)
ts.plot(D1, type="l", ylab="D1 Weekly_Sales", col="blue")

acf <- acf(D1, lag.max=48, xaxt="n", main="ACF D1", col="blue")

acf$lag <- acf$lag * 12
acf.1 <- as.data.frame(cbind(acf$acf,acf$lag))
acf.2 <- acf.1[which(acf.1$V2%%12==0),]
barplot(height = acf.2$V1, names.arg=acf.2$V2, ylab="ACF", xlab="Lag")

Differencing

d1D1 <- diff(D1)
ts.plot(d1D1, type="l", ylab="d1 D1 Xt", col="blue")

Identifikasi Model

acf3 <- acf(d1D1,lag.max=48,xaxt="n", main="ACF d1D1", col="blue")
axis(1, at=0:48/12, labels=0:48)

pacf3 <- pacf(d1D1,lag.max=48,xaxt="n", main="PACF d1D1", col="blue")
axis(1, at=0:48/12, labels=0:48)

par(mfrow=c(1,1))
eacf(as.matrix(d1D1))

## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x x o x x x o x  x  x  x 
## 1 x x x x x o x o x o x  x  x  x 
## 2 x x x o x o x o o o o  x  x  x 
## 3 x x x x x x x o o o o  x  x  x 
## 4 x x x x x o x o x o o  x  x  o 
## 5 x x x x x o x x x x x  x  x  x 
## 6 x x x x x x x x x x x  x  x  x 
## 7 x x x x x x x x x o x  x  x  x

Kandidat Model

m1 <- Arima(train.ts,
            order = c(0,1,1),
            seasonal = list(order= c(0,1,1),
            period=12))

summary(m1)

## Series: train.ts 
## ARIMA(0,1,1)(0,1,1)[12] 
## 
## Coefficients:
##           ma1     sma1
##       -0.4631  -0.9989
## s.e.   0.0140   0.0065
## 
## sigma^2 = 3.335e+10:  log likelihood = -69543.6
## AIC=139093.2   AICc=139093.2   BIC=139112.8
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set 250.5564 182341.7 95257.64 -1.455348 8.713497 0.9958173 0.03229225

m2 <- Arima(train.ts,
            order = c(1,1,0),
            seasonal = list(order=c(0,1,1),
            period=12))

summary(m2)

## Series: train.ts 
## ARIMA(1,1,0)(0,1,1)[12] 
## 
## Coefficients:
##           ar1     sma1
##       -0.3475  -0.9970
## s.e.   0.0131   0.0034
## 
## sigma^2 = 3.477e+10:  log likelihood = -69647.76
## AIC=139301.5   AICc=139301.5   BIC=139321.1
## 
## Training set error measures:
##                    ME   RMSE      MAE      MPE     MAPE      MASE        ACF1
## Training set 225.3947 186207 95126.57 -1.22977 8.625421 0.9944472 -0.04663137

m3 <- Arima(train.ts,
            order = c(1,1,1),
            seasonal = list(order=c(0,1,1),
            period=12))

summary(m3)

## Series: train.ts 
## ARIMA(1,1,1)(0,1,1)[12] 
## 
## Coefficients:
##          ar1      ma1     sma1
##       0.2453  -0.6730  -0.9998
## s.e.  0.0354   0.0291   0.0081
## 
## sigma^2 = 3.305e+10:  log likelihood = -69522.45
## AIC=139052.9   AICc=139052.9   BIC=139079.1
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 226.4751 181517.8 95519.39 -1.615278 8.809152 0.9985536
##                     ACF1
## Training set -0.01265612

m4 <- Arima(train.ts,
            order = c(2,1,1),
            seasonal = list(order=c(0,1,1),
            period=12))

summary(m4)

## Series: train.ts 
## ARIMA(2,1,1)(0,1,1)[12] 
## 
## Coefficients:
##          ar1     ar2      ma1     sma1
##       0.3832  0.1306  -0.8253  -0.9986
## s.e.  0.0252  0.0188   0.0201   0.0059
## 
## sigma^2 = 3.283e+10:  log likelihood = -69501.93
## AIC=139013.9   AICc=139013.9   BIC=139046.6
## 
## Training set error measures:
##                    ME     RMSE     MAE       MPE     MAPE      MASE        ACF1
## Training set 186.4537 180883.9 94948.7 -1.749158 8.813655 0.9925877 0.001188474

m5 <- Arima(train.ts,
            order = c(1,1,1),
            seasonal = list(order=c(1,1,0),
            period=12))

summary(m5)

## Series: train.ts 
## ARIMA(1,1,1)(1,1,0)[12] 
## 
## Coefficients:
##          ar1      ma1     sar1
##       0.2158  -0.6549  -0.5192
## s.e.  0.0378   0.0318   0.0119
## 
## sigma^2 = 4.966e+10:  log likelihood = -70533.98
## AIC=141076   AICc=141076   BIC=141102.1
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE     MAPE     MASE
## Training set 139.4526 222493.2 135714.5 -0.9880094 12.72744 1.418751
##                     ACF1
## Training set -0.01248345

m6 <- Arima(train.ts,
            order = c(0,1,1),
            seasonal = list(order=c(1,1,1),
            period=12))

summary(m6)

## Series: train.ts 
## ARIMA(0,1,1)(1,1,1)[12] 
## 
## Coefficients:
##           ma1     sar1     sma1
##       -0.4634  -0.0245  -0.9979
## s.e.   0.0140   0.0140   0.0045
## 
## sigma^2 = 3.335e+10:  log likelihood = -69542.06
## AIC=139092.1   AICc=139092.1   BIC=139118.3
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set 245.3621 182342.6 95518.05 -1.468695 8.747028 0.9985397 0.03277074

Model Terbaik

AICKandidatModel <- c(m1$aic, m2$aic, m3$aic, m4$aic, m5$aic, m6$aic)
AICcKandidatModel <- c(m1$aicc, m2$aicc, m3$aicc, m4$aicc, m5$aicc, m6$aicc)
BICKandidatModel <- c(m1$bic, m2$bic, m3$bic, m4$bic, m5$bic, m6$bic)
KandidatModelARIMA <- c("ARIMA(0,1,1)(0,1,1)12", "ARIMA(1,1,0)(0,1,1)12", "ARIMA(1,1,1)(0,1,1)12","ARIMA(2,1,1)(0,1,1)12", "ARIMA(1,1,1)(1,1,0)12", "ARIMA(0,1,1)(1,1,1)12")
compmodelARIMA <- cbind(KandidatModelARIMA, AICKandidatModel, AICcKandidatModel, BICKandidatModel)
colnames(compmodelARIMA) <- c("Kandidat Model", "Nilai AIC", "Nilai AICc", "Nilai BIC")
compmodelARIMA <- as.data.frame(compmodelARIMA)
compmodelARIMA

##          Kandidat Model        Nilai AIC       Nilai AICc        Nilai BIC
## 1 ARIMA(0,1,1)(0,1,1)12 139093.204280759 139093.208957298 139112.836370295
## 2 ARIMA(1,1,0)(0,1,1)12 139301.516058481  139301.52073502 139321.148148017
## 3 ARIMA(1,1,1)(0,1,1)12 139052.896390963 139052.904186715 139079.072510345
## 4 ARIMA(2,1,1)(0,1,1)12 139013.854175436 139013.865871343 139046.574324663
## 5 ARIMA(1,1,1)(1,1,0)12 141075.958349642 141075.966145394 141102.134469024
## 6 ARIMA(0,1,1)(1,1,1)12  139092.12829942 139092.136095172 139118.304418802

Model terbaik diperoleh dari nilai aic dan aicc terkecil, sehingga model terbaik adalah model 4 \[ARIMA(2,1,1)(0,1,1)_5\] dimana, persamaannya adalah \[ ϕ_2(B)Φ_0(B12)(1−B)(1−B12)Xt=θ_1(B)Θ_1(B12)εt \\(1−ϕ_1B−ϕ_2B2)(1−B)(1−B12)Xt=(1+θ_1B)(1+Θ_1B12)εt \\(1−ϕ_1B−ϕ_2B2)(Xt−Xt−1−Xt−12+Xt−13)=εt+θ1εt−1+Θ1εt−12+θ1Θ1εt−13 \]
Sehingga persamaan akhir diperoleh \[ \\Xt=(1+ϕ1)Xt−1+(ϕ2−ϕ1)Xt−2−ϕ2Xt−3 \\+Xt−12−(1+ϕ1)Xt−13+(ϕ1−ϕ2)Xt−14+ϕ2Xt−15 \\+εt+θ1εt−1+Θ1εt−12+θ1Θ1εt−13 \]

Menggunakan Function auto.arima

auto.arima(train.ts)

## Series: train.ts 
## ARIMA(4,1,5) 
## 
## Coefficients:
##           ar1      ar2      ar3     ar4      ma1     ma2      ma3     ma4
##       -0.1576  -0.1936  -0.0658  0.1211  -0.2282  0.0681  -0.1565  0.0731
## s.e.   0.0554   0.0497   0.0428  0.0443   0.0545  0.0409   0.0398  0.0443
##           ma5
##       -0.2729
## s.e.   0.0267
## 
## sigma^2 = 2.92e+10:  log likelihood = -69327.78
## AIC=138675.6   AICc=138675.6   BIC=138741

Ternyata jika menggunakan function auto.arima, data tidak perlu diolah menggunakan ARIMA Seasonal

Pengujian Parameter Model

printstatarima <- function (x, digits = 4,se=TRUE){
       if (length(x$coef) > 0) {
         cat("\nCoefficients:\n")
         coef <- round(x$coef, digits = digits)
         if (se && nrow(x$var.coef)) {
           ses <- rep(0, length(coef))
           ses[x$mask] <- round(sqrt(diag(x$var.coef)), digits = digits)
           coef <- matrix(coef, 1, dimnames = list(NULL, names(coef)))
           coef <- rbind(coef, s.e. = ses)
           statt <- coef[1,]/ses
           pval  <- 2*pt(abs(statt), df=length(x$residuals)-1, lower.tail = FALSE)
           coef <- rbind(coef, t=round(statt,digits=digits),sign.=round(pval,digits=digits))
           coef <- t(coef)
         }
         print.default(coef, print.gap = 2)
       }
}
printstatarima(m4)

## 
## Coefficients:
##                  s.e.          t  sign.
## ar1    0.3832  0.0252    15.2063      0
## ar2    0.1306  0.0188     6.9468      0
## ma1   -0.8253  0.0201   -41.0597      0
## sma1  -0.9986  0.0059  -169.2542      0

Diagnostic Model

tsdisplay(residuals(m4), lag.max=45, main='ARIMA(2,1,1)(0,1,1)12 Model Residuals', col="blue")

Box.test(residuals(m4),
         lag = 5,
         type = "Ljung-Box")

## 
##  Box-Ljung test
## 
## data:  residuals(m4)
## X-squared = 485.38, df = 5, p-value < 2.2e-16

Box.test(residuals(m4),
         lag = 10,
         type = "Ljung-Box")

## 
##  Box-Ljung test
## 
## data:  residuals(m4)
## X-squared = 534.47, df = 10, p-value < 2.2e-16

Box.test(residuals(m4),
         lag = 15,
         type = "Ljung-Box")

## 
##  Box-Ljung test
## 
## data:  residuals(m4)
## X-squared = 562.74, df = 15, p-value < 2.2e-16

jarque.bera.test(residuals(m4))

## 
##  Jarque Bera Test
## 
## data:  residuals(m4)
## X-squared = 81052, df = 2, p-value < 2.2e-16

Forecasting

ramalan_arima2=forecast(m4, 6)
ramalan_arima2

##      Point Forecast     Lo 80    Hi 80       Lo 95    Hi 95
## 5150       418436.4 186103.00 650769.7   63113.143 773759.6
## 5151       399780.5 133744.32 665816.6   -7086.701 806647.6
## 5152       400550.6 108462.73 692638.4  -46159.228 847260.3
## 5153       381485.5  71532.63 691438.4  -92546.530 855517.6
## 5154       374775.2  50276.52 699273.9 -121502.691 871053.1
## 5155       366639.9  29641.13 703638.7 -148755.264 882035.1

prediksi <- forecast(m4, h = length(test.ts))

accuracy(prediksi, test.ts)

##                       ME     RMSE      MAE       MPE      MAPE      MASE
## Training set    186.4537 180883.9  94948.7 -1.749158  8.813655 0.9925877
## Test set     546723.4052 670606.2 546865.2 64.385125 64.429175 5.7168937
##                     ACF1
## Training set 0.001188474
## Test set              NA

plot(ramalan_arima2, col="blue")

forecast_arima2 <- cbind(ramalan_arima2$mean,ramalan_arima2$lower,ramalan_arima2$upper)
forecast_arima2

## Time Series:
## Start = 5150 
## End = 5155 
## Frequency = 1 
##      ramalan_arima2$mean ramalan_arima2$lower.80% ramalan_arima2$lower.95%
## 5150            418436.4                186103.00                63113.143
## 5151            399780.5                133744.32                -7086.701
## 5152            400550.6                108462.73               -46159.228
## 5153            381485.5                 71532.63               -92546.530
## 5154            374775.2                 50276.52              -121502.691
## 5155            366639.9                 29641.13              -148755.264
##      ramalan_arima2$upper.80% ramalan_arima2$upper.95%
## 5150                 650769.7                 773759.6
## 5151                 665816.6                 806647.6
## 5152                 692638.4                 847260.3
## 5153                 691438.4                 855517.6
## 5154                 699273.9                 871053.1
## 5155                 703638.7                 882035.1