library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.3
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.5.3
##
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
##
## accuracy
data <- read.csv("C:/Users/ASUS/Downloads/Topik Dalam Statistika/Walmart.csv")
str(data)
## 'data.frame': 6435 obs. of 8 variables:
## $ Store : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Date : chr "05-02-2010" "12-02-2010" "19-02-2010" "26-02-2010" ...
## $ Weekly_Sales: num 1643691 1641957 1611968 1409728 1554807 ...
## $ Holiday_Flag: int 0 1 0 0 0 0 0 0 0 0 ...
## $ Temperature : num 42.3 38.5 39.9 46.6 46.5 ...
## $ Fuel_Price : num 2.57 2.55 2.51 2.56 2.62 ...
## $ CPI : num 211 211 211 211 211 ...
## $ Unemployment: num 8.11 8.11 8.11 8.11 8.11 ...
data$Date <- as.Date(data$Date, format="%d-%m-%Y")
data <- data[order(data$Date), ]
head(data)
## Store Date Weekly_Sales Holiday_Flag Temperature Fuel_Price CPI
## 1 1 2010-02-05 1643690.9 0 42.31 2.572 211.0964
## 144 2 2010-02-05 2136989.5 0 40.19 2.572 210.7526
## 287 3 2010-02-05 461622.2 0 45.71 2.572 214.4249
## 430 4 2010-02-05 2135143.9 0 43.76 2.598 126.4421
## 573 5 2010-02-05 317173.1 0 39.70 2.572 211.6540
## 716 6 2010-02-05 1652635.1 0 40.43 2.572 212.6224
## Unemployment
## 1 8.106
## 144 8.324
## 287 7.368
## 430 8.623
## 573 6.566
## 716 7.259
#ambil data penjualan
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
sales_weekly <- data %>%
group_by(Date) %>%
summarise(Weekly_Sales = sum(Weekly_Sales))
head(sales_weekly)
## # A tibble: 6 × 2
## Date Weekly_Sales
## <date> <dbl>
## 1 2010-02-05 49750740.
## 2 2010-02-12 48336678.
## 3 2010-02-19 48276994.
## 4 2010-02-26 43968571.
## 5 2010-03-05 46871470.
## 6 2010-03-12 45925397.
#bentuk time series dan plot time series
ts_sales <- ts(sales_weekly$Weekly_Sales,
frequency = 52,
start = c(2010,1))
ts_sales
## Time Series:
## Start = c(2010, 1)
## End = c(2012, 39)
## Frequency = 52
## [1] 49750741 48336678 48276994 43968571 46871470 45925397 44988975 44133961
## [9] 50423831 47365290 45183667 44734453 43705127 48503244 45330080 45120108
## [17] 47757503 50188543 47826547 47622046 46609036 48917485 47899529 46243900
## [25] 44888849 44630363 48204999 46464418 47060953 45909740 47194258 45634398
## [33] 43080727 41358514 42239876 45102974 43149473 43066670 43602831 45781982
## [41] 46124801 45125584 65821003 49909028 55666770 61820800 80931416 40432519
## [49] 42775788 40673678 40654648 39599853 46153111 47336193 48716164 44125860
## [57] 46980604 44627319 44872326 42876199 43458991 45887467 44973328 48676692
## [65] 43530033 46861958 45446145 44046598 45293457 48771994 47669735 47447562
## [73] 45884095 47578520 47859264 45515930 45274411 43683274 48015467 46249569
## [81] 46917348 47416948 45376623 46763228 43793960 42718097 42195831 47211688
## [89] 44374820 45818953 45855821 48655544 48474225 46438981 66593605 49390556
## [97] 55561148 60085696 76998241 46042461 44955422 42023078 42080997 39834975
## [105] 46085608 50009408 50197057 45771507 46861035 47480454 46901505 44993794
## [113] 45272862 53502316 46629261 45072530 43716799 47124198 46925879 46823939
## [121] 47892463 48281650 49651172 48412111 47668285 46597112 51253022 46099732
## [129] 46059543 44097155 47485900 47403451 47354452 47447324 47159639 48330059
## [137] 44226039 44354547 43734899 47566639 46128514 45122411 45544116
plot(ts_sales,
main="Plot Weekly Sales Walmart",
ylab="Sales",
xlab="Time",
col="blue")

#split data train dan test
# Training set (80%)
train.ts <- subset(ts_sales,
start = 1,
end = 114)
# Testing set (20%)
test.ts <- subset(ts_sales,
start = 115,
end = 143)
ts.plot(train.ts,
col = "blue",
ylab = "Weekly Sales",
xlab = "Time")
title(main = "Train Time Series Plot of Weekly Sales",
cex.sub = 0.8)
points(train.ts,
pch = 20,
col = "blue")

log_train <- log(train.ts)
#uji stasioner
train_log <- log(train.ts + 1)
adf.test(train_log)
## Warning in adf.test(train_log): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: train_log
## Dickey-Fuller = -5.7445, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Differencing non musiman
diff1 <- diff(train.ts)
# Plot differencing
ts.plot(diff1,
main = "Differencing Non Seasonal",
ylab = "Differenced Sales",
xlab = "Time",
col = "red")
# Titik data
points(diff1,
pch = 20,
col = "red")

# Uji ADF
adf.test(diff1)
## Warning in adf.test(diff1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff1
## Dickey-Fuller = -5.9312, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Differencing musiman
diff_seasonal <- diff(diff1,
lag = 52)
# Plot seasonal differencing
ts.plot(diff_seasonal,
main = "Seasonal Differencing",
ylab = "Seasonal Difference",
xlab = "Time",
col = "green")
# Titik data
points(diff_seasonal,
pch = 20,
col = "green")

# Uji ADF
adf.test(na.omit(diff_seasonal))
## Warning in adf.test(na.omit(diff_seasonal)): p-value smaller than printed
## p-value
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(diff_seasonal)
## Dickey-Fuller = -5.5458, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
#plot ACF dan PACF
par(mfrow=c(1,2))
Acf(diff_seasonal,
main="ACF")
Pacf(diff_seasonal,
main="PACF")

#bentuk model sarima
model1 <- Arima(train.ts,
order = c(1,1,1),
seasonal = list(order = c(0,1,1),
period = 52))
summary(model1)
## Series: train.ts
## ARIMA(1,1,1)(0,1,1)[52]
##
## Coefficients:
## ar1 ma1 sma1
## 0.1869 -0.8662 0.1532
## s.e. 0.1645 0.0718 0.2818
##
## sigma^2 = 3.931e+12: log likelihood = -970.64
## AIC=1949.27 AICc=1949.99 BIC=1957.72
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 234645.4 1414217 720978.7 0.5177475 1.500458 0.4743825 -0.01323472
model2 <- Arima(train.ts,
order = c(0,1,1),
seasonal = list(order = c(0,1,1),
period = 52))
summary(model2)
## Series: train.ts
## ARIMA(0,1,1)(0,1,1)[52]
##
## Coefficients:
## ma1 sma1
## -0.8302 0.0549
## s.e. 0.0891 0.2621
##
## sigma^2 = 4.009e+12: log likelihood = -971.28
## AIC=1948.56 AICc=1948.98 BIC=1954.89
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 232083.5 1440455 743524.9 0.5210133 1.541746 0.4892173 0.1031782
model3 <- Arima(train.ts,
order = c(1,1,0),
seasonal = list(order = c(0,1,1),
period = 52))
summary(model3)
## Series: train.ts
## ARIMA(1,1,0)(0,1,1)[52]
##
## Coefficients:
## ar1 sma1
## -0.3595 0.4328
## s.e. 0.1236 0.4026
##
## sigma^2 = 4.075e+12: log likelihood = -975.78
## AIC=1957.56 AICc=1957.98 BIC=1963.89
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 107021.8 1452225 732927.7 0.2213365 1.553227 0.4822446 -0.07248425
model4 <- Arima(train.ts,
order = c(2,1,1),
seasonal = list(order = c(0,1,1),
period = 52))
summary(model4)
## Series: train.ts
## ARIMA(2,1,1)(0,1,1)[52]
##
## Coefficients:
## ar1 ar2 ma1 sma1
## 0.1847 -0.0284 -0.8612 0.1501
## s.e. 0.1652 0.1489 0.0789 0.2812
##
## sigma^2 = 3.999e+12: log likelihood = -970.62
## AIC=1951.24 AICc=1952.33 BIC=1961.79
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 234666.3 1414111 720943.3 0.51859 1.499942 0.4743592 -0.01947318
#bandingkan nilai AIC
AIC(model1)
## [1] 1949.273
AIC(model2)
## [1] 1948.562
AIC(model3)
## [1] 1957.558
AIC(model4)
## [1] 1951.237
#auto model
log_train <- log(train.ts)
auto_model <- auto.arima(log_train,
seasonal = TRUE,
stepwise = FALSE,
approximation = FALSE)
summary(auto_model)
## Series: log_train
## ARIMA(0,1,1)(0,1,0)[52]
##
## Coefficients:
## ma1
## -0.8319
## s.e. 0.0800
##
## sigma^2 = 0.00195: log likelihood = 108.37
## AIC=-212.74 AICc=-212.53 BIC=-208.52
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.004080985 0.03203452 0.01695113 0.02316051 0.09585153 0.5264379
## ACF1
## Training set 0.07301659
#diagnostik residual dan forecasting
checkresiduals(auto_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)(0,1,0)[52]
## Q* = 24.87, df = 22, p-value = 0.3034
##
## Model df: 1. Total lags used: 23
forecast_sarima <- forecast(auto_model,
h = length(test.ts))
forecast_sarima$mean <- exp(forecast_sarima$mean)
forecast_sarima$lower <- exp(forecast_sarima$lower)
forecast_sarima$upper <- exp(forecast_sarima$upper)
forecast_sarima <- forecast(auto_model,
h = length(test.ts))
# Menampilkan hasil forecast
forecast_sarima
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2012.192 17.67734 17.62075 17.73392 17.59079 17.76388
## 2012.212 17.75647 17.69908 17.81385 17.66871 17.84423
## 2012.231 17.64472 17.58655 17.70288 17.55576 17.73367
## 2012.250 17.71847 17.65953 17.77741 17.62833 17.80861
## 2012.269 17.68779 17.62809 17.74750 17.59649 17.77910
## 2012.288 17.65651 17.59606 17.71697 17.56405 17.74897
## 2012.308 17.68443 17.62323 17.74563 17.59083 17.77802
## 2012.327 17.75842 17.69649 17.82036 17.66370 17.85314
## 2012.346 17.73556 17.67290 17.79822 17.63973 17.83139
## 2012.365 17.73089 17.66751 17.79427 17.63396 17.82782
## 2012.385 17.69738 17.63329 17.76147 17.59937 17.79540
## 2012.404 17.73365 17.66886 17.79844 17.63456 17.83274
## 2012.423 17.73953 17.67404 17.80502 17.63938 17.83968
## 2012.442 17.68933 17.62315 17.75550 17.58812 17.79053
## 2012.462 17.68401 17.61715 17.75086 17.58176 17.78625
## 2012.481 17.64823 17.58070 17.71576 17.54496 17.75151
## 2012.500 17.74279 17.67459 17.81098 17.63849 17.84708
## 2012.519 17.70532 17.63646 17.77417 17.60001 17.81062
## 2012.538 17.71965 17.65014 17.78916 17.61335 17.82596
## 2012.558 17.73025 17.66009 17.80040 17.62295 17.83754
## 2012.577 17.68626 17.61546 17.75706 17.57798 17.79454
## 2012.596 17.71636 17.64493 17.78780 17.60711 17.82561
## 2012.615 17.65076 17.57869 17.72283 17.54055 17.76098
## 2012.635 17.62589 17.55320 17.69858 17.51472 17.73706
## 2012.654 17.61359 17.54028 17.68690 17.50147 17.72571
## 2012.673 17.72591 17.65198 17.79983 17.61285 17.83897
## 2012.692 17.66394 17.58940 17.73847 17.54994 17.77793
## 2012.712 17.69596 17.62082 17.77110 17.58105 17.81088
## 2012.731 17.69677 17.62103 17.77251 17.58093 17.81260
plot(forecast_sarima,
main = "Forecast SARIMA Walmart Sales",
xlab = "Time",
ylab = "Sales",
col = "blue")
# Menambahkan data aktual
lines(test.ts,
col = "red",
lwd = 2)
# Legend
legend("topleft",
legend = c("Forecast",
"Actual"),
col = c("blue",
"red"),
lty = 1,
lwd = 2)

prediksi <- exp(forecast_sarima$mean)
actual <- as.numeric(test.ts)
pred <- as.numeric(prediksi)
plot(actual,
type = "l",
lwd = 2,
col = "black",
ylim = range(c(actual, pred)),
xlab = "Time",
ylab = "Weekly Sales",
main = "Actual vs Forecast")
# garis prediksi
lines(pred,
col = "red",
lwd = 2)
# titik aktual
points(actual,
pch = 20,
col = "black")
# titik prediksi
points(pred,
pch = 20,
col = "red")
# legend
legend("topleft",
legend = c("Actual",
"Forecast"),
col = c("black",
"red"),
lty = 1,
lwd = 2,
pch = 20)

#evaluasi akurasi
mae(actual, pred)
## [1] 1903285
rmse(actual, pred)
## [1] 2306284
mape(actual, pred)
## [1] 0.04107816