library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.3
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.5.3
## 
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
## 
##     accuracy
data <- read.csv("C:/Users/ASUS/Downloads/Topik Dalam Statistika/Walmart.csv")

str(data)
## 'data.frame':    6435 obs. of  8 variables:
##  $ Store       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date        : chr  "05-02-2010" "12-02-2010" "19-02-2010" "26-02-2010" ...
##  $ Weekly_Sales: num  1643691 1641957 1611968 1409728 1554807 ...
##  $ Holiday_Flag: int  0 1 0 0 0 0 0 0 0 0 ...
##  $ Temperature : num  42.3 38.5 39.9 46.6 46.5 ...
##  $ Fuel_Price  : num  2.57 2.55 2.51 2.56 2.62 ...
##  $ CPI         : num  211 211 211 211 211 ...
##  $ Unemployment: num  8.11 8.11 8.11 8.11 8.11 ...
data$Date <- as.Date(data$Date, format="%d-%m-%Y")

data <- data[order(data$Date), ]

head(data)
##     Store       Date Weekly_Sales Holiday_Flag Temperature Fuel_Price      CPI
## 1       1 2010-02-05    1643690.9            0       42.31      2.572 211.0964
## 144     2 2010-02-05    2136989.5            0       40.19      2.572 210.7526
## 287     3 2010-02-05     461622.2            0       45.71      2.572 214.4249
## 430     4 2010-02-05    2135143.9            0       43.76      2.598 126.4421
## 573     5 2010-02-05     317173.1            0       39.70      2.572 211.6540
## 716     6 2010-02-05    1652635.1            0       40.43      2.572 212.6224
##     Unemployment
## 1          8.106
## 144        8.324
## 287        7.368
## 430        8.623
## 573        6.566
## 716        7.259
#ambil data penjualan
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
sales_weekly <- data %>%
  group_by(Date) %>%
  summarise(Weekly_Sales = sum(Weekly_Sales))

head(sales_weekly)
## # A tibble: 6 × 2
##   Date       Weekly_Sales
##   <date>            <dbl>
## 1 2010-02-05    49750740.
## 2 2010-02-12    48336678.
## 3 2010-02-19    48276994.
## 4 2010-02-26    43968571.
## 5 2010-03-05    46871470.
## 6 2010-03-12    45925397.
#bentuk time series dan plot time series
ts_sales <- ts(sales_weekly$Weekly_Sales,
               frequency = 52,
               start = c(2010,1))

ts_sales
## Time Series:
## Start = c(2010, 1) 
## End = c(2012, 39) 
## Frequency = 52 
##   [1] 49750741 48336678 48276994 43968571 46871470 45925397 44988975 44133961
##   [9] 50423831 47365290 45183667 44734453 43705127 48503244 45330080 45120108
##  [17] 47757503 50188543 47826547 47622046 46609036 48917485 47899529 46243900
##  [25] 44888849 44630363 48204999 46464418 47060953 45909740 47194258 45634398
##  [33] 43080727 41358514 42239876 45102974 43149473 43066670 43602831 45781982
##  [41] 46124801 45125584 65821003 49909028 55666770 61820800 80931416 40432519
##  [49] 42775788 40673678 40654648 39599853 46153111 47336193 48716164 44125860
##  [57] 46980604 44627319 44872326 42876199 43458991 45887467 44973328 48676692
##  [65] 43530033 46861958 45446145 44046598 45293457 48771994 47669735 47447562
##  [73] 45884095 47578520 47859264 45515930 45274411 43683274 48015467 46249569
##  [81] 46917348 47416948 45376623 46763228 43793960 42718097 42195831 47211688
##  [89] 44374820 45818953 45855821 48655544 48474225 46438981 66593605 49390556
##  [97] 55561148 60085696 76998241 46042461 44955422 42023078 42080997 39834975
## [105] 46085608 50009408 50197057 45771507 46861035 47480454 46901505 44993794
## [113] 45272862 53502316 46629261 45072530 43716799 47124198 46925879 46823939
## [121] 47892463 48281650 49651172 48412111 47668285 46597112 51253022 46099732
## [129] 46059543 44097155 47485900 47403451 47354452 47447324 47159639 48330059
## [137] 44226039 44354547 43734899 47566639 46128514 45122411 45544116
plot(ts_sales,
     main="Plot Weekly Sales Walmart",
     ylab="Sales",
     xlab="Time",
     col="blue")

#split data train dan test
# Training set (80%)
train.ts <- subset(ts_sales,
                   start = 1,
                   end = 114)

# Testing set (20%)
test.ts <- subset(ts_sales,
                  start = 115,
                  end = 143)

ts.plot(train.ts,
        col = "blue",
        ylab = "Weekly Sales",
        xlab = "Time")

title(main = "Train Time Series Plot of Weekly Sales",
      cex.sub = 0.8)

points(train.ts,
       pch = 20,
       col = "blue")

log_train <- log(train.ts)
#uji stasioner
train_log <- log(train.ts + 1)

adf.test(train_log)
## Warning in adf.test(train_log): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train_log
## Dickey-Fuller = -5.7445, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Differencing non musiman
diff1 <- diff(train.ts)

# Plot differencing
ts.plot(diff1,
        main = "Differencing Non Seasonal",
        ylab = "Differenced Sales",
        xlab = "Time",
        col = "red")

# Titik data
points(diff1,
       pch = 20,
       col = "red")

# Uji ADF
adf.test(diff1)
## Warning in adf.test(diff1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1
## Dickey-Fuller = -5.9312, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Differencing musiman
diff_seasonal <- diff(diff1,
                      lag = 52)

# Plot seasonal differencing
ts.plot(diff_seasonal,
        main = "Seasonal Differencing",
        ylab = "Seasonal Difference",
        xlab = "Time",
        col = "green")

# Titik data
points(diff_seasonal,
       pch = 20,
       col = "green")

# Uji ADF
adf.test(na.omit(diff_seasonal))
## Warning in adf.test(na.omit(diff_seasonal)): p-value smaller than printed
## p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(diff_seasonal)
## Dickey-Fuller = -5.5458, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
#plot ACF dan PACF
par(mfrow=c(1,2))

Acf(diff_seasonal,
    main="ACF")

Pacf(diff_seasonal,
     main="PACF")

#bentuk model sarima
model1 <- Arima(train.ts,
                order = c(1,1,1),
                seasonal = list(order = c(0,1,1),
                                period = 52))

summary(model1)
## Series: train.ts 
## ARIMA(1,1,1)(0,1,1)[52] 
## 
## Coefficients:
##          ar1      ma1    sma1
##       0.1869  -0.8662  0.1532
## s.e.  0.1645   0.0718  0.2818
## 
## sigma^2 = 3.931e+12:  log likelihood = -970.64
## AIC=1949.27   AICc=1949.99   BIC=1957.72
## 
## Training set error measures:
##                    ME    RMSE      MAE       MPE     MAPE      MASE        ACF1
## Training set 234645.4 1414217 720978.7 0.5177475 1.500458 0.4743825 -0.01323472
model2 <- Arima(train.ts,
                order = c(0,1,1),
                seasonal = list(order = c(0,1,1),
                                period = 52))

summary(model2)
## Series: train.ts 
## ARIMA(0,1,1)(0,1,1)[52] 
## 
## Coefficients:
##           ma1    sma1
##       -0.8302  0.0549
## s.e.   0.0891  0.2621
## 
## sigma^2 = 4.009e+12:  log likelihood = -971.28
## AIC=1948.56   AICc=1948.98   BIC=1954.89
## 
## Training set error measures:
##                    ME    RMSE      MAE       MPE     MAPE      MASE      ACF1
## Training set 232083.5 1440455 743524.9 0.5210133 1.541746 0.4892173 0.1031782
model3 <- Arima(train.ts,
                order = c(1,1,0),
                seasonal = list(order = c(0,1,1),
                                period = 52))

summary(model3)
## Series: train.ts 
## ARIMA(1,1,0)(0,1,1)[52] 
## 
## Coefficients:
##           ar1    sma1
##       -0.3595  0.4328
## s.e.   0.1236  0.4026
## 
## sigma^2 = 4.075e+12:  log likelihood = -975.78
## AIC=1957.56   AICc=1957.98   BIC=1963.89
## 
## Training set error measures:
##                    ME    RMSE      MAE       MPE     MAPE      MASE        ACF1
## Training set 107021.8 1452225 732927.7 0.2213365 1.553227 0.4822446 -0.07248425
model4 <- Arima(train.ts,
                order = c(2,1,1),
                seasonal = list(order = c(0,1,1),
                                period = 52))

summary(model4)
## Series: train.ts 
## ARIMA(2,1,1)(0,1,1)[52] 
## 
## Coefficients:
##          ar1      ar2      ma1    sma1
##       0.1847  -0.0284  -0.8612  0.1501
## s.e.  0.1652   0.1489   0.0789  0.2812
## 
## sigma^2 = 3.999e+12:  log likelihood = -970.62
## AIC=1951.24   AICc=1952.33   BIC=1961.79
## 
## Training set error measures:
##                    ME    RMSE      MAE     MPE     MAPE      MASE        ACF1
## Training set 234666.3 1414111 720943.3 0.51859 1.499942 0.4743592 -0.01947318
#bandingkan nilai AIC
AIC(model1)
## [1] 1949.273
AIC(model2)
## [1] 1948.562
AIC(model3)
## [1] 1957.558
AIC(model4)
## [1] 1951.237
#auto model
log_train <- log(train.ts)

auto_model <- auto.arima(log_train,
                         seasonal = TRUE,
                         stepwise = FALSE,
                         approximation = FALSE)

summary(auto_model)
## Series: log_train 
## ARIMA(0,1,1)(0,1,0)[52] 
## 
## Coefficients:
##           ma1
##       -0.8319
## s.e.   0.0800
## 
## sigma^2 = 0.00195:  log likelihood = 108.37
## AIC=-212.74   AICc=-212.53   BIC=-208.52
## 
## Training set error measures:
##                       ME       RMSE        MAE        MPE       MAPE      MASE
## Training set 0.004080985 0.03203452 0.01695113 0.02316051 0.09585153 0.5264379
##                    ACF1
## Training set 0.07301659
#diagnostik residual dan forecasting
checkresiduals(auto_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1)(0,1,0)[52]
## Q* = 24.87, df = 22, p-value = 0.3034
## 
## Model df: 1.   Total lags used: 23
forecast_sarima <- forecast(auto_model,
                            h = length(test.ts))

forecast_sarima$mean <- exp(forecast_sarima$mean)
forecast_sarima$lower <- exp(forecast_sarima$lower)
forecast_sarima$upper <- exp(forecast_sarima$upper)
forecast_sarima <- forecast(auto_model,
                            h = length(test.ts))

# Menampilkan hasil forecast
forecast_sarima
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2012.192       17.67734 17.62075 17.73392 17.59079 17.76388
## 2012.212       17.75647 17.69908 17.81385 17.66871 17.84423
## 2012.231       17.64472 17.58655 17.70288 17.55576 17.73367
## 2012.250       17.71847 17.65953 17.77741 17.62833 17.80861
## 2012.269       17.68779 17.62809 17.74750 17.59649 17.77910
## 2012.288       17.65651 17.59606 17.71697 17.56405 17.74897
## 2012.308       17.68443 17.62323 17.74563 17.59083 17.77802
## 2012.327       17.75842 17.69649 17.82036 17.66370 17.85314
## 2012.346       17.73556 17.67290 17.79822 17.63973 17.83139
## 2012.365       17.73089 17.66751 17.79427 17.63396 17.82782
## 2012.385       17.69738 17.63329 17.76147 17.59937 17.79540
## 2012.404       17.73365 17.66886 17.79844 17.63456 17.83274
## 2012.423       17.73953 17.67404 17.80502 17.63938 17.83968
## 2012.442       17.68933 17.62315 17.75550 17.58812 17.79053
## 2012.462       17.68401 17.61715 17.75086 17.58176 17.78625
## 2012.481       17.64823 17.58070 17.71576 17.54496 17.75151
## 2012.500       17.74279 17.67459 17.81098 17.63849 17.84708
## 2012.519       17.70532 17.63646 17.77417 17.60001 17.81062
## 2012.538       17.71965 17.65014 17.78916 17.61335 17.82596
## 2012.558       17.73025 17.66009 17.80040 17.62295 17.83754
## 2012.577       17.68626 17.61546 17.75706 17.57798 17.79454
## 2012.596       17.71636 17.64493 17.78780 17.60711 17.82561
## 2012.615       17.65076 17.57869 17.72283 17.54055 17.76098
## 2012.635       17.62589 17.55320 17.69858 17.51472 17.73706
## 2012.654       17.61359 17.54028 17.68690 17.50147 17.72571
## 2012.673       17.72591 17.65198 17.79983 17.61285 17.83897
## 2012.692       17.66394 17.58940 17.73847 17.54994 17.77793
## 2012.712       17.69596 17.62082 17.77110 17.58105 17.81088
## 2012.731       17.69677 17.62103 17.77251 17.58093 17.81260
plot(forecast_sarima,
     main = "Forecast SARIMA Walmart Sales",
     xlab = "Time",
     ylab = "Sales",
     col = "blue")

# Menambahkan data aktual
lines(test.ts,
      col = "red",
      lwd = 2)

# Legend
legend("topleft",
       legend = c("Forecast",
                  "Actual"),
       col = c("blue",
               "red"),
       lty = 1,
       lwd = 2)

prediksi <- exp(forecast_sarima$mean)


actual <- as.numeric(test.ts)

pred <- as.numeric(prediksi)


plot(actual,
     type = "l",
     lwd = 2,
     col = "black",
     ylim = range(c(actual, pred)),
     xlab = "Time",
     ylab = "Weekly Sales",
     main = "Actual vs Forecast")

# garis prediksi
lines(pred,
      col = "red",
      lwd = 2)

# titik aktual
points(actual,
       pch = 20,
       col = "black")

# titik prediksi
points(pred,
       pch = 20,
       col = "red")

# legend
legend("topleft",
       legend = c("Actual",
                  "Forecast"),
       col = c("black",
               "red"),
       lty = 1,
       lwd = 2,
       pch = 20)

#evaluasi akurasi
mae(actual, pred)
## [1] 1903285
rmse(actual, pred)
## [1] 2306284
mape(actual, pred)
## [1] 0.04107816