HANDS ON SARIMA
Coding SARIMA
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
##
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
##
## accuracy
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## 'data.frame': 6435 obs. of 8 variables:
## $ Store : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Date : chr "05-02-2010" "12-02-2010" "19-02-2010" "26-02-2010" ...
## $ Weekly_Sales: num 1643691 1641957 1611968 1409728 1554807 ...
## $ Holiday_Flag: int 0 1 0 0 0 0 0 0 0 0 ...
## $ Temperature : num 42.3 38.5 39.9 46.6 46.5 ...
## $ Fuel_Price : num 2.57 2.55 2.51 2.56 2.62 ...
## $ CPI : num 211 211 211 211 211 ...
## $ Unemployment: num 8.11 8.11 8.11 8.11 8.11 ...
# Mengubah format tanggal
climate_data$Tanggal <- as.Date(climate_data$Date,
format = "%d-%m-%Y")
# Mengurutkan data berdasarkan tanggal
climate_data <- climate_data[order(climate_data$Tanggal), ]
head(climate_data)## Store Date Weekly_Sales Holiday_Flag Temperature Fuel_Price CPI
## 1 1 05-02-2010 1643690.9 0 42.31 2.572 211.0964
## 144 2 05-02-2010 2136989.5 0 40.19 2.572 210.7526
## 287 3 05-02-2010 461622.2 0 45.71 2.572 214.4249
## 430 4 05-02-2010 2135143.9 0 43.76 2.598 126.4421
## 573 5 05-02-2010 317173.1 0 39.70 2.572 211.6540
## 716 6 05-02-2010 1652635.1 0 40.43 2.572 212.6224
## Unemployment Tanggal
## 1 8.106 2010-02-05
## 144 8.324 2010-02-05
## 287 7.368 2010-02-05
## 430 8.623 2010-02-05
## 573 6.566 2010-02-05
## 716 7.259 2010-02-05
# Mengambil data penjualan mingguan
weekly_data <- climate_data %>%
group_by(Tanggal) %>%
summarise(Total_Penjualan = sum(Weekly_Sales))
head(weekly_data)## # A tibble: 6 × 2
## Tanggal Total_Penjualan
## <date> <dbl>
## 1 2010-02-05 49750740.
## 2 2010-02-12 48336678.
## 3 2010-02-19 48276994.
## 4 2010-02-26 43968571.
## 5 2010-03-05 46871470.
## 6 2010-03-12 45925397.
# Membentuk time series
series_penjualan <- ts(weekly_data$Total_Penjualan,
frequency = 52,
start = c(2010,1))
series_penjualan## Time Series:
## Start = c(2010, 1)
## End = c(2012, 39)
## Frequency = 52
## [1] 49750741 48336678 48276994 43968571 46871470 45925397 44988975 44133961
## [9] 50423831 47365290 45183667 44734453 43705127 48503244 45330080 45120108
## [17] 47757503 50188543 47826547 47622046 46609036 48917485 47899529 46243900
## [25] 44888849 44630363 48204999 46464418 47060953 45909740 47194258 45634398
## [33] 43080727 41358514 42239876 45102974 43149473 43066670 43602831 45781982
## [41] 46124801 45125584 65821003 49909028 55666770 61820800 80931416 40432519
## [49] 42775788 40673678 40654648 39599853 46153111 47336193 48716164 44125860
## [57] 46980604 44627319 44872326 42876199 43458991 45887467 44973328 48676692
## [65] 43530033 46861958 45446145 44046598 45293457 48771994 47669735 47447562
## [73] 45884095 47578520 47859264 45515930 45274411 43683274 48015467 46249569
## [81] 46917348 47416948 45376623 46763228 43793960 42718097 42195831 47211688
## [89] 44374820 45818953 45855821 48655544 48474225 46438981 66593605 49390556
## [97] 55561148 60085696 76998241 46042461 44955422 42023078 42080997 39834975
## [105] 46085608 50009408 50197057 45771507 46861035 47480454 46901505 44993794
## [113] 45272862 53502316 46629261 45072530 43716799 47124198 46925879 46823939
## [121] 47892463 48281650 49651172 48412111 47668285 46597112 51253022 46099732
## [129] 46059543 44097155 47485900 47403451 47354452 47447324 47159639 48330059
## [137] 44226039 44354547 43734899 47566639 46128514 45122411 45544116
plot(series_penjualan,
main = "Plot Weekly Sales",
ylab = "Penjualan",
xlab = "Waktu",
col = "blue")# Split data training dan testing
train_data <- subset(series_penjualan,
start = 1,
end = 114)
test_data <- subset(series_penjualan,
start = 115,
end = 143)
# Plot training data
ts.plot(train_data,
col = "blue",
ylab = "Weekly Sales",
xlab = "Time")
title(main = "Train Time Series Plot",
cex.sub = 0.8)
points(train_data,
pch = 20,
col = "blue")Uji stasioneritas
## Warning in adf.test(train_data): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: train_data
## Dickey-Fuller = -5.3396, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
Differencing non-musiman
diff_nonmusiman <- diff(train_data)
# Plot differencing
ts.plot(diff_nonmusiman,
main = "Differencing Non Seasonal",
ylab = "Differenced Data",
xlab = "Time",
col = "red")
points(diff_nonmusiman,
pch = 20,
col = "yellow")## Warning in adf.test(diff_nonmusiman): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff_nonmusiman
## Dickey-Fuller = -5.9312, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
Differencing musiman
diff_musiman <- diff(diff_nonmusiman,
lag = 52)
# Plot seasonal differencing
ts.plot(diff_musiman,
main = "Seasonal Differencing",
ylab = "Seasonal Difference",
xlab = "Time",
col = "green")
points(diff_musiman,
pch = 20,
col = "black")## Warning in adf.test(na.omit(diff_musiman)): p-value smaller than printed
## p-value
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(diff_musiman)
## Dickey-Fuller = -5.5458, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
Plot ACF dan PACF
# Plot ACF dan PACF
par(mfrow = c(1,2))
Acf(diff_musiman,
main = "ACF")
Pacf(diff_musiman,
main = "PACF")
# Membentuk model SARIMA
# Membentuk model SARIMA
sarima_model1 <- Arima(train_data,
order = c(1,1,1),
seasonal = list(order = c(0,1,1),
period = 52))
summary(sarima_model1)## Series: train_data
## ARIMA(1,1,1)(0,1,1)[52]
##
## Coefficients:
## ar1 ma1 sma1
## 0.1869 -0.8662 0.1532
## s.e. 0.1645 0.0718 0.2818
##
## sigma^2 = 3.931e+12: log likelihood = -970.64
## AIC=1949.27 AICc=1949.99 BIC=1957.72
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 234645.4 1414217 720978.7 0.5177475 1.500458 0.4743825 -0.01323472
sarima_model2 <- Arima(train_data,
order = c(0,1,1),
seasonal = list(order = c(0,1,1),
period = 52))
summary(sarima_model2)## Series: train_data
## ARIMA(0,1,1)(0,1,1)[52]
##
## Coefficients:
## ma1 sma1
## -0.8302 0.0549
## s.e. 0.0891 0.2621
##
## sigma^2 = 4.009e+12: log likelihood = -971.28
## AIC=1948.56 AICc=1948.98 BIC=1954.89
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 232083.5 1440455 743524.9 0.5210133 1.541746 0.4892173 0.1031782
sarima_model3 <- Arima(train_data,
order = c(1,1,0),
seasonal = list(order = c(0,1,1),
period = 52))
summary(sarima_model3)## Series: train_data
## ARIMA(1,1,0)(0,1,1)[52]
##
## Coefficients:
## ar1 sma1
## -0.3595 0.4328
## s.e. 0.1236 0.4026
##
## sigma^2 = 4.075e+12: log likelihood = -975.78
## AIC=1957.56 AICc=1957.98 BIC=1963.89
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 107021.8 1452225 732927.7 0.2213365 1.553227 0.4822446 -0.07248425
sarima_model4 <- Arima(train_data,
order = c(2,1,1),
seasonal = list(order = c(0,1,1),
period = 52))
summary(sarima_model4)## Series: train_data
## ARIMA(2,1,1)(0,1,1)[52]
##
## Coefficients:
## ar1 ar2 ma1 sma1
## 0.1847 -0.0284 -0.8612 0.1501
## s.e. 0.1652 0.1489 0.0789 0.2812
##
## sigma^2 = 3.999e+12: log likelihood = -970.62
## AIC=1951.24 AICc=1952.33 BIC=1961.79
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 234666.3 1414111 720943.3 0.51859 1.499942 0.4743592 -0.01947318
Membandingkan nilai AIC
## [1] 1949.273
## [1] 1948.562
## [1] 1957.558
## [1] 1951.237
# Model otomatis SARIMA
log_train_data <- log(train_data)
model_otomatis <- auto.arima(log_train_data,
seasonal = TRUE,
stepwise = FALSE,
approximation = FALSE)
summary(model_otomatis)## Series: log_train_data
## ARIMA(0,1,1)(0,1,0)[52]
##
## Coefficients:
## ma1
## -0.8319
## s.e. 0.0800
##
## sigma^2 = 0.00195: log likelihood = 108.37
## AIC=-212.74 AICc=-212.53 BIC=-208.52
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.004080985 0.03203452 0.01695113 0.02316051 0.09585153 0.5264379
## ACF1
## Training set 0.07301659
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)(0,1,0)[52]
## Q* = 24.87, df = 22, p-value = 0.3034
##
## Model df: 1. Total lags used: 23
hasil_forecast <- forecast(model_otomatis,
h = length(test_data))
hasil_forecast$mean <- exp(hasil_forecast$mean)
hasil_forecast$lower <- exp(hasil_forecast$lower)
hasil_forecast$upper <- exp(hasil_forecast$upper)
# Menampilkan hasil forecast
hasil_forecast## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2012.192 47552028 44935834 50320539 43609683 51850764
## 2012.212 51467737 48597501 54507494 47143463 56188659
## 2012.231 46025977 43425185 48782533 42108438 50307981
## 2012.250 49548950 46712966 52557108 45277997 54222769
## 2012.269 48051956 45267106 51008130 43858836 52645958
## 2012.288 46572161 43840037 49474552 42459228 51083504
## 2012.308 47890513 45047520 50912930 43611500 52589368
## 2012.327 51568504 48471526 54863356 46908099 56691928
## 2012.346 50403043 47341653 53662399 45797055 55472272
## 2012.365 50168131 47087183 53450667 45533574 55274408
## 2012.385 48515017 45503268 51726105 43985383 53511114
## 2012.404 50306597 47150504 53673948 45560729 55546822
## 2012.423 50603439 47395783 54028183 45780897 55933986
## 2012.442 48125742 45044165 51418137 43493573 53251248
## 2012.462 47870375 44774674 51180111 43217790 53023831
## 2012.481 46188005 43172012 49414694 41655999 51213074
## 2012.500 50768599 47421865 54351523 45740466 56349461
## 2012.519 48901447 45647642 52387187 44013759 54331909
## 2012.538 49607515 46276447 53178360 44604608 55171554
## 2012.558 50135762 46738923 53779472 45034924 55814342
## 2012.577 47978448 44699061 51498429 43054793 53465161
## 2012.596 49444558 46035656 53105885 44327286 55152582
## 2012.615 46305037 43085402 49765265 41472665 51700474
## 2012.635 45167485 42000676 48573069 40415162 50478624
## 2012.654 44615274 41461470 48008975 39883220 49908775
## 2012.673 49918733 46361524 53748877 44582241 55894002
## 2012.692 46919203 43549182 50550010 41864319 52584436
## 2012.712 48446140 44939262 52226682 43186791 54345981
## 2012.731 48485122 44948449 52300070 43181905 54439633
plot(hasil_forecast,
main = "Forecast SARIMA",
xlab = "Time",
ylab = "Sales",
col = "blue")
# Menambahkan data aktual
lines(test_data,
col = "red",
lwd = 2)
legend("topleft",
legend = c("Forecast", "Data Aktual"),
col = c("blue", "red"),
lty = 1,
lwd = 2)# Menghitung akurasi model
rmse_value <- rmse(test_data,
hasil_forecast$mean)
mae_value <- mae(test_data,
hasil_forecast$mean)
mape_value <- mape(test_data,
hasil_forecast$mean)
cat("RMSE :", rmse_value, "\n")## RMSE : 2306284
## MAE : 1903285
## MAPE : 0.04107816