Introduction

Permen adalah makanan yang umumnya berbahan dasar gula, air dan fruktosa. Permen sangat digemarin berbagai kalangan salah satunya adalah anak anak. Pada LBB kali ini saya akan melakukan forecasting candy production dari data kaggle, berupa data produksi permen di USA dari tahun 1972 sampai 2017.

Data PreProcess

Library

library(readr)
library(dplyr)
library(padr)
library(lubridate)
library(forecast)
library(ggsci)
library(padr)
library(tidyr)
library(zoo)
library(ggplot2)
library(TSstudio)

Read Data

candy <- read.csv("candy_production.csv")
range(candy$observation_date)
## [1] "1972-01-01" "2017-08-01"

Pertama ubah tipe data observation_date menjadi date dan ubah nama kolom IPG3113N menjadu production.

candy_agg <- candy %>% 
  mutate(date = ymd(observation_date)) %>% 
  rename(production = IPG3113N) %>% 
  select(c(date, production))

Selanjutnya melakukan padding dan replace missing value menjadi 0

candy_agg <- candy_agg %>% 
  pad() %>%
  replace(., is.na(.), 0)
## pad applied on the interval: month
head(candy_agg)
range(candy_agg$date)
## [1] "1972-01-01" "2017-08-01"

Seasonality

#membuat object ts
candy_ts <- ts(candy_agg$production,frequency = 12) 

#melakukan decompose lalu plotting
candy_ts %>% 
  decompose() %>% 
  autoplot()

ts_heatmap(candy_ts)
ts_seasonal(candy_ts, type = "box")

Model Fitting and Evaluation

Untuk pembuatan model saya akan menggunakan model Holt winter karena pada data saya terdapat seasonal dan trend, lalu membandungkannya dengan model ARIMA

Cross Validation

Sebelumnya kita perlu melakukan cross validation yaitu membagi data menjadi data test dan train :

  • test : data 8 tahun terakhir, -4 karena tahun 2017 hanya sampai bulan 8

  • train : sisa data test

test <- tail(candy_ts, 12*8-4)
train <- head(candy_ts, length(candy_ts)-92)

Holt Winter

Modeling & Forecast

#Modeling
candy_hw <- hw(train, seasonal="additive", h=92)

#forecast
forecast_hw <- forecast(candy_hw, h=92) 

Visualize

candy_ts %>%  autoplot(series = "Actual") +
  autolayer(forecast_hw$mean, series = "Predict Holt Winter")

Evaluate the model performance

#accuracy
accuracy(object = forecast_hw$mean, test)
##                 ME     RMSE      MAE         MPE     MAPE      ACF1 Theil's U
## Test set 0.1882302 5.550671 4.634408 -0.04621848 4.467333 0.6976633 0.8265308

ARIMA

Modeling & Forecast

Kita melakukan modeling dan forecast menggunakan fungsi stlm() dan forecast() dari package forecast

#Modelling
candy_arima <- stlm(train, method = "arima") 

# forecast
forecast_arima <- forecast(candy_arima, h=92) 

Visualize

Kita melakukan plotting menggunakan fungsi autoplot() & autolayer() dari package ggplot2

candy_ts %>%  autoplot(series = "Actual") +
  autolayer(forecast_arima$mean, series = "Predict ARIMA")

Evaluate the model performance

accuracy(object = forecast_arima$mean, test)
##                ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
## Test set 3.452797 6.959805 5.280406 3.101515 4.981337 0.7461703  1.003541
accuracy(forecast_arima, test)
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 0.07232259 2.950233 2.233891 0.01916769 2.330316 0.4437522
## Test set     3.45279718 6.959805 5.280406 3.10151509 4.981337 1.0489283
##                       ACF1 Theil's U
## Training set -0.0002557591        NA
## Test set      0.7461702799  1.003541

Compare Model

Accuracy

acc_arima <- accuracy(object = forecast_arima$mean, test)
acc_hw <- accuracy(object = forecast_hw$mean, test)
result <- rbind(acc_arima, acc_hw)
rownames(result) <- c("Model Arima", "Model Holt Winter")
result
##                          ME     RMSE      MAE         MPE     MAPE      ACF1
## Model Arima       3.4527972 6.959805 5.280406  3.10151509 4.981337 0.7461703
## Model Holt Winter 0.1882302 5.550671 4.634408 -0.04621848 4.467333 0.6976633
##                   Theil's U
## Model Arima       1.0035411
## Model Holt Winter 0.8265308

Dari kesimpulan accuracy diatas, model Holt Winter adalah model terbaik denga MAE terkecil dibandingkan dengan model ARIMA

Visualize

model_Viz <- data.frame(date=candy_agg$date %>% tail(92),
                        actual = as.vector(test),
                        arima_forecast = as.vector(forecast_arima$mean),
                        hw_forecast = as.vector(forecast_hw$mean)) 
model_Viz %>% 
 ggplot() +
  geom_line(aes(x = date, y = actual, colour = "Actual"),size=1)+
  geom_line(aes(x = date, y = arima_forecast, colour = "Model Arima"),size=1)+
  geom_line(aes(x = date, y = hw_forecast, colour = "Model Holt Winter"),size=1)+
  scale_y_continuous()+
  labs(title = "Candy Production",x = "Date",y = "Production",colour = "")