You can embed an R code chunk like this:
library(fpp2)
library(dplyr)
library(imputeTS)
library(readxl)
library(forecast)
library(ggplot2)
library(ggfortify)
library(caret)
Project1_DataS06 <- read_excel("Project1_DataS06.xlsx", skip = 1)
View(Project1_DataS06)
data_S06 <- subset(Project1_DataS06, category == 'S06', select = c(SeriesInd, Var05, Var07)) %>%
mutate(date = as.Date(SeriesInd, origin = "1900-01-01"))
data_S06 <- filter(data_S06, SeriesInd <= 43221)
summary(data_S06)
## SeriesInd Var05 Var07 date
## Min. :40669 Min. :2.300e+01 Min. :2.300e+01 Min. :2011-05-08
## 1st Qu.:41304 1st Qu.:3.100e+01 1st Qu.:3.100e+01 1st Qu.:2013-01-31
## Median :41946 Median :4.100e+01 Median :4.100e+01 Median :2014-11-05
## Mean :41945 Mean :7.968e+08 Mean :7.968e+08 Mean :2014-11-03
## 3rd Qu.:42586 3rd Qu.:5.200e+01 3rd Qu.:5.200e+01 3rd Qu.:2016-08-06
## Max. :43221 Max. :1.000e+10 Max. :1.000e+10 Max. :2018-05-03
## NA's :5 NA's :5
str(data_S06)
## tibble [1,762 × 4] (S3: tbl_df/tbl/data.frame)
## $ SeriesInd: num [1:1762] 40669 40670 40671 40672 40673 ...
## $ Var05 : num [1:1762] 27 27.3 28 28.1 28.9 ...
## $ Var07 : num [1:1762] 27.3 28.1 28.1 29.1 28.9 ...
## $ date : Date[1:1762], format: "2011-05-08" "2011-05-09" ...
data_S06_v5 <- data_S06 %>% select(Var05)
data_S06_v5 <- data_S06_v5[1:1622,]
summary(data_S06_v5)
## Var05
## Min. : 22.91
## 1st Qu.: 30.32
## Median : 36.87
## Mean : 39.85
## 3rd Qu.: 50.47
## Max. :195.00
## NA's :5
str(data_S06_v5)
## tibble [1,622 × 1] (S3: tbl_df/tbl/data.frame)
## $ Var05: num [1:1622] 27 27.3 28 28.1 28.9 ...
data_S06_v7 <- data_S06 %>% select(Var07)
data_S06_v7 <- data_S06_v7[1:1622,]
summary(data_S06_v7)
## Var07
## Min. : 22.88
## 1st Qu.: 30.26
## Median : 36.97
## Mean : 39.85
## 3rd Qu.: 50.45
## Max. :189.72
## NA's :5
str(data_S06_v7)
## tibble [1,622 × 1] (S3: tbl_df/tbl/data.frame)
## $ Var07: num [1:1622] 27.3 28.1 28.1 29.1 28.9 ...
data_S06_Var05 <- ts(data_S06_v5)
ggtsdisplay(data_S06_Var05)
## Warning: Removed 5 rows containing missing values (`geom_point()`).
data_S06_Var07 <- ts(data_S06_v7)
ggtsdisplay(data_S06_Var07)
## Warning: Removed 5 rows containing missing values (`geom_point()`).
data_S06_Var05 <- data_S06_Var05 |>
tsclean()
ggtsdisplay(data_S06_Var05)
data_S06_Var07 <- data_S06_Var07 |>
tsclean()
ggtsdisplay(data_S06_Var07)
### The data look getting better, appears to be some seaonality, upward
trends via time. No significant variabilities but lags are so obvious
over time in the ACF and cutoff after lag2 in PACE.
gglagplot(data_S06_Var05)
gglagplot(data_S06_Var07)
train_index <- createDataPartition(data_S06_Var05, times = 1, p = 0.912, list = FALSE)
train <- data_S06_Var05[train_index]
test <- data_S06_Var05[-train_index]
horizon <- length(test)
print(dim(train))
## NULL
print(dim(test))
## NULL
print(horizon)
## [1] 140
train_index <- createDataPartition(data_S06_Var07, times = 1, p = 0.912, list = FALSE)
train <- data_S06_Var07[train_index]
test <- data_S06_Var07[-train_index]
horizon <- length(test)
print(dim(train))
## NULL
print(dim(test))
## NULL
print(horizon)
## [1] 140
# Simple Exponential Smoothing Method and # Holt's Method
set.seed(123)
ses.fit <- ses(train, h = horizon)
ses_accuracy_Var05 <- c(data_S06_Var05 = accuracy(ses.fit, test)['Test set', 'MAPE'])
holt.fit <- holt(train, damped=TRUE, h = horizon)
holt_accuracy_Var05 <- c(data_S06_Var05 = accuracy(holt.fit, test)['Test set', 'MAPE'])
autoplot(ses.fit) + ggtitle("ses.fit")
autoplot(holt.fit) + ggtitle("holt.fit")
ses.fit <- ses(train, h = horizon)
ses_accuracy_Var07<- c(data_S06_Var07 = accuracy(ses.fit, test)['Test set', 'MAPE'])
holt.fit <- holt(train, damped=TRUE, h = horizon)
holt_accuracy_Var07 <- c(data_S06_Var07 = accuracy(holt.fit, test)['Test set', 'MAPE'])
autoplot(ses.fit) + ggtitle("ses.fit")
autoplot(holt.fit) + ggtitle("holt.fit")
rbind(holt_pval = Box.test(residuals(holt.fit))$p.value,
ses_pval = Box.test(residuals(ses.fit))$p.value)
## [,1]
## holt_pval 0.9960170
## ses_pval 0.9439229
set.seed(123)
ets.fit <- ets(train)
ets.fit
## ETS(A,N,N)
##
## Call:
## ets(y = train)
##
## Smoothing parameters:
## alpha = 0.9067
##
## Initial states:
## l = 27.3824
##
## sigma: 0.5841
##
## AIC AICc BIC
## 9230.396 9230.412 9246.300
ets.fc <- forecast(ets.fit, h = horizon)
ets_accuray_Var05 <- c(data_S06_Var05 = accuracy(ets.fc, test)['Test set', 'MAPE'])
ets_accuray_Var05
## data_S06_Var05
## 36.1663
checkresiduals(ets.fc)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,N)
## Q* = 10.264, df = 10, p-value = 0.4176
##
## Model df: 0. Total lags used: 10
autoplot(ets.fit) + ggtitle("ets.fit")
autoplot(ets.fc) + ggtitle("ets.fc")
ets.fc <- forecast(ets.fit, h = horizon)
ets_accuray_Var07 <- c(data_S06_Var07 = accuracy(ets.fc, test)['Test set', 'MAPE'])
ets_accuray_Var07
## data_S06_Var07
## 36.1663
checkresiduals(ets.fc)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,N)
## Q* = 10.264, df = 10, p-value = 0.4176
##
## Model df: 0. Total lags used: 10
autoplot(ets.fit) + ggtitle("ets.fit")
autoplot(ets.fc) + ggtitle("ets.fc")
set.seed(123)
train |>
auto.arima() |>
forecast(h = horizon) |>
checkresiduals()
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,2)
## Q* = 5.0055, df = 6, p-value = 0.5431
##
## Model df: 4. Total lags used: 10
arima.fc <- train |>
auto.arima() |>
forecast(h=horizon)
arima_accuray_Var05 <- c(data_S06_Var05= accuracy(arima.fc, test)['Test set', 'MAPE'])
autoplot(arima.fc) + ggtitle("data_S06_Var05")
arima_accuray_Var07 <- c(data_S06_Var07= accuracy(arima.fc, test)['Test set', 'MAPE'])
autoplot(arima.fc) + ggtitle("data_S06_Var07")
S06_Var05.result <- cbind(ses = ses_accuracy_Var05, holt = holt_accuracy_Var05, ets = ets_accuray_Var05, arima = arima_accuray_Var05)
cat("\tMAPE results by model - S06_Var05\n\n")
## MAPE results by model - S06_Var05
S06_Var05.result
## ses holt ets arima
## data_S06_Var05 36.1661 36.16157 36.1663 36.5977
S06_Var07.result <- cbind(ses = ses_accuracy_Var07, holt = holt_accuracy_Var07, ets = ets_accuray_Var07, arima = arima_accuray_Var07)
cat("\tMAPE results by model - S06_Var07\n\n")
## MAPE results by model - S06_Var07
S06_Var07.result
## ses holt ets arima
## data_S06_Var07 36.1661 36.16157 36.1663 36.5977
It is found that MAPE accuracy measures of the forecasting models are the same. So, it might be similar patterns, have less significant variability over time and finally limitation of forecasting models.