Libraries

library(readxl)
library(ggplot2)
library(fpp3)
library(dplyr)
library(fst)
library(tsibbledata)
library(tsibble)
library(feasts)
library(lubridate)
library(seasonal)
library(zoo)
library(fable)
library(forecast)
library(tidyverse)
library(kableExtra)
library(Amelia)

Data

- Reading

data <- read_excel("data/DRO Difference 2021 -- formatted.xlsx")

# converting to a time series (tsibble indexed by Date)
data$Date = as.Date(data$Date, format = "%Y/%U")
data <- data %>%
  select(Date, Day, Actual) %>%
  as_tsibble(index = Date)

 

- Exploring

data %>% gg_tsdisplay(Actual)

# checking for missing values
colSums(is.na(data))
  Date    Day Actual 
     0      0      3 
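
Before imputing anything, it is worth seeing which dates are affected; a quick inspection (assuming the column names used above):

# rows with missing Actual values
data %>% filter(is.na(Actual))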

 

Replace NA

# finding the (row, column) locations of the missing values
loc = which(is.na(data), arr.ind = TRUE)


# calculates the average volume for a given day of the week
averageDay <- function(data, day){
  value = round(mean(data$Actual[data$Day == day], na.rm = TRUE))
  
  return(value)
}


# replaces each missing value with the average for its day of the week
for (i in seq_len(nrow(loc))){
  row = loc[i, "row"]
  data$Actual[row] = averageDay(data, data$Day[row])
}
colSums(is.na(data))
  Date    Day Actual 
     0      0      0 
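
As an aside, the loop above could be replaced by a single grouped step; a minimal sketch, equivalent up to rounding (dplyr verbs work directly on tsibbles):

# day-of-week mean imputation in one step (alternative to the loop above)
data %>%
  group_by(Day) %>%
  mutate(Actual = ifelse(is.na(Actual), round(mean(Actual, na.rm = TRUE)), Actual)) %>%
  ungroup()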

 

bp <- boxplot(data$Actual)

# There is one outlier. 

# Replacing the outlier with the average for its day of the week
out.row = which(data$Actual %in% bp$out)
data$Actual[out.row] = averageDay(data, data$Day[out.row])
boxplot(data$Actual)

# $-assignment preserves the tsibble class, so no re-conversion is needed
data %>% gg_tsdisplay(Actual)

# much better

 

Testing Stationarity

# testing stationarity
ndiffs(data$Actual)
[1] 0
data$Actual %>% unitroot_nsdiffs()
nsdiffs 
      0 
data %>%
  features(Actual, unitroot_kpss)
# A tibble: 1 x 2
  kpss_stat kpss_pvalue
      <dbl>       <dbl>
1     0.330         0.1
# the KPSS p-value (0.1) gives no evidence against stationarity: no differencing needed
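
The same checks can also be run through the features() interface used above; a minimal equivalent sketch:

# both unit-root difference counts in one call
data %>% features(Actual, list(unitroot_ndiffs, unitroot_nsdiffs))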

 

Training Data

# 90/10 split
tr.lim <- as.character(data$Date[round(0.9 * nrow(data))])
train = data %>% filter_index(. ~ tr.lim)
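
A quick sanity check that the split is roughly 90/10:

# proportion of observations kept for training (should be close to 0.9)
nrow(train) / nrow(data)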


Models

 

ETS Comparisons

# 8.10

# Evaluating various ETS specifications
myfits <- train %>%
  model(
    `ANN` = ETS(Actual ~ error("A") + trend("N") + season("N")),
    `AAN` = ETS(Actual ~ error("A") + trend("A") + season("N")),
    `Auto ETS` = ETS(Actual),
    `Damped` = ETS(Actual ~ error("A") + trend("Ad") + season("N")),
    `Damped Holt's method` = ETS(Actual ~ error("A") +
                                   trend("Ad", phi = 0.9) + season("N")),
    `Damped Holt's method 2` = ETS(Actual ~ error("A") +
                                     trend("Ad", phi = 0.8) + season("N"))
  )

bind_rows(
  myfits %>% accuracy(),
  myfits %>% forecast(h = 20) %>% accuracy(data)
) %>%
  select(-ME, -MPE, -ACF1)
# A tibble: 12 x 7
   .model                 .type     RMSE   MAE  MAPE  MASE RMSSE
   <chr>                  <chr>    <dbl> <dbl> <dbl> <dbl> <dbl>
 1 ANN                    Training  581.  497. 23.9  2.12  1.96 
 2 AAN                    Training  592.  508. 24.5  2.16  2.00 
 3 Auto ETS               Training  220.  174.  7.92 0.739 0.744
 4 Damped                 Training  597.  511. 24.0  2.18  2.02 
 5 Damped Holt's method   Training  581.  498. 24.1  2.12  1.96 
 6 Damped Holt's method 2 Training  581.  498. 24.1  2.12  1.96 
 7 AAN                    Test      605.  518. 28.6  2.21  2.04 
 8 ANN                    Test      623.  505. 30.2  2.15  2.10 
 9 Auto ETS               Test      212.  171.  9.39 0.727 0.717
10 Damped                 Test      634.  540. 27.5  2.30  2.14 
11 Damped Holt's method   Test      616.  505. 29.8  2.15  2.08 
12 Damped Holt's method 2 Test      618.  505. 29.9  2.15  2.09 
# Auto ETS performs best
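
Since all six candidates are ETS models fit to the same series, their information criteria are directly comparable as a cross-check on the accuracy measures; a minimal sketch using glance():

# AICc comparison across the fitted ETS models
myfits %>% glance() %>% select(.model, AICc)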

 

STL Comparisons

# Auto ETS
myets = train %>%
  model(
    ETS(Actual)
  )

# seasonal NAIVE
mysnaive <- train %>%
  model(
    SNAIVE(Actual)
  )

# STL decomposition applied to the raw series
fit.dcmp <- train %>%
  model(stlf = decomposition_model(
    STL(Actual ~ trend(window = 7), robust = TRUE),
    NAIVE(season_adjust)
  ))

# STL decomposition applied to a log transform
fit.dcmp.log <- train %>%
  model(stlf = decomposition_model(
    STL(log(Actual) ~ trend(window = 7), robust = TRUE),
    NAIVE(season_adjust)
  ))
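
Before forecasting from the decomposition, it can help to see the components themselves; a quick sketch on the same STL specification:

# trend, weekly seasonality, and remainder from the STL fit
train %>%
  model(STL(Actual ~ trend(window = 7), robust = TRUE)) %>%
  components() %>%
  autoplot()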

# Comparing
bind_rows(
  myets %>% accuracy(),
  mysnaive %>% accuracy(),
  fit.dcmp %>% accuracy(),
  fit.dcmp.log %>% accuracy(),
  
  myets %>% forecast(h = 20) %>% accuracy(data),
  mysnaive %>% forecast(h = 20) %>% accuracy(data),
  fit.dcmp %>% forecast(h=20) %>% accuracy(data),
  fit.dcmp.log %>% forecast(h=20) %>% accuracy(data)
) %>%
  select(-.model, -ME, -MPE, -ACF1)
# A tibble: 8 x 6
  .type     RMSE   MAE  MAPE  MASE RMSSE
  <chr>    <dbl> <dbl> <dbl> <dbl> <dbl>
1 Training  220.  174.  7.92 0.739 0.744
2 Training  296.  235. 10.6  1     1    
3 Training  298.  222. 10.0  0.947 1.01 
4 Training  316.  234. 10.5  0.997 1.07 
5 Test      212.  171.  9.39 0.727 0.717
6 Test      245.  192.  9.22 0.817 0.828
7 Test      392.  341. 17.2  1.45  1.32 
8 Test      742.  704. 33.6  3.00  2.50 
# The auto ETS outperforms all other models on both the training and test sets.

 

ARIMA

Auto ARIMA

# Auto ARIMA
myarimaC = train %>%
  model(
    `Auto` = ARIMA(Actual)
  )

bind_rows(
  myarimaC %>% accuracy(),
  myarimaC %>% forecast(h = 20) %>% accuracy(data) 
)
# A tibble: 2 x 10
  .model .type        ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
  <chr>  <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
1 Auto   Training  -32.7  223.  168. -2.51  7.74 0.717 0.752 -0.00871
2 Auto   Test     -126.   219.  180. -7.23  9.36 0.764 0.740 -0.293  
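
To see which orders the automatic search settled on, the fitted specification can be printed:

# prints the (p,d,q)(P,D,Q) orders and coefficients chosen by ARIMA()
report(myarimaC)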

 

Finding best pdq combination

# This function produces accuracy measures for a given (p,d,q) combination
arimaIterate <- function(train, p, d, q){
  myarimaC = train %>%
    model(
      `Auto` = ARIMA(Actual ~ pdq(p, d, q))
    )
  
  measures = bind_rows(
    myarimaC %>% accuracy(),
    myarimaC %>% forecast(h = 20) %>% accuracy(data)
  )
  
  measures['combo'] = paste(p, d, q, sep = ", ")
  return(measures)
}
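
The nested loop below walks the full grid of combinations; equivalently, this could be written with purrr (attached via tidyverse). A minimal sketch assuming the same arimaIterate() and grid bounds:

# grid-search alternative to the nested loops
grid = expand.grid(p = 0:4, d = 0:1, q = 0:4)
totals = purrr::pmap_dfr(grid, function(p, d, q) arimaIterate(train, p, d, q))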

# this loop feeds every (p,d,q) combination to the function above
totals = list()
for (p in 0:4){
  for(d in 0:1){
    for(q in 0:4){
      
      vals = arimaIterate(train, p,d,q)
      totals = bind_rows(totals, vals)
      
    }
  }
}
totals = na.omit(totals) # drops rows from combinations that failed to fit
head(totals,20)
# A tibble: 20 x 11
   .model .type         ME  RMSE   MAE     MPE  MAPE  MASE RMSSE    ACF1 combo  
   <chr>  <chr>      <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>  
 1 Auto   Training  -44.6   225.  169.  -3.05   7.85 0.721 0.761 -0.0538 0, 0, 0
 2 Auto   Test     -129.    226.  184.  -7.28   9.49 0.783 0.764 -0.319  0, 0, 0
 3 Auto   Training  -44.9   225.  169.  -3.06   7.85 0.721 0.761 -0.0466 0, 0, 1
 4 Auto   Test     -128.    226.  184.  -7.27   9.47 0.782 0.764 -0.319  0, 0, 1
 5 Auto   Training  -41.0   224.  170.  -2.90   7.85 0.722 0.757 -0.0492 0, 0, 2
 6 Auto   Test     -129.    225.  184.  -7.32   9.52 0.785 0.760 -0.312  0, 0, 2
 7 Auto   Training  -44.1   223.  168.  -3.00   7.78 0.716 0.753 -0.0464 0, 0, 3
 8 Auto   Test     -132.    226.  186.  -7.46   9.63 0.791 0.763 -0.306  0, 0, 3
 9 Auto   Training  -44.2   224.  169.  -3.02   7.81 0.718 0.756 -0.0427 0, 0, 4
10 Auto   Test     -142.    230.  188.  -7.94   9.79 0.802 0.776 -0.287  0, 0, 4
11 Auto   Training   -1.32  311.  246.  -1.06  11.1  1.05  1.05  -0.526  0, 1, 0
12 Auto   Test     -262.    320.  273. -14.1   14.5  1.16  1.08  -0.329  0, 1, 0
13 Auto   Training   27.8   225.  173.   0.151  7.79 0.736 0.759 -0.0867 0, 1, 1
14 Auto   Test      -68.2   198.  162.  -4.43   8.30 0.690 0.670 -0.316  0, 1, 1
15 Auto   Training   29.0   226.  175.   0.183  7.91 0.744 0.763 -0.0270 0, 1, 2
16 Auto   Test      -77.7   199.  163.  -4.81   8.35 0.693 0.671 -0.288  0, 1, 2
17 Auto   Training   29.1   226.  175.   0.190  7.89 0.743 0.762 -0.0298 0, 1, 3
18 Auto   Test      -76.0   198.  162.  -4.73   8.32 0.691 0.668 -0.284  0, 1, 3
19 Auto   Training   26.9   222.  170.   0.143  7.70 0.726 0.751 -0.0155 0, 1, 4
20 Auto   Test      -90.8   201.  163.  -5.61   8.50 0.693 0.678 -0.283  0, 1, 4

 

# Finding the best p,d,q combinations for the training data
measure_train = totals %>%
  filter(.type == "Training")
best_train = head(sort(measure_train$RMSE), 20)
best_train = totals %>% filter(RMSE %in% best_train)


# Finding the best p,d,q combinations for the test data
measure_test = totals %>%
  filter(.type == "Test")
best_test = head(sort(measure_test$RMSE), 20)
best_test = totals %>% filter(RMSE %in% best_test)


# Finding models that perform well on both training and test data
best_both = best_test %>% filter(combo %in% best_train$combo)

# Top 5 models -- determined by smallest RMSE value
best5 = best_both %>% filter(RMSE %in% head(sort(best_both$RMSE), 5))
head(best5)
# A tibble: 5 x 11
  .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1 combo  
  <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <chr>  
1 Auto   Test  -68.2  198.  162. -4.43  8.30 0.690 0.670 -0.316 0, 1, 1
2 Auto   Test  -76.7  196.  162. -4.73  8.29 0.692 0.661 -0.262 1, 1, 2
3 Auto   Test  -75.2  195.  162. -4.67  8.26 0.689 0.659 -0.259 1, 1, 3
4 Auto   Test  -76.1  198.  161. -4.89  8.38 0.687 0.667 -0.265 2, 1, 2
5 Auto   Test  -84.5  198.  162. -5.26  8.37 0.689 0.670 -0.272 3, 1, 1

 

Best ARIMA

bestArima = train %>%
  model(
    ARIMA(Actual ~ pdq(0,1,1)) # among the shortlisted combinations, (0,1,1) has the smallest AIC
  )
report(bestArima)
Series: Actual 
Model: ARIMA(0,1,1)(2,1,1)[7] 

Coefficients:
          ma1     sar1     sar2     sma1
      -0.9349  -0.0071  -0.1088  -0.8575
s.e.   0.0434   0.1051   0.0971   0.0779

sigma^2 estimated as 54336:  log likelihood=-1159.36
AIC=2328.72   AICc=2329.1   BIC=2344.34
bind_rows(
  myarimaC %>% accuracy(),
  bestArima %>% accuracy(),
  myarimaC %>% forecast(h = 20) %>% accuracy(data),
  bestArima %>% forecast(h = 20) %>% accuracy(data)
)
# A tibble: 4 x 10
  .model              .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE     ACF1
  <chr>               <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>    <dbl>
1 Auto                Trai~  -32.7  223.  168. -2.51   7.74 0.717 0.752 -0.00871
2 ARIMA(Actual ~ pdq~ Trai~   27.8  225.  173.  0.151  7.79 0.736 0.759 -0.0867 
3 Auto                Test  -126.   219.  180. -7.23   9.36 0.764 0.740 -0.293  
4 ARIMA(Actual ~ pdq~ Test   -68.2  198.  162. -4.43   8.30 0.690 0.670 -0.316  
# The auto-ARIMA performs slightly better on the training data, but the model
# with the tuned pdq values performs noticeably better on the test data.

 

Best Models

Auto ETS

# auto ETS
myets = train %>%
  model(
    ETS(Actual)
  )
report(myets)
Series: Actual 
Model: ETS(A,N,A) 
  Smoothing parameters:
    alpha = 0.07030721 
    gamma = 0.000100008 

  Initial states:
    l[0]      s[0]     s[-1]    s[-2]    s[-3]    s[-4]    s[-5]     s[-6]
 2588.34 -905.1391 -239.9334 454.6483 501.2507 634.4895 57.10333 -502.4194

  sigma^2:  51210.91

     AIC     AICc      BIC 
2829.260 2830.593 2860.964 
myets %>% forecast(h=20) %>% autoplot(data) + ggtitle("ETS")

myets %>% gg_tsresiduals()

 

Best ARIMA

# best ARIMA
bestArima = train %>%
  model(
    ARIMA(Actual ~ pdq(0,1,1))
  )
report(bestArima)
Series: Actual 
Model: ARIMA(0,1,1)(2,1,1)[7] 

Coefficients:
          ma1     sar1     sar2     sma1
      -0.9349  -0.0071  -0.1088  -0.8575
s.e.   0.0434   0.1051   0.0971   0.0779

sigma^2 estimated as 54336:  log likelihood=-1159.36
AIC=2328.72   AICc=2329.1   BIC=2344.34
bestArima %>% forecast(h=20) %>% autoplot(data) + ggtitle("ARIMA")

bestArima %>% gg_tsresiduals()
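
The residual plots can be backed by a formal portmanteau test; a minimal sketch using the Ljung-Box test, where lag = 14 (two seasonal periods) and dof = 4 (the four estimated coefficients) are assumptions:

# Ljung-Box test on the ARIMA innovation residuals
bestArima %>%
  augment() %>%
  features(.innov, ljung_box, lag = 14, dof = 4)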


Best Model Evaluation

bind_rows(
  myets %>% accuracy(),
  bestArima %>% accuracy(),
  myets %>% forecast(h = 20) %>% accuracy(data),
  bestArima %>% forecast(h = 20) %>% accuracy(data)
)
# A tibble: 4 x 10
  .model               .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE    ACF1
  <chr>                <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
1 ETS(Actual)          Trai~  -20.1  220.  174. -1.93   7.92 0.739 0.744 -0.0540
2 ARIMA(Actual ~ pdq(~ Trai~   27.8  225.  173.  0.151  7.79 0.736 0.759 -0.0867
3 ETS(Actual)          Test  -100.   212.  171. -6.65   9.39 0.727 0.717 -0.266 
4 ARIMA(Actual ~ pdq(~ Test   -68.2  198.  162. -4.43   8.30 0.690 0.670 -0.316 
# The ETS edges out the ARIMA on the training set, but the ARIMA clearly
# outperforms it on the test set, which is the comparison that matters.


Final Evaluation and Decision

ets.fc = myets %>% forecast(h = 20)
arima.fc = bestArima %>% forecast(h = 20)
actual = tail(data, 20)  # the last 20 observations, i.e. the forecast window

final.compare = tibble(
  Date = actual$Date,
  Actual = actual$Actual,
  ETS = round(ets.fc$.mean),
  ARIMA = round(arima.fc$.mean)
)


ggplot(data = final.compare, mapping = aes(x = Date)) + 
  geom_line(aes(y = Actual), color = "blue") + 
  geom_point(aes(y = Actual), color = "blue") + 
  
  geom_line(aes(y = ETS), color = "red") + 
  geom_point(aes(y = ETS), color = "red") + 
  
  geom_line(aes(y = ARIMA), color = "darkgreen") + 
  geom_point(aes(y = ARIMA), color = "darkgreen") + 
  ggtitle("Actual (blue)  v  ETS (red)  v  ARIMA (green)")

ets.difference = sum(abs(final.compare$ETS - final.compare$Actual))
ets.difference
[1] 3414
arima.difference = sum(abs(final.compare$ARIMA - final.compare$Actual))
arima.difference
[1] 3242
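
In relative terms, the ARIMA's summed absolute error is about 5% lower than the ETS's (a difference of 172 on a base of 3414):

# ARIMA improvement over ETS as a share of the ETS error
(ets.difference - arima.difference) / ets.difference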

Although the difference is hard to see in the graph above, the numbers confirm that the ARIMA model outperforms the ETS: its summed absolute forecast error (3242 vs. 3414), together with the residual diagnostics shown earlier, indicates less error in the ARIMA model, making it the more accurate of the two. The ARIMA model is therefore the chosen model.

For future work, the same search used to find the best pdq combination could be extended to the seasonal PDQ terms. Since this technique already found a model that beats the automatic ARIMA selection, further gains in accuracy may follow; a sketch of that extension is given below.
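
A minimal sketch of that extension, mirroring arimaIterate() above (the function name, the fixed h = 20, and any grid bounds are placeholders to tune):

# accuracy measures for a given seasonal/non-seasonal order combination
arimaIterateSeasonal <- function(train, p, d, q, P, D, Q){
  fit = train %>%
    model(ARIMA(Actual ~ pdq(p, d, q) + PDQ(P, D, Q)))
  
  measures = bind_rows(
    fit %>% accuracy(),
    fit %>% forecast(h = 20) %>% accuracy(data)
  )
  
  measures['combo'] = paste(p, d, q, P, D, Q, sep = ", ")
  return(measures)
}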