Week 3 Discussion: Building ETS Models

library(lubridate)

## Warning: package 'lubridate' was built under R version 3.6.2

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.6.2

library(forecast)

## Warning: package 'forecast' was built under R version 3.6.2

## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(dplyr)

## Warning: package 'dplyr' was built under R version 3.6.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readr)

USLEI <- read_csv("TheConferenceBoard_USLEI_Historical_Data.csv")

## Parsed with column specification:
## cols(
##   Date = col_date(format = ""),
##   CEI = col_double(),
##   LEI = col_double(),
##   LAG = col_double(),
##   `LEI Change` = col_character()
## )

str(USLEI)

## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 704 obs. of  5 variables:
##  $ Date      : Date, format: "1959-01-01" "1959-02-01" ...
##  $ CEI       : num  30.5 30.8 31 31.3 31.5 31.6 31.5 31.1 31.1 31.1 ...
##  $ LEI       : num  33.3 33.8 34.3 34.4 34.7 34.8 34.9 34.8 34.9 34.7 ...
##  $ LAG       : num  32.5 32.7 32.7 32.9 33.1 33.2 33.5 33.9 34.1 34.4 ...
##  $ LEI Change: chr  NA "1.5%" "1.5%" "0.3%" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Date = col_date(format = ""),
##   ..   CEI = col_double(),
##   ..   LEI = col_double(),
##   ..   LAG = col_double(),
##   ..   `LEI Change` = col_character()
##   .. )

USLEI$Date <- strptime(USLEI$Date, '%Y-%m-%d')
LEI_ts<-ts(USLEI$LEI,start=1959,frequency=12)

#First, let's review decomposition of LEI data, we can see that the seasonal component is relatively small compared to the other components. That's vey important because ETS model contains seasonal part.
aLEI<-decompose(LEI_ts,type='additive')
autoplot(aLEI)

mLEI<-decompose(LEI_ts,type='multiplicative')
autoplot(mLEI)

# Now I split the whole dataset into test and train dataset. .
LEI_ts_train = window(LEI_ts, end=c(2010,12))
LEI_ts_test = window(LEI_ts, start=c(2011,1))

# Here I set the all possible parameters, so we can compare the result to check which part of error, trend, and seasonal can make a difference.
# 
# I try 'AAM', 'ANM', 'AMA', but it comes out an error, says 'Forbidden model combination'
# So I try to list them all (2*3*3=18 types in total), and try one by one to see which combination is forbidden. Then, I got a table like this: (y means it can run, while n means it was forbidden)
# AAA(y),AAM(n),AAN(y),
# AMA(n),AMM(n),AMN(n),
# MAA(y),MAM(y),MAN(y),
# MMA(n),MMM(y),MMN(y),
# ANA(y),ANM(n),ANN(y),
# AMA(n),AMM(n),AMN(n).
model1 = ets(LEI_ts_train, model = "AAA")
model2 = ets(LEI_ts_train, model = 'AAN')
model3 = ets(LEI_ts_train, model = 'MAA')
model4 = ets(LEI_ts_train, model = 'MAM')
model5 = ets(LEI_ts_train, model = 'MAN')
model6 = ets(LEI_ts_train, model = 'MMM')
model7 = ets(LEI_ts_train, model = 'MMN')
model8 = ets(LEI_ts_train, model = 'ANA')
model9 = ets(LEI_ts_train, model = 'ANN')



# After training the model, let's use it to forecast
h = length(LEI_ts_test)
fc_model1 = forecast(model1,h=h)
fc_model2 = forecast(model2,h=h)
fc_model3 = forecast(model3,h=h)
fc_model4 = forecast(model4,h=h)
fc_model5 = forecast(model5,h=h)
fc_model6 = forecast(model6,h=h)
fc_model7 = forecast(model7,h=h)
fc_model8 = forecast(model8,h=h)
fc_model9 = forecast(model9,h=h)


# I try to plot them all, but it looks terrible. But we can see most of them are overlaping. There are three clear lines.
autoplot(LEI_ts)+
  autolayer(fc_model1, PI=FALSE, series='AAA')+
  autolayer(fc_model2, PI=FALSE, series='AAN')+
  autolayer(fc_model3, PI=FALSE, series='MAA')+
  autolayer(fc_model4, PI=FALSE, series='MAM')+
  autolayer(fc_model5, PI=FALSE, series='MAN')+
  autolayer(fc_model6, PI=FALSE, series='MMM')+
  autolayer(fc_model7, PI=FALSE, series='MMN')+
  autolayer(fc_model8, PI=FALSE, series='ANA')+
  autolayer(fc_model9, PI=FALSE, series='ANN')+
  guides(colour=guide_legend(title="Forecast"))

#

# I choose four lines of all to make the plot clear.
autoplot(LEI_ts)+
  autolayer(fc_model1, PI=FALSE, series='AAA')+
  autolayer(fc_model2, PI=FALSE, series='AAN')+
  autolayer(fc_model4, PI=FALSE, series='MAM')+
  autolayer(fc_model9, PI=FALSE, series='ANN')+
  guides(colour=guide_legend(title="Forecast"))

# Can see that 'AAA' line is similar to 'AAN' line, as we talk about, this data shows small seasonal fluctuations.
# After set trend to 'None', we get a horizontal line. Trend is the major driving force.
# It seems like 'MAM' do the best job to forecast the LEI. LEt's check with accuracy function.

accuracy(fc_model1, LEI_ts_test)

##                      ME      RMSE       MAE        MPE      MAPE
## Training set 0.03424355 0.4843364 0.3455683 0.06108413 0.4531384
## Test set     5.91051937 8.4238012 6.2717819 4.80255256 5.1408801
##                    MASE      ACF1 Theil's U
## Training set 0.08325636 0.3518343        NA
## Test set     1.51103476 0.9672891  14.99283

accuracy(fc_model2, LEI_ts_test)

##                      ME      RMSE       MAE        MPE      MAPE
## Training set 0.02283327 0.4356209 0.3071167 0.03817106 0.3991451
## Test set     5.71234878 8.9647420 6.9945865 4.57648185 5.7773934
##                    MASE       ACF1 Theil's U
## Training set 0.07399237 0.00393205        NA
## Test set     1.68517712 0.97075665  15.98341

accuracy(fc_model4, LEI_ts_test)

##                      ME      RMSE       MAE        MPE      MAPE
## Training set 0.02278924 0.5081947 0.3573354 0.04540405 0.4689812
## Test set     1.39393223 4.2019158 3.5439755 1.00455227 2.9863392
##                    MASE      ACF1 Theil's U
## Training set 0.08609136 0.4753417        NA
## Test set     0.85383554 0.9638407   7.62646

accuracy(fc_model9, LEI_ts_test)

##                      ME      RMSE        MAE        MPE       MAPE
## Training set  0.1118661  0.613196  0.4519601  0.1782275  0.5933541
## Test set     12.3163702 14.533232 12.3188672 10.2702336 10.2726579
##                   MASE      ACF1 Theil's U
## Training set 0.1088889 0.6219407        NA
## Test set     2.9679343 0.9627649  26.17947

# As we expect, 'MAM' is the best combination in the dataset, with the lowest mean error and lowest mean absolute scaled error!

Week 3 Discussion: Building ETS Models

Jiahao Lin

7/15/2020