library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.2
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
library(forecast)
## Warning: package 'forecast' was built under R version 3.6.2
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
USLEI <- read_csv("TheConferenceBoard_USLEI_Historical_Data.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## CEI = col_double(),
## LEI = col_double(),
## LAG = col_double(),
## `LEI Change` = col_character()
## )
str(USLEI)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 704 obs. of 5 variables:
## $ Date : Date, format: "1959-01-01" "1959-02-01" ...
## $ CEI : num 30.5 30.8 31 31.3 31.5 31.6 31.5 31.1 31.1 31.1 ...
## $ LEI : num 33.3 33.8 34.3 34.4 34.7 34.8 34.9 34.8 34.9 34.7 ...
## $ LAG : num 32.5 32.7 32.7 32.9 33.1 33.2 33.5 33.9 34.1 34.4 ...
## $ LEI Change: chr NA "1.5%" "1.5%" "0.3%" ...
## - attr(*, "spec")=
## .. cols(
## .. Date = col_date(format = ""),
## .. CEI = col_double(),
## .. LEI = col_double(),
## .. LAG = col_double(),
## .. `LEI Change` = col_character()
## .. )
USLEI$Date <- strptime(USLEI$Date, '%Y-%m-%d')
LEI_ts<-ts(USLEI$LEI,start=1959,frequency=12)
#First, let's review decomposition of LEI data, we can see that the seasonal component is relatively small compared to the other components. That's vey important because ETS model contains seasonal part.
aLEI<-decompose(LEI_ts,type='additive')
autoplot(aLEI)

mLEI<-decompose(LEI_ts,type='multiplicative')
autoplot(mLEI)

# Now I split the whole dataset into test and train dataset. .
LEI_ts_train = window(LEI_ts, end=c(2010,12))
LEI_ts_test = window(LEI_ts, start=c(2011,1))
# Here I set the all possible parameters, so we can compare the result to check which part of error, trend, and seasonal can make a difference.
#
# I try 'AAM', 'ANM', 'AMA', but it comes out an error, says 'Forbidden model combination'
# So I try to list them all (2*3*3=18 types in total), and try one by one to see which combination is forbidden. Then, I got a table like this: (y means it can run, while n means it was forbidden)
# AAA(y),AAM(n),AAN(y),
# AMA(n),AMM(n),AMN(n),
# MAA(y),MAM(y),MAN(y),
# MMA(n),MMM(y),MMN(y),
# ANA(y),ANM(n),ANN(y),
# AMA(n),AMM(n),AMN(n).
model1 = ets(LEI_ts_train, model = "AAA")
model2 = ets(LEI_ts_train, model = 'AAN')
model3 = ets(LEI_ts_train, model = 'MAA')
model4 = ets(LEI_ts_train, model = 'MAM')
model5 = ets(LEI_ts_train, model = 'MAN')
model6 = ets(LEI_ts_train, model = 'MMM')
model7 = ets(LEI_ts_train, model = 'MMN')
model8 = ets(LEI_ts_train, model = 'ANA')
model9 = ets(LEI_ts_train, model = 'ANN')
# After training the model, let's use it to forecast
h = length(LEI_ts_test)
fc_model1 = forecast(model1,h=h)
fc_model2 = forecast(model2,h=h)
fc_model3 = forecast(model3,h=h)
fc_model4 = forecast(model4,h=h)
fc_model5 = forecast(model5,h=h)
fc_model6 = forecast(model6,h=h)
fc_model7 = forecast(model7,h=h)
fc_model8 = forecast(model8,h=h)
fc_model9 = forecast(model9,h=h)
# I try to plot them all, but it looks terrible. But we can see most of them are overlaping. There are three clear lines.
autoplot(LEI_ts)+
autolayer(fc_model1, PI=FALSE, series='AAA')+
autolayer(fc_model2, PI=FALSE, series='AAN')+
autolayer(fc_model3, PI=FALSE, series='MAA')+
autolayer(fc_model4, PI=FALSE, series='MAM')+
autolayer(fc_model5, PI=FALSE, series='MAN')+
autolayer(fc_model6, PI=FALSE, series='MMM')+
autolayer(fc_model7, PI=FALSE, series='MMN')+
autolayer(fc_model8, PI=FALSE, series='ANA')+
autolayer(fc_model9, PI=FALSE, series='ANN')+
guides(colour=guide_legend(title="Forecast"))

#
# I choose four lines of all to make the plot clear.
autoplot(LEI_ts)+
autolayer(fc_model1, PI=FALSE, series='AAA')+
autolayer(fc_model2, PI=FALSE, series='AAN')+
autolayer(fc_model4, PI=FALSE, series='MAM')+
autolayer(fc_model9, PI=FALSE, series='ANN')+
guides(colour=guide_legend(title="Forecast"))

# Can see that 'AAA' line is similar to 'AAN' line, as we talk about, this data shows small seasonal fluctuations.
# After set trend to 'None', we get a horizontal line. Trend is the major driving force.
# It seems like 'MAM' do the best job to forecast the LEI. LEt's check with accuracy function.
accuracy(fc_model1, LEI_ts_test)
## ME RMSE MAE MPE MAPE
## Training set 0.03424355 0.4843364 0.3455683 0.06108413 0.4531384
## Test set 5.91051937 8.4238012 6.2717819 4.80255256 5.1408801
## MASE ACF1 Theil's U
## Training set 0.08325636 0.3518343 NA
## Test set 1.51103476 0.9672891 14.99283
accuracy(fc_model2, LEI_ts_test)
## ME RMSE MAE MPE MAPE
## Training set 0.02283327 0.4356209 0.3071167 0.03817106 0.3991451
## Test set 5.71234878 8.9647420 6.9945865 4.57648185 5.7773934
## MASE ACF1 Theil's U
## Training set 0.07399237 0.00393205 NA
## Test set 1.68517712 0.97075665 15.98341
accuracy(fc_model4, LEI_ts_test)
## ME RMSE MAE MPE MAPE
## Training set 0.02278924 0.5081947 0.3573354 0.04540405 0.4689812
## Test set 1.39393223 4.2019158 3.5439755 1.00455227 2.9863392
## MASE ACF1 Theil's U
## Training set 0.08609136 0.4753417 NA
## Test set 0.85383554 0.9638407 7.62646
accuracy(fc_model9, LEI_ts_test)
## ME RMSE MAE MPE MAPE
## Training set 0.1118661 0.613196 0.4519601 0.1782275 0.5933541
## Test set 12.3163702 14.533232 12.3188672 10.2702336 10.2726579
## MASE ACF1 Theil's U
## Training set 0.1088889 0.6219407 NA
## Test set 2.9679343 0.9627649 26.17947
# As we expect, 'MAM' is the best combination in the dataset, with the lowest mean error and lowest mean absolute scaled error!