HW 5 Exponential smoothing

library(fpp3)

## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --

## v tibble      3.1.4      v tsibble     1.0.1 
## v dplyr       1.0.7      v tsibbledata 0.3.0 
## v tidyr       1.1.3      v feasts      0.2.2 
## v lubridate   1.7.10     v fable       0.3.1 
## v ggplot2     3.3.5

## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date()    masks base::date()
## x dplyr::filter()      masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval()  masks lubridate::interval()
## x dplyr::lag()         masks stats::lag()
## x tsibble::setdiff()   masks base::setdiff()
## x tsibble::union()     masks base::union()

library(fpp2)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## -- Attaching packages ---------------------------------------------- fpp2 2.4 --

## v forecast  8.15     v expsmooth 2.3 
## v fma       2.4

##

## 
## Attaching package: 'fpp2'

## The following object is masked from 'package:fpp3':
## 
##     insurance

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v readr   2.0.1     v stringr 1.4.0
## v purrr   0.3.4     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks stats::filter()
## x tsibble::intersect()     masks lubridate::intersect(), base::intersect()
## x tsibble::interval()      masks lubridate::interval()
## x dplyr::lag()             masks stats::lag()
## x tsibble::setdiff()       masks lubridate::setdiff(), base::setdiff()
## x tsibble::union()         masks lubridate::union(), base::union()

library(dplyr)
library(latex2exp)
library(ggplot2)
library(fabletools)

Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0 , and generate forecasts for the next four months.

aus_livestock %>%
  distinct(Animal)

## # A tibble: 7 x 1
##   Animal                    
##   <fct>                     
## 1 Bulls, bullocks and steers
## 2 Calves                    
## 3 Cattle (excl. calves)     
## 4 Cows and heifers          
## 5 Lambs                     
## 6 Pigs                      
## 7 Sheep

aus_livestock %>%
  distinct(State)

## # A tibble: 8 x 1
##   State                       
##   <fct>                       
## 1 Australian Capital Territory
## 2 New South Wales             
## 3 Northern Territory          
## 4 Queensland                  
## 5 South Australia             
## 6 Tasmania                    
## 7 Victoria                    
## 8 Western Australia

head(aus_livestock)

## # A tsibble: 6 x 4 [1M]
## # Key:       Animal, State [1]
##      Month Animal                     State                        Count
##      <mth> <fct>                      <fct>                        <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory  2300
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory  2100
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory  2100
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory  1900
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory  2100
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory  1800

aus_livestock %>%
  filter(State=="Victoria" & Animal=="Pigs") %>%
  autoplot(Count)+ ggtitle("Pigs in Victoria")

Pigs<-aus_livestock %>%
  filter(State=="Victoria" & Animal=="Pigs")

FORECAST :

fit <- as_tsibble(Pigs) %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N")))

#fit1<-ses(Pigs,h=4)

fc<-fit %>%
  forecast(h = 4)

print(tidy(fit))

## # A tibble: 2 x 5
##   Animal State    .model                                         term   estimate
##   <fct>  <fct>    <chr>                                          <chr>     <dbl>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + se~ alpha   3.22e-1
## 2 Pigs   Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + se~ l[0]    1.01e+5

DISCUSSION:

Parameters

alpha=.322 l0=1.01e+5

fc %>%
  autoplot(Pigs) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fit)) +
  labs(y="Count", title="Victoria  Pigs") +
  guides(colour = "none")

DISCUSSION:
Black line represents data, red line is one step ahead.

Compute a 95% prediction interval for the first forecast using y^±1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.

h = unpack_hilo(hilo(fc, 95) , "95%" )
View(h)
h[1,]

## # A tsibble: 1 x 8 [1M]
## # Key:       Animal, State, .model [1]
##   Animal State    .model    Month             Count  .mean `95%_lower` `95%_upper`
##   <fct>  <fct>    <chr>     <mth>            <dist>  <dbl>       <dbl>       <dbl>
## 1 Pigs   Victoria "ETS(~ 2019 Jan N(95187, 8.7e+07) 95187.      76855.     113518.

h$Count

## <distribution[4]>
## [1] N(95187, 8.7e+07) N(95187, 9.7e+07) N(95187, 1.1e+08) N(95187, 1.1e+08)

So the r generated interval is: [76855,113518]

Now, manually calculate.

# mean of 1st forecast is 95186.56
(m<-95186.56)

## [1] 95186.56

#get sigma
report(fit)

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

#sigma^2:  87480760
(sigma<-84780760^.5)

## [1] 9207.647

#Lower bound
m-(1.96*sigma)

## [1] 77139.57

#upper bound
m+(1.96*sigma)

## [1] 113233.5

DISCUSSION: r generated 95% CI: [76855,113518] manual 95% CI: [77140,113234]

The two intervals are reasonably close. The manual CI bounds are closer together.

Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

Plot the Exports series and discuss the main features of the data.

#Choose China
global_economy%>%distinct(Country)

## # A tibble: 263 x 1
##    Country            
##    <fct>              
##  1 Afghanistan        
##  2 Albania            
##  3 Algeria            
##  4 American Samoa     
##  5 Andorra            
##  6 Angola             
##  7 Antigua and Barbuda
##  8 Arab World         
##  9 Argentina          
## 10 Armenia            
## # ... with 253 more rows

China_economy<-global_economy%>%
  select("Year", Exports) %>%
  filter(Country=="China")
autoplot(China_economy)+ggtitle("Timeplot: China Export")

## Plot variable not specified, automatically selected `.vars = Exports`

DISCUSSION:

The Chinese exports appear to have increasing trend until 2006 with a possible correction and decrease

#Summary of US exports
summary(China_economy$Exports)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.492   4.357  13.048  14.117  20.748  36.035

#STL decomposition        

dcmp<-China_economy %>% model(stl=STL(Exports)) 
components(dcmp)

## # A dable: 58 x 7 [1Y]
## # Key:     Country, .model [1]
## # :        Exports = trend + remainder
##    Country .model  Year Exports trend remainder season_adjust
##    <fct>   <chr>  <dbl>   <dbl> <dbl>     <dbl>         <dbl>
##  1 China   stl     1960    4.31  4.21    0.100           4.31
##  2 China   stl     1961    3.87  4.09   -0.223           3.87
##  3 China   stl     1962    4.05  3.98    0.0704          4.05
##  4 China   stl     1963    4.01  3.87    0.139           4.01
##  5 China   stl     1964    3.77  3.75    0.0229          3.77
##  6 China   stl     1965    3.64  3.62    0.0154          3.64
##  7 China   stl     1966    3.49  3.48    0.0114          3.49
##  8 China   stl     1967    3.28  3.33   -0.0488          3.28
##  9 China   stl     1968    3.30  3.20    0.107           3.30
## 10 China   stl     1969    3.05  3.13   -0.0821          3.05
## # ... with 48 more rows

components(dcmp) %>% as_tsibble() %>%
  autoplot(Exports,colour="#D55E00")+
  geom_line(aes(y=trend), colour="gray")

  labs(y="Exports",title="Total China Exports")

## $y
## [1] "Exports"
## 
## $title
## [1] "Total China Exports"
## 
## attr(,"class")
## [1] "labels"

  #red line is data, gray is trend
  
components(dcmp) %>% autoplot()

DISCUSSION:

The trend increases then decreases, no seasonality, and remainder terms are ok.

Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

fit_c1 <- as_tsibble(China_economy) %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))



fc_c1<-fit_c1 %>%
  forecast(h = 4)

print(tidy(fit_c1))

## # A tibble: 2 x 4
##   Country .model                                                  term  estimate
##   <fct>   <chr>                                                   <chr>    <dbl>
## 1 China   "ETS(Exports ~ error(\"A\") + trend(\"N\") + season(\"~ alpha     1.00
## 2 China   "ETS(Exports ~ error(\"A\") + trend(\"N\") + season(\"~ l[0]      4.31

DISCUSSION:

Parameters: alpha=.9998999 l[0]=4.3082414

fc_c1 %>%
  autoplot(China_economy) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fit_c1)) +
  labs(y="Exports", title="China Exports") +
  guides(colour = "none")

Compute the RMSE values for the training data.

accuracy(fit_c1)

## # A tibble: 1 x 11
##   Country .model           .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <fct>   <chr>            <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China   "ETS(Exports ~ ~ Trai~ 0.266  1.90  1.26  1.84  9.34 0.983 0.991 0.288

DISCUSSION:

RSME=1.900172

Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

fit_c2 <- as_tsibble(China_economy) %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))



fc_c2<-fit_c2 %>%
  forecast(h = 4)

print(tidy(fit_c2))

## # A tibble: 4 x 4
##   Country .model                                                  term  estimate
##   <fct>   <chr>                                                   <chr>    <dbl>
## 1 China   "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"~ alpha   1.00  
## 2 China   "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"~ beta    0.0992
## 3 China   "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"~ l[0]    4.04  
## 4 China   "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"~ b[0]    0.100

DISCUSSION:

Parameters of ETS(A, A, N) alpha=0.99989993 beta=0.09923760 l0=4.03989974
b0=0.09995689

The second model: ETS(A,A,N) produces a forecast with a downward trend, while the ETS(A,N,N) produces a level unchanging forecast.

ETS(A,A,N) accounts for trend but may overforecast. ETS(A,N,N) may not be sophisticated enough to capture the characteristics.

Compare the forecasts from both methods. Which do you think is best?

fc_c2 %>%
  autoplot(China_economy) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fit_c1)) +
  labs(y="Exports", title="China Exports") +
  guides(colour = "none")

DISCUSSION:

The second model: ETS(A,A,N) produces a forecast with a downward trend, while the ETS(A,N,N) produces a level unchanging forecast.

Let’s look at RMSE:

accuracy(fit_c2)

## # A tibble: 1 x 11
##   Country .model        .type      ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <fct>   <chr>         <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China   "ETS(Exports~ Trai~ -0.0854  1.91  1.25 -0.169  9.57 0.973 0.995 0.232

DISCUSSION:

RMSE=1.906436

the first model RMSE=1.900172

This indicates the first model is better. MAPE also indicates the first model is better.

Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

h_c1 = unpack_hilo(hilo(fc_c1, 95) , "95%" )
View(h_c1)
h_c1[1,]

## # A tsibble: 1 x 7 [1Y]
## # Key:       Country, .model [1]
##   Country .model                   Year    Exports .mean `95%_lower` `95%_upper`
##   <fct>   <chr>                   <dbl>     <dist> <dbl>       <dbl>       <dbl>
## 1 China   "ETS(Exports ~ error(\~  2018 N(20, 3.7)  19.8        16.0        23.5

h_c1$Exports

## <distribution[4]>
## [1] N(20, 3.7) N(20, 7.5) N(20, 11)  N(20, 15)

So the r generated interval is: [15.96718,23.54756]

Now, manually calculate.

# mean of 1st forecast is 19.75737

(m_c1<-19.75737)

## [1] 19.75737

#get sigma
report(fit_c1)

## Series: Exports 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9998999 
## 
##   Initial states:
##      l[0]
##  4.308241
## 
##   sigma^2:  3.7396
## 
##      AIC     AICc      BIC 
## 315.9713 316.4157 322.1526

#sigma^2:  3.7396
(sigma_c1<-3.7396^.5)

## [1] 1.933805

#Lower bound
m_c1-(1.96*sigma_c1)

## [1] 15.96711

#upper bound
m_c1+(1.96*sigma_c1)

## [1] 23.54763

DISCUSSION: ETS[A,N,N] r generated 95% CI: [15.96718,23.54756] manual 95% CI: [15.96711,23.54763]

The two intervals are very close.

Now calculate 95% for the second model ETS[A,A,N]

h_c2 = unpack_hilo(hilo(fc_c2, 95) , "95%" )
View(h_c2)
h_c2[1,]

## # A tsibble: 1 x 7 [1Y]
## # Key:       Country, .model [1]
##   Country .model                   Year    Exports .mean `95%_lower` `95%_upper`
##   <fct>   <chr>                   <dbl>     <dist> <dbl>       <dbl>       <dbl>
## 1 China   "ETS(Exports ~ error(\~  2018 N(19, 3.9)  19.4        15.5        23.2

h_c2$Exports

## <distribution[4]>
## [1] N(19, 3.9) N(19, 8.6) N(19, 14)  N(18, 21)

So the r generated interval is: [15.493100,23.23803]

Now, manually calculate.

# mean of 1st forecast is 19.36556

(m_c2<-19.36556)

## [1] 19.36556

#get sigma
report(fit_c2)

## Series: Exports 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998999 
##     beta  = 0.0992376 
## 
##   Initial states:
##    l[0]       b[0]
##  4.0399 0.09995689
## 
##   sigma^2:  3.9037
## 
##      AIC     AICc      BIC 
## 320.3530 321.5069 330.6552

#sigma^2:  3.9037
(sigma_c2<-3.9037^.5)

## [1] 1.975778

#Lower bound
m_c2-(1.96*sigma_c2)

## [1] 15.49303

#upper bound
m_c2+(1.96*sigma_c2)

## [1] 23.23809

DISCUSSION: ETS[A,A,N] r generated 95% CI: [15.493100,23.23803] manual 95% CI: [15.49303,23.23809]

The two intervals are very close.

Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

global_economy%>%distinct(Country)

## # A tibble: 263 x 1
##    Country            
##    <fct>              
##  1 Afghanistan        
##  2 Albania            
##  3 Algeria            
##  4 American Samoa     
##  5 Andorra            
##  6 Angola             
##  7 Antigua and Barbuda
##  8 Arab World         
##  9 Argentina          
## 10 Armenia            
## # ... with 253 more rows

China_economy<-global_economy%>%
  select("Year", GDP) %>%
  filter(Country=="China")
autoplot(China_economy)+ggtitle("Timeplot: China GDP")

## Plot variable not specified, automatically selected `.vars = GDP`

DISCUSSION:

plot indicates level to exponential growth.

Let’s take a closer peek

#Summary of US exports
summary(China_economy$Exports)

## Warning: Unknown or uninitialised column: `Exports`.

## Length  Class   Mode 
##      0   NULL   NULL

#STL decomposition        

dcmp2<-China_economy %>% model(stl=STL(GDP)) 
components(dcmp2)

## # A dable: 58 x 7 [1Y]
## # Key:     Country, .model [1]
## # :        GDP = trend + remainder
##    Country .model  Year          GDP        trend    remainder season_adjust
##    <fct>   <chr>  <dbl>        <dbl>        <dbl>        <dbl>         <dbl>
##  1 China   stl     1960 59716467625. 49854129407.  9862338218.  59716467625.
##  2 China   stl     1961 50056868958. 52325315607. -2268446650.  50056868958.
##  3 China   stl     1962 47209359006. 54796501807. -7587142802.  47209359006.
##  4 China   stl     1963 50706799903. 57892689218. -7185889315.  50706799903.
##  5 China   stl     1964 59708343489. 61493201134. -1784857646.  59708343489.
##  6 China   stl     1965 70436266147. 65616214610.  4820051537.  70436266147.
##  7 China   stl     1966 76720285970. 70105946202.  6614339768.  76720285970.
##  8 China   stl     1967 72881631327. 74486778685. -1605147358.  72881631327.
##  9 China   stl     1968 70846535056. 79495425860. -8648890805.  70846535056.
## 10 China   stl     1969 79705906247. 86406865221. -6700958973.  79705906247.
## # ... with 48 more rows

components(dcmp2) %>% as_tsibble() %>%
  autoplot(GDP,colour="#D55E00")+
  geom_line(aes(y=trend), colour="gray")

  labs(y="GDP",title="Total China GDP")

## $y
## [1] "GDP"
## 
## $title
## [1] "Total China GDP"
## 
## attr(,"class")
## [1] "labels"

  #red line is data, gray is trend
  
components(dcmp2) %>% autoplot()

Trend is upward.

Modelling as follows:

China_economy %>%
  model(
    `Holt's method` = ETS(GDP ~ error("A") +
                       trend("A") + season("N")),
    `Damped Holt's method` = ETS(GDP ~ error("A") +
                       trend("Ad", phi = 0.9) + season("N"))
  ) %>%
  forecast(h = 15) %>%
  autoplot(China_economy, level = NULL) +
  labs(title = "China GDP",
       y = "GDP") +
  guides(colour = guide_legend(title = "Forecast"))

DISCUSSION:

The forescasted by Holt’s method may over-forecast as seen in the plot because it is a constant increasing trend in this example.

The Damped Holt’s method brings in a parameter that dampens or flattens the trend. Let’s expand the forecast period.

China_economy %>%
  model(
    `Holt's method` = ETS(GDP ~ error("A") +
                       trend("A") + season("N")),
    `Damped Holt's method` = ETS(GDP ~ error("A") +
                       trend("Ad", phi = 0.9) + season("N"))
  ) %>%
  forecast(h = 30) %>%
  autoplot(China_economy, level = NULL) +
  labs(title = "China GDP",
       y = "GDP") +
  guides(colour = guide_legend(title = "Forecast"))

DISCUSSION:

With a forecast period of h=30, clearly Holt’s method and Damped Holt’s method diverge. Holt’s method continues to increase, while Damped Holt’s method flattens.

Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

aus_production %>%
  
  autoplot(Gas)+ ggtitle("Gas in Australia")

Plot appears to have increase trend, let’s decompose.

#STL decomposition        

dcmp_g<-aus_production %>% model(stl=STL(Gas)) 
components(dcmp_g)

## # A dable: 218 x 7 [1Q]
## # Key:     .model [1]
## # :        Gas = trend + season_year + remainder
##    .model Quarter   Gas trend season_year remainder season_adjust
##    <chr>    <qtr> <dbl> <dbl>       <dbl>     <dbl>         <dbl>
##  1 stl    1956 Q1     5  5.85      -1.18     0.334           6.18
##  2 stl    1956 Q2     6  5.93       0.506   -0.439           5.49
##  3 stl    1956 Q3     7  6.02       1.10    -0.118           5.90
##  4 stl    1956 Q4     6  6.14      -0.428    0.286           6.43
##  5 stl    1957 Q1     5  6.27      -1.18    -0.0863          6.18
##  6 stl    1957 Q2     7  6.25       0.511    0.242           6.49
##  7 stl    1957 Q3     7  6.24       1.10    -0.345           5.90
##  8 stl    1957 Q4     6  6.24      -0.434    0.191           6.43
##  9 stl    1958 Q1     5  6.37      -1.18    -0.185           6.18
## 10 stl    1958 Q2     7  6.50       0.516   -0.0185          6.48
## # ... with 208 more rows

components(dcmp_g) %>% as_tsibble() %>%
  autoplot(Gas,colour="#D55E00")+
  geom_line(aes(y=trend), colour="gray")

  labs(y="Gas",title="Total Gas")

## $y
## [1] "Gas"
## 
## $title
## [1] "Total Gas"
## 
## attr(,"class")
## [1] "labels"

  #red line is data, gray is trend
  
components(dcmp_g) %>% autoplot()

DISCUSSION:

This plot shows there is increasing trend, seasonality, and increasing remainders.

fit_gas1<-aus_production %>%
  model(
    additive = ETS(Gas ~ error("A") +
                       trend("A") + season("A")),
    multiplicative = ETS(Gas ~ error("A") +
                       trend("A") + season("M"))
  ) 
fc_gas1<-  fit_gas1 %>%
  forecast(h = 15) 

fc_gas1%>%
  autoplot(aus_production, level = NULL) +
  labs(title = "Australian Gas production",
       y = "Gas") +
  guides(colour = guide_legend(title = "Forecast"))

DISCUSSION:

The additive method is preferred when seasonal variations are roughly constant through the series, while the multiplicative method is preferred when the seasonal variations are changing proportional to the level of the series. Here they are increasing. Let’s look at RMSE:

accuracy(fit_gas1)

## # A tibble: 2 x 10
##   .model         .type         ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
##   <chr>          <chr>      <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 additive       Training 0.00525  4.76  3.35 -4.69  10.9  0.600 0.628 0.0772
## 2 multiplicative Training 0.218    4.19  2.84 -0.920  5.03 0.510 0.553 0.0405

DISCUSSION:

The additive model RMSE=4.76. The multiplicative model RMSE=4.19.

Therefore, the multiplicative model is better.

Let’s try a damped trend next.

fit_gas2<-aus_production %>%
  model(
    `Damped Holt's method` = ETS(Gas ~ error("A") +
                       trend("Ad", phi = 0.9) + season("M")),
    multiplicative = ETS(Gas ~ error("A") +
                       trend("A") + season("M"))
  ) 
fc_gas2<-  fit_gas2 %>%
  forecast(h = 15) 

fc_gas2%>%
  autoplot(aus_production, level = NULL) +
  labs(title = "Australian Gas production",
       y = "Gas") +
  guides(colour = guide_legend(title = "Forecast"))

Let’s take a look at the RMSE

accuracy(fit_gas2)

## # A tibble: 2 x 10
##   .model               .type       ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
##   <chr>                <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Damped Holt's method Training 0.796  4.28  2.86  1.46   4.08 0.513 0.564 0.0134
## 2 multiplicative       Training 0.218  4.19  2.84 -0.920  5.03 0.510 0.553 0.0405

DISCUSSION:

Damped RMSE=4.28 Multiplicative RMSE=4.19

This indicates the Multiplicate model is better than the damped model. The damped model does not improve the forescasts.

Recall your retail time series data (from Exercise 8 in Section 2.10).

#Here is prior look at this series

set.seed(1111111)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries %>% distinct(Industry)

## # A tibble: 1 x 1
##   Industry                    
##   <chr>                       
## 1 Newspaper and book retailing

head(myseries)

## # A tsibble: 6 x 5 [1M]
## # Key:       State, Industry [1]
##   State             Industry                     `Series ID`    Month Turnover
##   <chr>             <chr>                        <chr>          <mth>    <dbl>
## 1 Western Australia Newspaper and book retailing A3349822A   1982 Apr      9.7
## 2 Western Australia Newspaper and book retailing A3349822A   1982 May     11  
## 3 Western Australia Newspaper and book retailing A3349822A   1982 Jun     10.7
## 4 Western Australia Newspaper and book retailing A3349822A   1982 Jul      9  
## 5 Western Australia Newspaper and book retailing A3349822A   1982 Aug      9.1
## 6 Western Australia Newspaper and book retailing A3349822A   1982 Sep     10

autoplot(myseries,.vars=Turnover)

A quick look at this timeseries shows an upward trend, with possible yearly seasonality and also possible a 10 year cyclicity. Let’s take a closer look at seasonality. Note the downward trend in 2010.

Lets decompose

#STL decomposition        

dcmp_my<-myseries %>% model(stl=STL(Turnover)) 
components(dcmp_my)

## # A dable: 441 x 9 [1M]
## # Key:     State, Industry, .model [1]
## # :        Turnover = trend + season_year + remainder
##    State             Industry .model    Month Turnover trend season_year remainder
##    <chr>             <chr>    <chr>     <mth>    <dbl> <dbl>       <dbl>     <dbl>
##  1 Western Australia Newspap~ stl    1982 Apr      9.7 10.7      -0.991    0.00593
##  2 Western Australia Newspap~ stl    1982 May     11   10.4       0.260    0.318  
##  3 Western Australia Newspap~ stl    1982 Jun     10.7 10.2      -0.815    1.36   
##  4 Western Australia Newspap~ stl    1982 Jul      9    9.90     -0.631   -0.266  
##  5 Western Australia Newspap~ stl    1982 Aug      9.1  9.66      0.0367  -0.597  
##  6 Western Australia Newspap~ stl    1982 Sep     10    9.42     -0.712    1.29   
##  7 Western Australia Newspap~ stl    1982 Oct      7.7  9.19     -0.496   -0.991  
##  8 Western Australia Newspap~ stl    1982 Nov      8.4  8.97     -0.165   -0.401  
##  9 Western Australia Newspap~ stl    1982 Dec     11.8  8.75      4.38    -1.32   
## 10 Western Australia Newspap~ stl    1983 Jan      7.4  8.52     -0.0943  -1.03   
## # ... with 431 more rows, and 1 more variable: season_adjust <dbl>

components(dcmp_my) %>% as_tsibble() %>%
  autoplot(Turnover,colour="#D55E00")+
  geom_line(aes(y=trend), colour="gray")

  labs(y="Gas",title="Total Gas")

## $y
## [1] "Gas"
## 
## $title
## [1] "Total Gas"
## 
## attr(,"class")
## [1] "labels"

  #red line is data, gray is trend
  
components(dcmp_my) %>% autoplot()

Why is multiplicative seasonality necessary for this series?

DISCUSSION:

As seen from the decomposition above, the seasonal variations are changing proportionally to the level of the series.

Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

fit_my<-myseries %>%
  model(
    `Damped Holt's method` = ETS(Turnover ~ error("A") +
                       trend("Ad", phi = 0.9) + season("M")),
    multiplicative = ETS(Turnover ~ error("A") +
                       trend("A") + season("M"))
  ) 
fc_my<-  fit_my %>%
  forecast(h = 15) 

fc_my%>%
  autoplot(myseries, level = NULL) +
  labs(title = "Newspaper Turnover ",
       y = "Turnover") +
  guides(colour = guide_legend(title = "Forecast"))

Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

accuracy(fit_my)

## # A tibble: 2 x 12
##   State   Industry   .model  .type       ME  RMSE   MAE    MPE  MAPE  MASE RMSSE
##   <chr>   <chr>      <chr>   <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 Wester~ Newspaper~ Damped~ Trai~  0.0845   2.50  1.75 -0.113  6.91 0.421 0.441
## 2 Wester~ Newspaper~ multip~ Trai~ -0.00751  2.49  1.74 -0.462  6.89 0.418 0.440
## # ... with 1 more variable: ACF1 <dbl>

DISCUSSION:

Damped RSME=2.498 Multiplicative=2.49

so Multiplicative is better.

Check that the residuals from the best method look like white noise.

aug<-myseries %>%
  model(
    ETS(Turnover ~ error("A") +
                       trend("A") + season("M"))
  )%>%
  augment()

autoplot(aug,.innov)

Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

train2011 <- myseries %>% filter(year(Month) < 2011 )
autoplot(train2011)

## Plot variable not specified, automatically selected `.vars = Turnover`

fit_e<-train2011 %>%
  model(
    `SNAIVE` = SNAIVE(Turnover),
    multiplicative = ETS(Turnover ~ error("A") +
                       trend("A") + season("M"))
  ) 
fc_e<-  fit_e %>%
  forecast(h = 15) 

fc_e%>%
  autoplot(train2011, level = NULL) +
  labs(title = "Newspaper Turnover ",
       y = "Turnover") +
  guides(colour = guide_legend(title = "Forecast"))

RMSE

accuracy(fit_e)

## # A tibble: 2 x 12
##   State   Industry    .model  .type      ME  RMSE   MAE    MPE  MAPE  MASE RMSSE
##   <chr>   <chr>       <chr>   <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 Wester~ Newspaper ~ SNAIVE  Trai~  0.838   5.67  4.09  2.49  16.0  1     1    
## 2 Wester~ Newspaper ~ multip~ Trai~ -0.0426  2.24  1.59 -0.705  6.92 0.389 0.395
## # ... with 1 more variable: ACF1 <dbl>

DISCUSSION:

SNAIVE RMSE= 5.67 Multiplicative RMSE= 2.24

Multiplicative is better than SNAIVE.

For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

(lambda = train2011 %>% features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero) )

## [1] 0.05073177

train2011$bc<-box_cox(train2011$Turnover, lambda)

#STL decomposition        

dcmp_mybc<-train2011 %>% model(stl=STL(bc)) 
components(dcmp_mybc)

## # A dable: 345 x 9 [1M]
## # Key:     State, Industry, .model [1]
## # :        bc = trend + season_year + remainder
##    State             Industry  .model    Month    bc trend season_year remainder
##    <chr>             <chr>     <chr>     <mth> <dbl> <dbl>       <dbl>     <dbl>
##  1 Western Australia Newspape~ stl    1982 Apr  2.41  2.51    -0.0904   -0.0122 
##  2 Western Australia Newspape~ stl    1982 May  2.55  2.48     0.0307    0.0395 
##  3 Western Australia Newspape~ stl    1982 Jun  2.52  2.45    -0.0756    0.146  
##  4 Western Australia Newspape~ stl    1982 Jul  2.32  2.42    -0.0534   -0.0392 
##  5 Western Australia Newspape~ stl    1982 Aug  2.34  2.39     0.0123   -0.0638 
##  6 Western Australia Newspape~ stl    1982 Sep  2.44  2.36    -0.0570    0.140  
##  7 Western Australia Newspape~ stl    1982 Oct  2.15  2.33    -0.0455   -0.135  
##  8 Western Australia Newspape~ stl    1982 Nov  2.25  2.30    -0.00350  -0.0527 
##  9 Western Australia Newspape~ stl    1982 Dec  2.63  2.28     0.347     0.00519
## 10 Western Australia Newspape~ stl    1983 Jan  2.11  2.25    -0.00727  -0.136  
## # ... with 335 more rows, and 1 more variable: season_adjust <dbl>

components(dcmp_mybc) %>% as_tsibble() %>%
  autoplot(bc,colour="#D55E00")+
  geom_line(aes(y=trend), colour="gray")

  labs(y="box coxTurnover",title="Total Turnover")

## $y
## [1] "box coxTurnover"
## 
## $title
## [1] "Total Turnover"
## 
## attr(,"class")
## [1] "labels"

  #red line is data, gray is trend
  
components(dcmp_mybc) %>% autoplot()

We have completed the boxcox transformation and plots.

Now run models:

fit_bc<-train2011 %>%
  model(ETS(bc ~ error("A") +
                       trend("A") + season("M")))

fc_bc<-  fit_bc %>%
  forecast(h = 15) 

fc_bc%>%
  autoplot(train2011, level = NULL) +
  labs(title = "Newspaper Turnover ",
       y = "Turnover transformed") +
  guides(colour = guide_legend(title = "Forecast"))

accuracy(fit_bc
        )

## # A tibble: 1 x 12
##   State  Industry  .model   .type      ME  RMSE    MAE     MPE  MAPE  MASE RMSSE
##   <chr>  <chr>     <chr>    <chr>   <dbl> <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 Weste~ Newspape~ "ETS(bc~ Trai~ 0.00127 0.105 0.0788 -0.0171  2.45 0.406 0.435
## # ... with 1 more variable: ACF1 <dbl>

The RMSE for this box cox transformed Turnover = .147925 which is the best of all previous forecast on the test sets.

HW 5 Exponential smoothing

Lisa Szydziak

10/6/2021