Data624 Hw 5

Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.

p <- aus_livestock %>% 
  filter(Animal == 'Pigs' & State == 'Victoria')

pigs <- p %>%
  autoplot(Count) +
  labs(title = 'Timeseries')
pigs

a. Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0 , and generate forecasts for the next four months.

fit <- p%>%
  model(ses = ETS(Count ~ error('A') + trend('N') + season('N')))
opt_val <- fit %>% report()

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

pigsforecast <- fit %>%
  forecast(h = 4)
pigsforecast

## # A fable: 4 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model    Month             Count  .mean
##   <fct>  <fct>    <chr>     <mth>            <dist>  <dbl>
## 1 Pigs   Victoria ses    2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs   Victoria ses    2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs   Victoria ses    2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs   Victoria ses    2019 Apr N(95187, 1.1e+08) 95187.

plot <- fit %>%
  forecast(h = 4) %>%
  autoplot(filter(p, Month >= yearmonth('2016 Jan'))) +
  labs(title = 'Four Month Forecast')
plot

Compute a 95% prediction interval for the first forecast using y^± 1.96 s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.

y<- pigsforecast %>%
  pull(Count) %>%
  head(1)


sD <- augment(fit) %>%
  pull(.resid) %>%
  sd()

# Calculate the lower and upper confidence intervals. 
lowerCi <- y - 1.96 * sD
upperCi <- y + 1.96 * sD
z <- c(lowerCi, upperCi)
names(z) <- c('Lower', 'Upper')
z

## <distribution[2]>
##              Lower              Upper 
##  N(76871, 8.7e+07) N(113502, 8.7e+07)

The 95% prediction interval for the first forecast is from 76871 to 113502.

hilo(pigsforecast$Count, 95)

## <hilo[4]>
## [1] [76854.79, 113518.3]95 [75927.17, 114445.9]95 [75042.22, 115330.9]95
## [4] [74194.54, 116178.6]95

The intervals calculated by R function are slightly different.

Data set global_economy contains the annual Exports from many countries. Select one country to analyse.

a.Plot the Exports series and discuss the main features of the data.

pol <- global_economy %>%
  filter(Country == 'Poland')


head(pol,15)

## # A tsibble: 15 x 9 [1Y]
## # Key:       Country [1]
##    Country Code   Year   GDP Growth     CPI Imports Exports Population
##    <fct>   <fct> <dbl> <dbl>  <dbl>   <dbl>   <dbl>   <dbl>      <dbl>
##  1 Poland  POL    1960    NA     NA NA           NA      NA   29637450
##  2 Poland  POL    1961    NA     NA NA           NA      NA   29964000
##  3 Poland  POL    1962    NA     NA NA           NA      NA   30308500
##  4 Poland  POL    1963    NA     NA NA           NA      NA   30712000
##  5 Poland  POL    1964    NA     NA NA           NA      NA   31139450
##  6 Poland  POL    1965    NA     NA NA           NA      NA   31444950
##  7 Poland  POL    1966    NA     NA NA           NA      NA   31681000
##  8 Poland  POL    1967    NA     NA NA           NA      NA   31987155
##  9 Poland  POL    1968    NA     NA NA           NA      NA   32294655
## 10 Poland  POL    1969    NA     NA NA           NA      NA   32548300
## 11 Poland  POL    1970    NA     NA  0.0210      NA      NA   32664300
## 12 Poland  POL    1971    NA     NA  0.0213      NA      NA   32783500
## 13 Poland  POL    1972    NA     NA  0.0213      NA      NA   33055650
## 14 Poland  POL    1973    NA     NA  0.0218      NA      NA   33357200
## 15 Poland  POL    1974    NA     NA  0.0233      NA      NA   33678899

pol %>%
  autoplot(Exports) +
  labs(y="% of GDP", title="Exports: Poland") +
  guides(colour = "none")

## Warning: Removed 35 row(s) containing missing values (geom_path).

It looks like the export in Poland started after 1989 after the economic reform by Balcerowicz, where the reform had a temporary dropped but it boomed around 2000.

b.Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

pol1 <- na.omit(pol) 


fit <- pol1 %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))

report(fit)

## Series: Exports 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9998999 
## 
##   Initial states:
##      l[0]
##  22.96177
## 
##   sigma^2:  4.6429
## 
##      AIC     AICc      BIC 
## 111.3369 112.6000 114.7434

fc <- fit %>%
  forecast(h = 5)

fc%>%
  autoplot(pol) +
  labs(y="% of GDP", title="Exports: Poland") +
  guides(colour = "none")

## Warning: Removed 35 row(s) containing missing values (geom_path).

fc

## # A fable: 5 x 5 [1Y]
## # Key:     Country, .model [1]
##   Country .model                                           Year    Exports .mean
##   <fct>   <chr>                                           <dbl>     <dist> <dbl>
## 1 Poland  "ETS(Exports ~ error(\"A\") + trend(\"N\") + s~  2018 N(54, 4.6)  54.3
## 2 Poland  "ETS(Exports ~ error(\"A\") + trend(\"N\") + s~  2019 N(54, 9.3)  54.3
## 3 Poland  "ETS(Exports ~ error(\"A\") + trend(\"N\") + s~  2020  N(54, 14)  54.3
## 4 Poland  "ETS(Exports ~ error(\"A\") + trend(\"N\") + s~  2021  N(54, 19)  54.3
## 5 Poland  "ETS(Exports ~ error(\"A\") + trend(\"N\") + s~  2022  N(54, 23)  54.3

Above plot shows the forecast for the next 5 years. The forecast value is 54.3(%).

Compute the RMSE values for the training data.

acc <- accuracy(fit)
acc

## # A tibble: 1 x 11
##   Country .model          .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <fct>   <chr>           <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Poland  "ETS(Exports ~~ Trai~  1.36  2.06  1.72  3.55  4.88 0.957 0.978 -0.112

The RMSE of the training data for the ETS(A,N,N) model is 2.05.

Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

 fit2 <- pol1 %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))

acc2 <- accuracy(fit2)
acc2

## # A tibble: 1 x 11
##   Country .model       .type      ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
##   <fct>   <chr>        <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Poland  "ETS(Export~ Trai~ 6.74e-4  1.47  1.19 -0.462  3.70 0.663 0.700 0.0291

The RMSE of the training data for the ETS(A,A,N) model is 1.47. A bit better performance by the ETS(A,A,N) model over the ETS(A,N,N) model. The ETS(A,A,N) model takes into account trending, which the initial plot of Exports clearly indicated, thats why it performed better.

Compare the forecasts from both methods. Which do you think is best?

fc2 <- fit2 %>%
  forecast(h = 5)

fc2 %>%
  autoplot(pol1) +
  labs(y="% of GDP", title="Exports: Poland") +
  guides(colour = "none")

fc2

## # A fable: 5 x 5 [1Y]
## # Key:     Country, .model [1]
##   Country .model                                           Year    Exports .mean
##   <fct>   <chr>                                           <dbl>     <dist> <dbl>
## 1 Poland  "ETS(Exports ~ error(\"A\") + trend(\"A\") + s~  2018 N(55, 2.6)  55.4
## 2 Poland  "ETS(Exports ~ error(\"A\") + trend(\"A\") + s~  2019 N(57, 3.9)  56.9
## 3 Poland  "ETS(Exports ~ error(\"A\") + trend(\"A\") + s~  2020 N(58, 5.2)  58.3
## 4 Poland  "ETS(Exports ~ error(\"A\") + trend(\"A\") + s~  2021 N(60, 6.5)  59.7
## 5 Poland  "ETS(Exports ~ error(\"A\") + trend(\"A\") + s~  2022 N(61, 7.8)  61.2

The forecast values for the ETS(A,N,N) model is the same value 54.29 for every year forward. In the above plot and forecast values for the ETS(A,A,N) model shows, the predicted values for ETS(A,A,N) increases due to the additive trend parameter. The constant increase for this model of 1.4 visually appear more in line with the increasing trend of the Exports data.

Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

y <- fc$.mean[1]

lower <- y - (acc$RMSE * 1.96)
upper <- y + (acc$RMSE * 1.96)

z <- c(lower, upper)

z

## [1] 50.2544 58.3254

y2 <- fc2$.mean[1]

lower2 <- y2 - (acc2$RMSE * 1.96)
upper2 <- y2 + (acc2$RMSE * 1.96)

z2 <- c(lower2, upper2)
z2

## [1] 52.52691 58.29927

hilo1 <- fc %>% hilo()

hilo1$`95%`[1]

## <hilo[1]>
## [1] [50.06668, 58.51312]95

Above is the 95% interval for the ETS(A,N,N) model produced by R. The R produced values are 2.46 lower for the lower limit and 0.19 lower for the higher limit.

hilo2 <- fc2 %>% hilo()


hilo2$`95%`[1]

## <hilo[1]>
## [1] [52.23766, 58.58852]95

Above is the 95% interval for the ETS(A,A,N) model produced by R. The R produced values are 0.29 lower for the lower limit and 0.07 higher for the higher limit.

As noted in Exercise 8.1 the R produced confidence intervals appear a bit broader than the calculated values, but given the small differences, the calculated and R produced confidence intervals do appear in line with each other.

6.Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

options(scipen = 999)

# Extract Chinese GDP data from the global_economy dataset.
chGDP <- global_economy %>%
  filter(Country == 'China')

# Create a plot of the data.
chGDP %>% autoplot(GDP) +
  labs(title = 'Chinese GDP')

fit <-chGDP%>%
  model(
    SES = ETS(GDP ~ error("A") + trend("N") + season("N")),
    Holt = ETS(GDP ~ error("A") + trend("A") + season("N")),
    Damped = ETS(GDP ~ error("A") + trend("Ad") + season("N"))
  )


fc <- fit %>% forecast(h = 20)

fc %>%
  autoplot(chGDP, level=NULL) +
  labs(y="USD", title="GDP: China") +
  guides(colour = guide_legend(title = "Forecast"))

Above plot shows trends for the next 20 years of the Chinese GDP. The exponential smoothing forecast produces a horizontal line which doesn’t appear to follow the increasing trend. The Holt forecast shows an increasing forecast with constant growth. And the Holt dampened forecast shows increasing trend that does slow over time. The Holt dampened forecast appears the most predictable values as show some slow increases in future.

lambda <- chGDP %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

fit2 <- chGDP %>%
  model(
    SES = ETS(box_cox(GDP, lambda) ~ error("A") + trend("N") + season("N")),
    Holt = ETS(box_cox(GDP, lambda) ~ error("A") + trend("A") + season("N")),
    Damped = ETS(box_cox(GDP, lambda) ~ error("A") + trend("Ad") + season("N"))
  )


fc2 <- fit2 %>% forecast(h = 20)

fc2 %>%
  autoplot(chGDP, level=NULL) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed China GDP with $\\lambda$ = ",
         round(lambda,2)))) +
  guides(colour = guide_legend(title = "Forecast"))

Applying the Box-Cox transformation along with the ETS() function shows some interesting results. The simple exponential smoothing model produced a horizontal line. The Holt approach produced a forecast that follows an exponential growth that show straight up trend after 5 years. The dampened method also starts to increase more steady not quite as the trend of the Holt method. Given above methods I would pick the dampened method as it is the most realistic looking.

7.Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

aus_production %>%
  autoplot(Gas)

fit <- aus_production %>%
  model(fit = ETS(Gas))
report(fit)

## Series: Gas 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6528545 
##     beta  = 0.1441675 
##     gamma = 0.09784922 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
## 
##   sigma^2:  0.0032
## 
##      AIC     AICc      BIC 
## 1680.929 1681.794 1711.389

fit %>%
  forecast(h =10 ) %>%
  autoplot(aus_production)

Multiplicative seasonality is necessary due to the fact that the seasonal variation trends upwards over time.

fit2 <- aus_production %>%
  model(fit = ETS(Gas  ~ trend('Ad', phi = 0.9)))

fit2 %>%
  forecast(h = 10) %>%
  autoplot(aus_production)

Comparing the damped and non damped trend plots above, making the trend damped does not appear to improve the forecast.

8.Recall your retail time series data (from Exercise 8 in Section 2.10).

set.seed(666)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

myseries %>% autoplot(Turnover)

a. Why is multiplicative seasonality necessary for this series?

Because the retail time series displays increasing proportional seasonality as the level of the series increases.

Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

fit <- myseries %>%
  model(
    'Holt Winters Multiplicative Method' = ETS(Turnover ~ error('M') + trend('A') + season('M')),
    'Holt Winters Damped Method' = ETS(Turnover ~ error('M') + trend('Ad') + season('M'))
  )

HoltWinters <- fit %>% forecast(h = 10)

HoltWinters %>% autoplot(myseries, level = NULL)

c. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

accuracy(fit) %>% select('.model', 'RMSE')

## # A tibble: 2 x 2
##   .model                              RMSE
##   <chr>                              <dbl>
## 1 Holt Winters Multiplicative Method  15.0
## 2 Holt Winters Damped Method          14.6

The multiplicative method has a slightly lower RMSE than that of the damped method suggesting that this may be the more accurate choice for the time series.

Check that the residuals from the best method look like white noise.

fit %>%
  select('Holt Winters Multiplicative Method') %>%
  gg_tsresiduals()

tThe risiduals plot and historgram above show that the residuals for the multiplicative method look like white noise with the exception of a few outliers. The ACF confirms that most of the risiduals are within bounds.

Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

     train <- myseries %>%
  filter(year(Month) < 2011)

fit <- train %>%
  model(
    'Holt Winters Multiplicative Method' = ETS(Turnover ~ error('M') + trend('A') + season('M')),
    'Holt Winters Damped Method' = ETS(Turnover ~ error('M') + trend('Ad') + season('M')),
    'Seasonal Naive' = SNAIVE(Turnover)
  )

comp <- anti_join(myseries, train, by = c('State', 'Industry', 'Series ID', 'Month', 'Turnover'))
fc <- fit %>% forecast(comp)

autoplot(comp, Turnover) +
  autolayer(fc, level = NULL) +
  labs(title = 'Forecast Method Comparison')

The above method comparison plot shows that both the Holt Winters Damped and Multiplicative methods overthrown the Seasonal Niave method, and that the Multiplicative method is the most accurate.

9.For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

lambda <- train %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

mybc <- train %>%
  mutate(
    bc = box_cox(Turnover, lambda)
  )

fit <- mybc %>%
  model(
    'STL Box-Cox' = STL(bc ~ season(window = 'periodic'), robust = TRUE),
    'ETS Box-Cox' = ETS(bc)
  )

fit2 <- mybc %>%
  model(
    'Holt Winters Multiplicative Method' = ETS(Turnover ~ error('M') + trend('A') + season('M'))
  )

accuracy(fit)

## # A tibble: 2 x 12
##   State  Industry .model .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>    <chr>  <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 New S~ Hardwar~ STL B~ Trai~  0.320  12.9  8.67 -0.399  5.40 0.421 0.507 0.355
## 2 New S~ Hardwar~ ETS B~ Trai~ -1.74   13.2  9.37 -1.47   5.85 0.455 0.519 0.221

accuracy(fit2)

## # A tibble: 1 x 12
##   State  Industry  .model .type    ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>     <chr>  <chr> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 New S~ Hardware~ Holt ~ Trai~ -1.14  13.5  9.55 -0.912  5.79 0.450 0.515 0.247

Looking at the RMSE values of the STL and ETS Box-Cox methods, we can see that both these methods are more accurate than our previous most accurate ‘Holt Winters Multiplicative’ method that has an RMSE.

Data624 Hw 5

Dominika Markowska-Desvallons

3/13/2022