library(fpp3) # datasets
library(tidyverse)
library(forecast)
library(tsibble)
library(flextable)
Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:
Australian Population (global_economy):
Figure 1. I selected the drift method for Australian population owing to the monotonic increase and absence of seasonal variation in this series. This approach yielded better results (based on visual inspection) than the MEAN method.
Bricks (aus_production):
Figure 2. I selected the SNAIVE method to capture seasonal variation associated with this series. A NAIVE model could also be employed. However, it would not embed the variation as part of the forecast.
NSW Lambs (aus_livestock):
Figure 3. Similar to the previous example, I selected the SNAIVE method for this series.
Household wealth (hh_budget):
Figure 4. I selected the Drift method for this series owing to the absence of distinct seasonality across these series (i.e., Country) as well as the gradual upward trend in the data over the last ~8 years of record.
Australian takeaway food turnover (aus_retail):
Figure 5. I selected the Drift method in order to capture the overall series trend. An SNAIVE forecast reproduced recent seasonal patterns but did not adequately capture growth in food turnover beginning in 2014.
# Subset australian population
<-global_economy%>%
aus_popselect(Population, Year)%>%
filter(Country %in% 'Australia')
# fit model
<-aus_pop%>%
aus_popfitmodel(RW(Population ~ drift()))
# forecast 5 yrs
<-aus_popfit%>%
aus_popfcforecast(h=5)
# plot pop forecast
%>%
aus_popfcautoplot(aus_pop)+ # include confidence intervals
labs(title = 'Figure 1. Australian Population', subtitle = 'Drift Method: 5 YR Forecast')+
theme_classic()
#===========================================================
# subset Australian production - Bricks (SNAIVE)
<- aus_production%>%
aus_prodselect(c(Bricks, Quarter))%>%
filter_index('1956 Q1' ~ '1999 Q4') # subset to exclude last 5 yrs
# fit model
<-aus_prod%>%
aus_prodfitmodel(SNAIVE(Bricks ~ lag('year'))) # add 1 yr lag
# forecast 5 yrs
<-aus_prodfit%>%
aus_prodfcforecast(h=20) # h = Q, there are 4 per year
# plot pop forecast with data overlap
<-aus_production%>% # last 15 yrs of record for comparative purposes
limitedselect(c(Bricks, Quarter))%>%
filter_index('1990 Q1' ~ '2005 Q2')
%>%
aus_prodfcautoplot(limited)+
labs(title = 'Figure 2. Australian Brick Production', subtitle = 'Seasonal Naive Method: 5 YR Forecast')+
theme_classic()
# ============================================================
# subset Australian livestock - Lambs, NSW
<-aus_livestock%>%
aus_livefilter(Animal %in% 'Lambs', State %in% 'New South Wales')%>%
filter_index('1972 Jul' ~ '2014 Dec') # exclude last 3 yrs of data
# fit model (SNAIVE)
<-aus_live%>%
aus_livefitmodel(SNAIVE(Count)) # 1 yr lag default
# forecast 5 yrs
<-aus_livefit%>%
aus_livefcforecast(h=60) # h = mo, there are 12 per year
# create plot with data overlap
<-aus_livestock%>% # last 8 yrs of record for comparative purposes
limited2filter(Animal %in% 'Lambs', State %in% 'New South Wales')%>%
filter_index('2010 Jan' ~ '2018 Dec')
%>%
aus_livefcautoplot(limited2)+
labs(title = 'Figure 3. Australian Lamb Production: NSW', subtitle = 'Seasonal Naive Method: 5 YR Forecast')+
theme_classic()
#=============================================================
# subset Austrailian household wealth
<-hh_budget#%>%
hh_wealth#select(c(Country, Year, Wealth))%>%
#filter(Country %in% 'Australia')
# fit model (Drift)
<-hh_wealth%>%
hh_fitmodel(RW (Wealth ~ drift()))
# forecast 5 yrs
<-hh_fit%>%
hh_fcforecast(h=5) # h = yr
# create plot
%>%
hh_fcautoplot(hh_wealth)+
labs(title = 'Figure 4. Australian Household Wealth', subtitle = 'Drift Method: 5 YR Forecast')+
theme_classic()
#============================================================
# subset Australian Capital Area takeaway food turnover
<-aus_retail%>%
aus_foodselect(c(State, Industry, Month, Turnover))%>%
filter(Industry %in% 'Takeaway food services', State %in% 'Australian Capital Territory')
# fit model (SNAIVE)
<-aus_food%>%
aus_foodfitmodel(RW(Turnover ~drift())) # 1 yr lag default
# forecast 2 yrs
<-aus_foodfit%>%
aus_foodfcforecast(h=24) # h = mo, there are 12 per year
# create plot with data overlap
<-aus_retail%>% # last ~10 yrs of record for comparative purposes
limited3select(c(State, Industry, Month, Turnover))%>%
filter(Industry %in% 'Takeaway food services', State %in% 'Australian Capital Territory')%>%
filter_index('2011 Jan' ~ '2018 Dec')
%>%
aus_foodfcautoplot(limited3)+
labs(title = 'Figure 5. Australian Take Away Food Turnover', subtitle = 'Drift Method: 2 YR Forecast')+
theme_classic()
Use the Facebook stock price (data set gafa_stock) to do the following:
Produce a time plot of the series:
See Figure 6.
Produce forecasts using the drift method and plot them:
See Figure 7.
Show that the forecasts are identical to extending the line drawn between the first and last observations:
See Figure 7. The mean 30 day forecast (blue line) extends the line segment connecting the first and last observations in 2018 (red).
Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?
See Figure 8. Among forecasts produced by the Mean, Naive, and Drift methods, the latter produced the best result. Neither the Mean or Naive forecasts continued the downward trend observed over the last ~ 100 days of record. Nonetheless, if a seasonal increase in the data is expected, the Mean method may be a better option.
#plot time series of adjusted closing stock, adjust for trading days, restrict to 2018
<-gafa_stock%>%
fbselect(Symbol, Date, Adj_Close)%>%
filter(Symbol %in% 'FB')%>%
filter(Date > '2018-01-01')%>%
mutate(day = row_number()) %>%
update_tsibble(index = day, regular = TRUE)
%>%
fbautoplot(color = 'steelblue')+
labs(title = 'Figure 6. Facebook Adjusted Closing Stock Price', subtitle = '2018')+
theme_classic()
# fit model -- Note 1 day intervals
<-fb%>%
fb_fitmodel(RW(Adj_Close ~ drift()))
# forecast 1 yrs
<-fb_fit%>%
fb_fcforecast(h=30) # h = day
# create plot
%>%
fb_fcautoplot(fb)+
geom_segment(aes(x = 1, y = 181.42, xend = 251, yend = 131.09, colour = "Connect First & Last"), data = fb)+
labs(title = 'Figure 7. Facebook Adjusted Closing Stock Price: 2018', subtitle = 'Drift Method: 30 Day Forecast')+
theme_classic()
# Model and plot other benchmarks
<- fb %>%
mult_fit model(
Drift = RW(Adj_Close ~ drift()),
Mean = MEAN(Adj_Close),
Naive = NAIVE(Adj_Close)
)
# Generate forecasts for 60 days
<- mult_fit %>% forecast(h = 60)
mult_fc
# Plot forecasts against actual values
%>%
mult_fc autoplot(fb, level = NULL) +
labs(
y = "Adj_Close",
title = "Figure 8. Forecast Comparison for Facebook Adjusted Closing Stock Prices", subtitle = '2018'
+
) theme_classic()
Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. What do you conclude?
The residuals are randomly & normally distributed and there does not appear to be any autocorrelation in the lag (Figure 9). That said, there is an outlier at Lag 4.
The seasonal naive forecast effectively captures the variation observed in the series and does not depart from the overall trend (Figure 10). From results presented in Figures 9 & 10, the model provides a reasonable choice for forecasting this series (2-yr. window).
# Extract data of interest
<- aus_production %>%
recent_production filter(year(Quarter) >= 1992)
# Define and estimate a model
<- recent_production %>% model(SNAIVE(Beer))
fit
# Look at the residuals --- check using a box test?
%>% gg_tsresiduals()+
fit labs(title='Figure 9. Australian Beer Production Diagnostics')
# Look a some forecasts
%>% forecast() %>%
fit autoplot(recent_production)+
labs(title='Figure 10. Australian Beer Production')+
theme_classic()
Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.
Global Economy: Australian Exports - the residuals for this series are normally distributed and homoscedastic (Figure 11). There is a hint of autocorrelation at 4-Yr lag intervals. This feature, however, is not consistent across the lag series. Similarly, inter-annual seasonality (Figure 12) is irregular. The NAIVE method is appropriate for this series.
_Australian Production: Bricks (Figure 13 & 14) - there is periodic autocorrelation within the lag series (Figure 13) and the residuals are skewed (left tail). A plot the innovation residuals points to a 5-yr cycle of increasing-decreasing values (~1990 on). The SNAIVE method is appropriate for this series but does not replicate the more recent 5-yr cycle.
# Extract data of interest
<- global_economy %>%
aus_exp select(c(Country, Year, Exports))%>%
filter(Country %in% 'Australia')
# Define model
<- aus_exp %>% model(NAIVE(Exports)) # NAIVE - annual data
aus_fit
# Plot residuals
%>% gg_tsresiduals()+
aus_fit labs(title='Figure 11. Diagnostics for Australian Exports')
# Forecast
%>% forecast(h=10) %>%
aus_fit autoplot(aus_exp)+
labs(title='Figure 12. Forecast for Australian Exports', subtitle="Naive Forecasting Method")+
theme_classic()
# Extract Australian Brick Production
<- aus_production %>%
aus_prod select(Bricks)%>%
drop_na(Bricks) # a dim() check indicates gaps (scan_gaps()?)
# Define and estimate a model
<- aus_prod%>% model(SNAIVE(Bricks))
ausprod_fit
# Plot residuals
%>% gg_tsresiduals()+
ausprod_fit labs(title='Figure 13. Diagnostics for Australian Brick Production')
# Plot forecast 2 yrs
%>%
ausprod_fitforecast(h=16)%>%
autoplot(aus_prod)+
labs(title='Figure 14. Forecast for Australian Brick Production from 1980', subtitle='SNAIVE Method')+
theme_classic()
For your retail time series (from Exercise 8 in Section 2.10) create a training dataset consisting of observations before 2011.
Dataset = Turnover in the Pharmaceutical, Cosmetic, and Toiletry Industry. Turnover has been log transformed to reduce changes in seasonal variance over time.
# replicate dataset used in HW2
set.seed(124566791)
# load data
<- aus_retail %>%
myseries filter(`Series ID` == sample(aus_retail$`Series ID`,1))
#log transform Turnover
<-myseries%>%
myseriesmutate(Turnover = log(Turnover))
<- myseries %>%
myseries_train filter(year(Month) < 2011)
Check that your data have been split appropriately by producing the following plot.
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")+
labs(title='Figure 15. South Australia Industry Turnover', subtitle='pharmaceutical, cosmetic, and toiletry', y = 'log(Turnover)')+
theme_classic()
Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).
Note: due to an increase in the seasonal variance over time, I log-transformed the Turnover series.
# fit the model
<- myseries_train %>%
fit model(SNAIVE(Turnover))
Check the residuals. Do the residuals appear to be uncorrelated and normally distributed?
There are indications of pattern in the residuals (log transformed data), particularly between 1995-2005, when the data showed a rapid increase in seasonal variation and trend. There is also autocorrelation in the lag series. It appears that the model did not capture these latter effects completely.
The residuals are approximately normally distributed. This was not the case prior to transforming the data.
# plot model diagnostics
%>% gg_tsresiduals()+
fit labs(title='Figure 16. South Australia Insdustry Diagnostics', subtitle='Residuals: pharmaceutical, cosmetic, and toiletry')
Produce forecasts for the test data
<- fit %>%
fc forecast(new_data = anti_join(myseries, myseries_train))
%>% autoplot(myseries)+
fc labs(title='Figure 17. South Australia Industry forecast', subtitle='pharmaceutical, cosmetic, and toiletry', y = 'log(Turnover)')+
theme_classic()
Forecast accuracy (MAPE) for training and test sets was 3.50 and 3.63, respectively, indicating a slight reduction in model accuracy with the test set. The general consistency of these values indicates suggests that the model was not under- or over-fit with regard to the training data.
# calculate accuracy of train and test
<-fit %>% accuracy()
fit1
<-fc %>% accuracy(myseries)
fc1
# combine into single table
flextable(rbind(fit1, fc1))
State | Industry | .model | .type | ME | RMSE | MAE | MPE | MAPE | MASE | RMSSE | ACF1 |
South Australia | Pharmaceutical, cosmetic and toiletry goods retailing | SNAIVE(Turnover) | Training | 0.08874908 | 0.1448040 | 0.1171398 | 2.715744 | 3.509398 | 1.00000 | 1.000000 | 0.7516720 |
South Australia | Pharmaceutical, cosmetic and toiletry goods retailing | SNAIVE(Turnover) | Test | 0.17065372 | 0.2067273 | 0.1740920 | 3.554163 | 3.630709 | 1.48619 | 1.427635 | 0.9228345 |
The comparative sensitivity of accuracy measures to the amount of training data will depend, in part, on the forecasting method. For example, the SNAIVE model constructs a forecast equal to the last observed value from the same season. The amount of training data preceding that season is not taken into account. This is demonstrated below (See Figures 16 & 17). If we restrict our Turnover data to the last two years of record, the MAPE scores decrease to 1.04 (training) and 0.79 (test), indicating an increase in forecast accuracy. Other methods will generate accuracy measures that are more sensitive to the long-term record (e.g., MEAN, Drift).
The sensitivity of accuracy measures will also depend on aspects of seasonality and trend in the data. For example, model accuracy will be less sensitive to the amount of training data for a series that is stationary, relative to one that is not.
# restrict analysis to last 2 years of record
<- myseries %>%
my_short filter_index('2016 Jan' ~ .)
#create trainset from 2016-17
<- my_short %>%
short_train filter_index('2016 Jan' ~ '2017 Dec')
#plot series and train set
autoplot(my_short, Turnover) +
autolayer(short_train, Turnover, colour = "red")+
labs(title='Figure 16. South Australia Industry Turnover', subtitle='pharmaceutical, cosmetic, and toiletry: 2016-2018', y = 'log(Turnover)')+
theme_classic()
#fit model
<- short_train %>%
short_fit model(SNAIVE(Turnover))
<- short_fit %>%
short_fc forecast(new_data = anti_join(my_short, short_train))
# plot forecast with emphasis on last 5 yrs of series
%>% autoplot(my_short)+
short_fc labs(title='Figure 17. South Australia Industry forecast', subtitle='pharmaceutical, cosmetic, and toiletry: 2016-2018', y = 'log(Turnover)')+
theme_classic()
# calculate accuracy of train and test
<-short_fit %>% accuracy()
short_fit1
<-short_fc %>% accuracy(my_short)
short_fc1
# combine into single table
flextable(rbind(short_fit1, short_fc1))
State | Industry | .model | .type | ME | RMSE | MAE | MPE | MAPE | MASE | RMSSE | ACF1 |
South Australia | Pharmaceutical, cosmetic and toiletry goods retailing | SNAIVE(Turnover) | Training | -0.03607393 | 0.05920387 | 0.05075744 | -0.7425382 | 1.0481485 | 1.0000000 | 1.0000000 | 0.2746252 |
South Australia | Pharmaceutical, cosmetic and toiletry goods retailing | SNAIVE(Turnover) | Test | -0.03620437 | 0.04397112 | 0.03803446 | -0.7600463 | 0.7977521 | 0.7493375 | 0.7427068 | 0.1837584 |