library(fpp3)
Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:
aus_pop <- global_economy |>
  filter(Country == "Australia")
aus_pop_fit <- aus_pop |>
  model(
    Naive = NAIVE(Population),
    Drift = RW(Population ~ drift()),
    # Annual data has no seasonal period, so SNAIVE is not meaningful here
    SNaive = SNAIVE(Population)
  )
aus_pop_fc <- aus_pop_fit |> forecast(h = "10 years")
aus_pop_fc |>
  autoplot(aus_pop, level = NULL) +
  labs(title = "Australian Population Forecast") +
  guides(colour = guide_legend(title = "Model"))
Best fit: Drift, to account for the upward trend in population.
bricks <- aus_production |>
  filter(!is.na(Bricks))
bricks_fit <- bricks |>
  model(
    Naive = NAIVE(Bricks),
    Drift = RW(Bricks ~ drift()),
    SNaive = SNAIVE(Bricks)
  )
bricks_fc <- bricks_fit |> forecast(h = "5 years")
bricks_fc |>
  autoplot(bricks, level = NULL) +
  labs(title = "Australian Bricks Production Forecast") +
  guides(colour = guide_legend(title = "Model"))
Best fit: SNaive, to account for the strong quarterly seasonality.
lambs <- aus_livestock |>
  filter(State == "New South Wales", Animal == "Lambs")
lambs_fit <- lambs |>
  model(
    Naive = NAIVE(Count),
    SNaive = SNAIVE(Count),
    Drift = RW(Count ~ drift())
  )
lambs_fc <- lambs_fit |> forecast(h = "5 years")
lambs_fc |>
  autoplot(lambs, level = NULL) +
  labs(title = "NSW Lamb Slaughter Forecast") +
  guides(colour = guide_legend(title = "Model"))
Best fit: SNaive, to account for the seasonality in slaughter numbers.
wealth_fit <- hh_budget |>
  model(
    Naive = NAIVE(Wealth),
    # Annual data has no seasonal period, so SNAIVE is not meaningful here
    SNaive = SNAIVE(Wealth),
    Drift = RW(Wealth ~ drift())
  )
wealth_fc <- wealth_fit |> forecast(h = "5 years")
wealth_fc |>
  autoplot(hh_budget, level = NULL) +
  labs(title = "Household Wealth Forecast") +
  guides(colour = guide_legend(title = "Model"))
Best fit: Drift, to account for the upward trend in wealth.
takeaway <- aus_retail |>
  filter(Industry == "Takeaway food services", State == "New South Wales")
takeaway_fit <- takeaway |>
  model(
    Naive = NAIVE(Turnover),
    SNaive = SNAIVE(Turnover),
    Drift = RW(Turnover ~ drift())
  )
takeaway_fc <- takeaway_fit |> forecast(h = "5 years")
takeaway_fc |>
  autoplot(takeaway, level = NULL) +
  labs(title = "NSW Takeaway Food Turnover Forecast") +
  guides(colour = guide_legend(title = "Model"))
Best fit: Drift for the trend, although SNaive is needed to account for the seasonality; neither benchmark captures both on its own, as the sketch below shows.
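One way to combine the two, assuming an STL decomposition is acceptable here (this combined model and its name are illustrative, not part of the exercise), is fabletools' decomposition_model(): the seasonally adjusted series is forecast with drift, and the seasonal component is reseasonalised with SNAIVE by default.
# Sketch: drift on the seasonally adjusted series, SNAIVE on the
# seasonal component (the default for seasonal components)
takeaway_dcmp_fit <- takeaway |>
  model(
    Combined = decomposition_model(
      STL(Turnover),
      RW(season_adjust ~ drift())
    )
  )
takeaway_dcmp_fit |>
  forecast(h = "5 years") |>
  autoplot(takeaway, level = NULL)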
Use the Facebook stock price (data set gafa_stock) to do the following:
fb <- gafa_stock |>
  filter(Symbol == "FB", year(Date) >= 2015) |>
  # Re-index by trading day so the series is regular
  # (no gaps on non-trading days)
  mutate(day = row_number()) |>
  update_tsibble(index = day, regular = TRUE)
fb |>
  autoplot(Close) +
  labs(title = "Facebook Daily Closing Stock Price")
fb_fit_drift <- fb |>
  model(Drift = RW(Close ~ drift()))
fb_fc_drift <- fb_fit_drift |> forecast(h = 180)
fb_fc_drift |>
  autoplot(fb, level = NULL) +
  labs(title = "Facebook Stock Price Drift Forecast")
fb_fit <- fb |> model(Drift = RW(Close ~ drift()))
fb_fc <- fb_fit |> forecast(h = 180)
# First and last observations, to draw the reference line
line_points <- tibble(
  day = c(fb$day[1], fb$day[nrow(fb)]),
  Close = c(fb$Close[1], fb$Close[nrow(fb)])
) |>
  as_tsibble(index = day)
autoplot(fb, Close) +
  autolayer(fb_fc, colour = "blue", level = NULL) +
  autolayer(line_points, Close, colour = "red", linetype = "dashed") +
  labs(
    title = "Facebook Stock Price with Drift",
    subtitle = "Blue = drift forecast | Red dashed = line connecting first and last points"
  )
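As a numeric sanity check (extra code, not part of the original answer): the drift coefficient estimated by RW(y ~ drift()) is the average first difference, (y_T - y_1)/(T - 1), which is exactly the slope of the line joining the first and last observations, so the point forecasts extend that line.
# Slope of the line through the first and last observations
slope <- (last(fb$Close) - first(fb$Close)) / (nrow(fb) - 1)
slope
# The fitted drift coefficient should match the slope above
fb_fit |> tidy()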
fb_fit <- fb |>
  model(
    Mean = MEAN(Close),
    Naive = NAIVE(Close),
    # The trading-day index has no seasonal period, so SNAIVE is not
    # meaningful here
    SNaive = SNAIVE(Close),
    Drift = RW(Close ~ drift())
  )
fb_fc <- fb_fit |> forecast(h = 180)
autoplot(fb, Close) +
  autolayer(fb_fc, level = NULL) +
  labs(title = "Facebook Stock Price Forecasts") +
  guides(colour = guide_legend(title = "Model"))
Here Naive seems to be the best estimate, with Drift a close second. This is typical for stock price data: usually the only thing we know is that tomorrow's closing price will be reasonably close to today's.
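To back this up with numbers, we can hold out the last stretch of the series and score the benchmarks on it (an extra check, not part of the exercise; the 120-day holdout below is an arbitrary choice).
# Score the benchmarks on a held-out test set
fb_train <- fb |> filter(day <= max(day) - 120)
fb_train |>
  model(
    Mean = MEAN(Close),
    Naive = NAIVE(Close),
    Drift = RW(Close ~ drift())
  ) |>
  forecast(h = 120) |>
  accuracy(fb) |>
  select(.model, RMSE, MAE, MAPE)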
Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help. What do you conclude?
recent_production <- aus_production |>
  filter(year(Quarter) >= 1992)
fit <- recent_production |> model(SNAIVE(Beer))
fit |> gg_tsresiduals()
fit |>
  forecast(h = "5 years") |>
  autoplot(recent_production) +
  labs(
    title = "Australian Beer Production Forecasts",
    subtitle = "SNaive"
  )
The residual plot shows no major trends or patterns; the residuals have roughly constant variance and appear randomly scattered around zero, apart from slightly higher variance in the middle of the series and notably lower variance between 1996 and 1997. The ACF plot shows most autocorrelations within the confidence bounds, with one significant negative spike at lag 4, indicating some lack of fit. The residuals are also roughly normally distributed around zero, with a slight right skew. Taken together, the SNAIVE residuals cannot simply be treated as white noise; there are fit issues remaining in this model.
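We can formalise the white-noise check with a Ljung-Box test on the innovation residuals (the choice of lag = 8, i.e. two seasonal periods, is a common rule of thumb, not from the exercise; a small p-value rejects white noise).
# Ljung-Box test: small p-value means the residuals are not white noise
augment(fit) |>
  features(.innov, ljung_box, lag = 8)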
Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.
aus_exports <- global_economy |>
  filter(Country == "Australia") |>
  # select() keeps the index (Year), so the result is already a tsibble
  select(Year, Exports)
fit_exports <- aus_exports |> model(NAIVE(Exports))
fit_exports |> gg_tsresiduals()
fit_exports |>
  forecast(h = "10 years") |>
  autoplot(aus_exports) +
  labs(
    title = "Australian Exports Forecasts",
    subtitle = "Naive"
  )
The residual plot shows no discernible trends or patterns; the residuals have roughly constant variance and appear randomly scattered around zero, except for slightly higher variance towards the end. The ACF plot shows most autocorrelations within the confidence bounds, with one significant negative spike at lag 1 that could possibly be an outlier. Even though there appears to be a seasonality-like pattern over roughly four-year spans, it is statistically insignificant. The residuals are also roughly normally distributed around zero, although with a wider curve. From all this we can say that the plain NAIVE model is an adequate benchmark for Australian exports.
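The same Ljung-Box check applies here (lag = 10 is a common choice for non-seasonal data).
# Ljung-Box test for the exports residuals
augment(fit_exports) |>
  features(.innov, ljung_box, lag = 10)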
bricks <- aus_production |>
  filter(!is.na(Bricks)) |>
  select(Quarter, Bricks)
fit_bricks <- bricks |> model(SNAIVE(Bricks))
fit_bricks |> gg_tsresiduals()
fit_bricks |>
  forecast(h = "5 years") |>
  autoplot(bricks) +
  labs(
    title = "Australian Bricks Production Forecasts",
    subtitle = "SNaive"
  )
The residual plot shows no obvious trends or patterns, and roughly constant variance except for a significant spike around 1985. The ACF plot, however, still shows strong seasonality, with autocorrelation spikes above the confidence bounds, especially at the early lags where the spikes are much higher. The residuals are roughly normally distributed around zero, but with a left skew. Here SNAIVE accounts for most of the seasonality, but there is more structure in the series than a benchmark this simple can capture.
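And again for the bricks fit, where the test should make the leftover autocorrelation explicit (lag = 8, two seasonal periods, again a rule-of-thumb choice).
# Ljung-Box test for the bricks residuals
augment(fit_bricks) |>
  features(.innov, ljung_box, lag = 8)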
For your retail time series (from Exercise 7 in Section 2.10):
set.seed(90483484)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries_train <- myseries |>
  filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")
fit <- myseries_train |>
  model(SNAIVE(Turnover))
# Test set = observations not in the training set
fc <- fit |>
  forecast(new_data = anti_join(myseries, myseries_train))
fc |> autoplot(myseries)
bind_rows(
  fit |> accuracy() |> mutate(type = "Training"),
  fc |> accuracy(myseries) |> mutate(type = "Test")
) |>
  select(.model, type, RMSE, MAE, MAPE, MASE, RMSSE) |>
  arrange(.model, type)
## # A tibble: 2 × 7
## .model type RMSE MAE MAPE MASE RMSSE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Turnover) Test 124. 109. 17.5 6.97 6.14
## 2 SNAIVE(Turnover) Training 20.1 15.7 6.54 1 1
RMSE, MAE and MAPE are low for the training set, which means the SNAIVE model fits the training data reasonably well. All three increase sharply on the test set, with MAPE rising from 6.5 to 17.5, showing that the forecasts deviate further the deeper into the test set the model predicts. It is significantly worse than what a naive or a drift model would do (we can verify this on the same split below). This matches the gg_tsresiduals() plots for this fit, which show bias, autocorrelation and increasing variance, as well as the forecast-versus-actual plot, where the actual values pull steadily away from the forecasts as the upward trend continues.
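The claim about naive and drift is easy to check on the same split (extra code, not part of the exercise).
# Compare all three benchmarks on the same test set
myseries_train |>
  model(
    Naive = NAIVE(Turnover),
    Drift = RW(Turnover ~ drift()),
    SNaive = SNAIVE(Turnover)
  ) |>
  forecast(new_data = anti_join(myseries, myseries_train)) |>
  accuracy(myseries) |>
  select(.model, RMSE, MAE, MAPE, MASE)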
While increasing the training data might slightly improve the fit within the test period, the trend in the series will not let it reduce the bias, because SNAIVE only repeats the last observed seasonal pattern and cannot capture trend.
We can check the metrics with a longer training set and a shorter one:
short_train <- myseries |> filter(year(Month) < 2000)
short_test <- anti_join(myseries, short_train)
fit_short <- short_train |> model(Seasonal_naive = SNAIVE(Turnover))
fc_short <- fit_short |> forecast(new_data = short_test)
long_train <- myseries |> filter(year(Month) < 2015)
long_test <- anti_join(myseries, long_train)
fit_long <- long_train |> model(Seasonal_naive = SNAIVE(Turnover))
fc_long <- fit_long |> forecast(new_data = long_test)
bind_rows(
  # Original training set
  bind_rows(
    fit |> accuracy() |> mutate(type = "Training", TrainingLength = "Original"),
    fc |> accuracy(myseries) |> mutate(type = "Test", TrainingLength = "Original")
  ),
  # Short training set
  bind_rows(
    fit_short |> accuracy() |> mutate(type = "Training", TrainingLength = "Short"),
    fc_short |> accuracy(myseries) |> mutate(type = "Test", TrainingLength = "Short")
  ),
  # Long training set
  bind_rows(
    fit_long |> accuracy() |> mutate(type = "Training", TrainingLength = "Long"),
    fc_long |> accuracy(myseries) |> mutate(type = "Test", TrainingLength = "Long")
  )
) |>
  select(.model, TrainingLength, type, RMSE, MAE, MAPE, MASE, RMSSE) |>
  arrange(.model, TrainingLength, type)
## # A tibble: 6 × 8
## .model TrainingLength type RMSE MAE MAPE MASE RMSSE
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Turnover) Original Test 124. 109. 17.5 6.97 6.14
## 2 SNAIVE(Turnover) Original Training 20.1 15.7 6.54 1 1
## 3 Seasonal_naive Long Test 51.6 44.2 6.71 2.56 2.37
## 4 Seasonal_naive Long Training 21.7 17.2 6.34 1 1
## 5 Seasonal_naive Short Test 268. 229. 44.0 23.0 22.7
## 6 Seasonal_naive Short Training 11.8 9.96 6.41 1 1
The accuracy results across the three training sets show that the amount of training data affects forecast performance: the short training set gives much poorer test forecasts, while the long training set reduces the test errors considerably. However, the upward trend still means SNAIVE will always under-predict, and that bias cannot be removed. Test errors remain higher than in-sample errors, although the gap shrinks as the training data grows, with the test MAPE coming to within 0.4 percentage points of the training MAPE for the longest set. But a split this skewed leaves only a short test period, which limits how much the forecasts can actually tell us.
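Rather than relying on a single skewed split, time-series cross-validation averages the test errors over many forecast origins. A sketch (the .init and .step values below are arbitrary choices, not from the exercise):
# Cross-validated accuracy: stretch the series into growing training
# sets, forecast a year ahead from each origin, and average the errors
myseries |>
  stretch_tsibble(.init = 120, .step = 12) |>
  model(SNaive = SNAIVE(Turnover)) |>
  forecast(h = 12) |>
  accuracy(myseries) |>
  select(.model, RMSE, MAE, MAPE, MASE)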