Load libraries
library(tidyverse)
library(fpp3)
Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:
The drift method is most appropriate here: the Australian population series has a steady upward trend and no seasonality.
population_fit <- global_economy %>%
filter(Country == "Australia") %>%
model(Drift = RW(Population ~ drift()))
population_fc <- population_fit %>%
forecast(h = 5)
population_fc %>%
autoplot(global_economy) +
labs(title = "Australia Population",
y = "Population") +
guides(colour = guide_legend(title = "Forecast"))
The seasonal naïve method is most appropriate for the brick production data, since the series is strongly seasonal.
brick_fit <- aus_production %>%
filter(!is.na(Bricks)) %>%
model(Seasonal_naive = SNAIVE(Bricks))
brick_fc <- brick_fit %>%
forecast(h = 15)
brick_fc %>%
autoplot(aus_production) +
labs(title = "Brick Production",
y = "Bricks") +
guides(colour = guide_legend(title = "Forecast"))
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
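The warning above arises because Bricks is missing (NA) for the most recent quarters of aus_production. A minimal sketch of one way to avoid it, plotting the forecasts against the NA-free history instead (same forecasts, only the plotting data changes):
# Plot against the NA-free history so geom_line() has no missing values to drop
brick_fc %>%
  autoplot(aus_production %>% filter(!is.na(Bricks))) +
  labs(title = "Brick Production", y = "Bricks")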
The naïve method is most appropriate for NSW lamb slaughter, since the series shows no clear trend or seasonality.
lamb_fit <- aus_livestock %>%
filter(Animal == "Lambs" & State == "New South Wales" ) %>%
model(Naive = NAIVE(Count))
lamb_fc <- lamb_fit %>%
forecast(h = 35)
lamb_fc %>%
autoplot(aus_livestock, level = NULL) +
labs(title = "Lamb Livestock",
y = "lamb") +
guides(colour = guide_legend(title = "Forecast"))
The drift method is most appropriate for household wealth, since the series has an upward trend.
budget_fit <- hh_budget %>%
model(Drift = RW(Wealth ~ drift()))
budget_fc <- budget_fit %>%
forecast(h = 5)
budget_fc %>%
autoplot(hh_budget) +
labs(title = "Wealth household") +
guides(colour = guide_legend(title = "Forecast"))
The drift method is most appropriate here as well, since takeaway food services turnover has a strong upward trend.
retail_fit <- aus_retail %>%
filter(Industry == "Cafes, restaurants and takeaway food services") %>%
model(Drift = RW(Turnover ~ drift()))
retail_fc <- retail_fit %>%
forecast(h = 35)
retail_fc %>%
autoplot(aus_retail) +
labs(title = "Australian Food Turnover") +
guides(colour = guide_legend(title = "Forecast")) +
facet_wrap(~State, scales = "free")
Use the Facebook stock price (data set gafa_stock) to do the following:
fb_stock <- gafa_stock %>%
filter(Symbol == "FB") %>%
mutate(day = row_number()) %>%
update_tsibble(index = day, regular = TRUE)
fb_stock %>%
autoplot(Close)
fb_stock %>%
model(RW(Close ~ drift())) %>%
forecast(h = 35) %>%
autoplot(fb_stock) +
labs(title = "Facebook Stock", y = "$USD")
fb_stock %>%
model(RW(Close ~ drift())) %>%
forecast(h = 35) %>%
autoplot(fb_stock) +
labs(title = "Facebook Stock", y = "$USD") +
annotate("segment", x = 1, y = 53.53, xend = 1300, yend = 134,
         colour = "blue", linetype = "dashed")
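This works because the drift forecast at horizon h is the last observation plus h times the average historical change, y_T + h * (y_T - y_1) / (T - 1), which is exactly the line through the first and last observations extended forward. A quick numerical check (a sketch; T_len, y1 and yT are helper variables introduced here, not part of the original code):
# The drift forecast at h = 35 should equal the extended first-to-last line
T_len <- nrow(fb_stock)
y1 <- first(fb_stock$Close)
yT <- last(fb_stock$Close)
yT + 35 * (yT - y1) / (T_len - 1)
fc_drift <- fb_stock %>%
  model(RW(Close ~ drift())) %>%
  forecast(h = 35)
tail(fc_drift$.mean, 1)  # should match the value computed above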
The drift method is best here because it captures the upward trend in the data. The naïve method is next best, since it carries the last observed price forward as the forecast. The mean method is far from the recent data.
fb_stock %>%
model(
Mean = MEAN(Close),
Naive = NAIVE(Close),
SNaive = SNAIVE(Close)) %>%
forecast(h = 35) %>%
autoplot(fb_stock, level = NULL) +
labs(title = "Facebook Stock with different models", y = "$USD")
Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.
# Extract data of interest
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
# Look at some forecasts
fit |> forecast() |> autoplot(recent_production)
# Box Pierce Test
augment(fit) %>%
features(.innov, box_pierce, lag = 8)
## # A tibble: 1 × 3
## .model bp_stat bp_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Beer) 29.7 0.000234
# Ljung Box Test
augment(fit) %>%
features(.innov, ljung_box, lag = 8)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Beer) 32.3 0.0000834
The p-values for both the Box-Pierce and Ljung-Box tests are well below 0.05, so we reject the null hypothesis of no autocorrelation: the residuals show significant autocorrelation and are not white noise.
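As a sanity check, the Ljung-Box statistic can be reproduced by hand from the residual autocorrelations, Q* = T(T+2) * sum_{k=1}^{l} r_k^2 / (T - k). A sketch, assuming the NA innovation residuals from the first seasonal period are simply dropped (so the value should be close to, if not exactly equal to, the lb_stat above):
# Recompute the Ljung-Box statistic from the residual ACF
innov <- augment(fit) %>% pull(.innov)
innov <- innov[!is.na(innov)]
T_len <- length(innov)
r <- acf(innov, lag.max = 8, plot = FALSE)$acf[-1]  # autocorrelations r_1, ..., r_8
T_len * (T_len + 2) * sum(r^2 / (T_len - seq_along(r)))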
Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.
# Extract data of interest for Australian Export
export_fit <- global_economy %>%
filter(Country == "Australia")
# Define and estimate a model
aus_fit <- export_fit %>%
model(Naive = NAIVE(Exports))
# Look at the residuals
aus_fit %>%
gg_tsresiduals()
# Look at some forecasts
aus_fit %>%
forecast() %>%
autoplot(global_economy)
# Box Pierce Test
augment(aus_fit) %>%
features(.innov, box_pierce, lag = 24)
## # A tibble: 1 × 4
## Country .model bp_stat bp_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 Australia Naive 29.2 0.213
# Ljung Box Test
augment(aus_fit) %>%
features(.innov, ljung_box, lag = 24)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 Australia Naive 38.4 0.0315
The two tests disagree at lag 24: the Ljung-Box p-value (0.0315) is below 0.05, suggesting some remaining autocorrelation, while the Box-Pierce p-value (0.213) does not reject the null hypothesis of white noise. The evidence against white noise for the Australian exports residuals is therefore borderline.
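Part of the ambiguity is the lag choice: fpp3 recommends l = 10 for non-seasonal data, and 24 lags is a lot for only 58 annual observations. As a follow-up check (results not shown here), the tests could be rerun with the recommended lag:
# Re-run the portmanteau tests with the lag suggested for non-seasonal data
augment(aus_fit) %>%
  features(.innov, box_pierce, lag = 10)
augment(aus_fit) %>%
  features(.innov, ljung_box, lag = 10)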
# Extract data of interest for Brick Production
br_fit <- aus_production %>%
filter(!is.na(Bricks))
# Define and estimate a model
brick_fit <- br_fit %>%
model(SNaive = SNAIVE(Bricks))
# Look at the residuals
brick_fit %>%
gg_tsresiduals()
# Look at some forecasts
brick_fit %>%
forecast() %>%
autoplot(aus_production)
# Box Pierce Test
augment(brick_fit) %>%
features(.innov, box_pierce, lag = 8)
## # A tibble: 1 × 3
## .model bp_stat bp_pvalue
## <chr> <dbl> <dbl>
## 1 SNaive 267. 0
# Ljung Box Test
augment(brick_fit) %>%
features(.innov, ljung_box, lag = 8)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 SNaive 274. 0
For brick production both p-values are essentially zero, so the residuals are clearly not white noise. The residuals are also not normally distributed, which makes the usual normal-based prediction intervals less reliable.
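Since the residuals are far from normal, one option is to let fable generate prediction intervals by bootstrapping the residuals rather than assuming normality. A minimal sketch (the horizon and the number of bootstrap samples are illustrative choices):
# Bootstrapped prediction intervals avoid the normality assumption
brick_fit %>%
  forecast(h = "2 years", bootstrap = TRUE, times = 1000) %>%
  autoplot(br_fit) +
  labs(title = "Brick Production (bootstrapped intervals)", y = "Bricks")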
For your retail time series (from Exercise 7 in Section 2.10):
set.seed(12345678)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries_train <- myseries |>
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")
fit <- myseries_train |>
model(SNAIVE(Turnover))
fit |> gg_tsresiduals()
Do the residuals appear to be uncorrelated and normally distributed? The residuals look roughly normally distributed, but they are not uncorrelated: the ACF plot shows significant spikes, and the training ACF1 reported below is about 0.77, so the residuals are not white noise.
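A portmanteau test (an extra check, not part of the original output) would make this concrete; lag = 24 corresponds to 2m for monthly data:
# Ljung-Box test on the training residuals of the seasonal naïve model
augment(fit) %>%
  features(.innov, ljung_box, lag = 24)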
fc <- fit |>
forecast(new_data = anti_join(myseries, myseries_train))
fc |> autoplot(myseries)
fit |> accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… SNAIV… Trai… 0.439 1.21 0.915 5.23 12.4 1 1 0.768
fc |> accuracy(myseries)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test 0.836 1.55 1.24 5.94 9.06 1.36 1.28 0.601
The accuracy measures are sensitive to how the data are split into training and test sets: if the training set is too small, the model may underfit or overfit, and a short test set gives a noisy estimate of forecast accuracy. Time series cross-validation provides a more robust assessment of the model's accuracy. Here the test-set errors (RMSE 1.55, MASE 1.36) are larger than the training errors, but not dramatically so.
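A sketch of how time series cross-validation could be applied here with stretch_tsibble(); the initial window of 48 months and the 12-month step and horizon are illustrative choices, not from the original analysis:
# Time series cross-validation: refit the seasonal naïve model on
# stretching training windows and average test accuracy across origins
myseries %>%
  stretch_tsibble(.init = 48, .step = 12) %>%
  model(SNAIVE(Turnover)) %>%
  forecast(h = 12) %>%
  accuracy(myseries)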