Do exercises 5.1, 5.2, 5.3, 5.4 and 5.7 in the Hyndman book.
Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:
data("global_economy")
aus_population <- global_economy %>%
filter(Country == "Australia") %>%
select(Year, Population) %>%
as_tsibble(index = Year)
aus_population%>% autoplot(Population) +
labs(title = "Australian Population",
y = "Population",
x = "Year")
# Set training data and fit the models
aus_model_rw <- aus_population %>%
model(RW(Population ~ drift()))
To forecast the Australian Population we can use the RW(y~drift()) method as this data exhibits a random walk behavior.
# Generate forecasts for the next 10 years
forecasts_rw <- aus_model_rw %>%
forecast(h = "10 years")%>%
autoplot(aus_population) +
ggtitle("RW with Drift Forecast for Australian Population")
forecasts_rw
bricks<-aus_production |>
filter(!is.na(Bricks))
# Focus on the Bricks series
bricks_data <- aus_production %>%
select(Quarter, Bricks) %>%
filter(!is.na(Bricks))
# Convert to a tsibble object
bricks_ts <- bricks_data %>%
tsibble(index = Quarter)
# Visualize the data
autoplot(bricks_ts, Bricks) +
labs(title = "Quarterly Clay Brick Production in Australia",
y = "Production (millions)",
x = "Year")
For the Bricks production the SNAIVE method is appropriate because there are seasonal patterns in the quarterly data.
bricks_data|>
model(SNAIVE(Bricks ~ lag("year"))) %>%
forecast(h= 10) %>%
autoplot(aus_production)+
labs(title = "Forecast for Clay Brick Production in Australia",
y = "Production (millions)",
x = "Year")
aus_livestock %>%
filter(State == "New South Wales",
Animal == "Lambs") %>%
autoplot() +
ggtitle("New South Wales Lambs") +
xlab("Month") +
ylab("Number of Lambs Slaughtered")
## Plot variable not specified, automatically selected `.vars = Count`
Given the series has a trend and negligible seasonal effects, the RW(y ~ drift()) will fit nthe best. It accounts for the trend by adding a drift term to the random walk.
aus_livestock %>%
filter(State == "New South Wales",
Animal == "Lambs") %>%
model(RW(Count ~ drift())) %>%
forecast(h = 24) %>%
autoplot(aus_livestock) +
labs(title = "Lambs in New South Wales",
subtitle = "July 1976 - Dec 2018, Forecasted until Dec 2020") +
xlab("Year") +
ylab("Number of Lambs Slaughtered")
wealth <- hh_budget %>%
select(Wealth)
wealth %>%
autoplot()+
labs(title = "Household Wealth")
## Plot variable not specified, automatically selected `.vars = Wealth`
(RW(Wealth ~ drift()) method might be suitable to Forecast for Household Wealth
# Fit the RW model with drift for household wealth
hh_budget %>%
model(RW(Wealth ~ drift())) %>%
forecast(h = "5 years") %>%
autoplot(hh_budget) +
labs(title = "Forecast for Household Wealth", y = "Wealth")
aus_retail %>%
filter(Industry == "Cafes, restaurants and takeaway food services") %>%
model(RW(Turnover ~ drift())) %>%
forecast(h = 24) %>%
autoplot(aus_retail) +
labs(title = "Australian takeaway food Turnover",
subtitle = "Apr 1982 - Dec 2018, Forecasted until Dec 2020") +
facet_wrap(~State, scales = "free")
Use the Facebook stock price (data set gafa_stock) to do the following:
fb_stock <- gafa_stock %>%
filter(Symbol == "FB") %>%
mutate(Date = as_date(Date))
# Plot the closing price time series with enhanced ggplot2 theme
fb_stock %>%
autoplot(Close, colour = "#2C3E50") +
labs(title = "Facebook Stock Price Over Time",
x = "Date",
y = "Closing Price (USD)") +
theme_minimal()
# Print the dataset structure for verification
print(fb_stock)
## # A tsibble: 1,258 x 8 [1D]
## # Key: Symbol [1]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FB 2014-01-02 54.8 55.2 54.2 54.7 54.7 43195500
## 2 FB 2014-01-03 55.0 55.7 54.5 54.6 54.6 38246200
## 3 FB 2014-01-06 54.4 57.3 54.0 57.2 57.2 68852600
## 4 FB 2014-01-07 57.7 58.5 57.2 57.9 57.9 77207400
## 5 FB 2014-01-08 57.6 58.4 57.2 58.2 58.2 56682400
## 6 FB 2014-01-09 58.7 59.0 56.7 57.2 57.2 92253300
## 7 FB 2014-01-10 57.1 58.3 57.1 57.9 57.9 42449500
## 8 FB 2014-01-13 57.9 58.2 55.4 55.9 55.9 63010900
## 9 FB 2014-01-14 56.5 57.8 56.1 57.7 57.7 37503600
## 10 FB 2014-01-15 58.0 58.6 57.3 57.6 57.6 33663400
## # ℹ 1,248 more rows
# Re-index based on trading days to create a regular tsibble
fb_stock_reg <- fb_stock %>%
arrange(Date) %>%
mutate(day = row_number()) %>%
as_tsibble(index = day, regular = TRUE)
# Fit models using drift-based methods
fb_fit <- fb_stock_reg %>%
model(`Naive Drift` = NAIVE(Close ~ drift()),
`Random Walk` = RW(Close ~ drift()))
# Produce forecasts for next 253 trading days (~1 year)
fb_fc <- fb_fit %>% forecast(h = 253)
# Plot forecasts with historical data overlay
fb_fc %>%
autoplot(fb_stock_reg, level = NULL) +
autolayer(fb_stock_reg, Close, colour = "black") +
labs(x = "Trading Day",
y = "Closing Price (USD)",
title = "Facebook Daily Closing Stock Prices") +
guides(colour = guide_legend(title = "Forecast")) +
theme_minimal()
fb_plot <- fb_fc %>%
autoplot(fb_stock_reg, level = NULL) +
autolayer(fb_stock_reg, Close, colour = "black") +
labs(x = "Trading Day",
y = "Closing Price (USD)",
title = "Facebook Daily Closing Stock Prices") +
guides(colour = guide_legend(title = "Forecast")) +
theme_minimal() +
geom_segment(aes(x = first(fb_stock_reg$day), y = first(fb_stock_reg$Close),
xend = last(fb_stock_reg$day), yend = last(fb_stock_reg$Close)),
linetype = 'dashed', colour = "red", size = 1)
print(fb_plot)
# Fit multiple benchmark models
fb_fit_2 <- fb_stock_reg %>%
model(Mean = MEAN(Close),
Naive = NAIVE(Close),
`Seasonal Naive` = SNAIVE(Close, period = 5), # assuming weekly seasonality on trading days
`Random Walk` = RW(Close))
# Produce forecasts for the next 253 trading days
fb_fc_2 <- fb_fit_2 %>% forecast(h = 253)
# Plot forecast comparison from different models
fb_fc_2 %>%
autoplot(fb_stock_reg, level = NULL) +
autolayer(fb_stock_reg, Close, colour = "black") +
labs(x = "Trading Day",
y = "Closing Price (USD)",
title = "Facebook Daily Closing Stock Prices - Benchmark Forecasts") +
guides(colour = guide_legend(title = "Forecast")) + theme_minimal()
Suggested best model: Among the benchmark forecasts, inspection of
residuals, forecast accuracy measures (e.g., MAE, RMSE) and the overall
fit to the historical trend suggests that the
Random Walk with drift model performs best for this
data.
Apply a seasonal naive method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.
# Extract data of interest
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)
The residuals seem to resemble white noise, indicating that the model has successfully captured the seasonal patterns. The forecast plot shows the same seasonal trends as in the historical data.
Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.
For the Australian Exports series we will use the NAIVE() method because the data are annual.
aus_exports <- global_economy %>%
filter(Country == "Australia") %>%
select(Year, Exports)
fit_exports <- aus_exports %>%
model(NAIVE(Exports))
fc_exports <- fit_exports %>% forecast(h = 5)
fit_exports %>% gg_tsresiduals()
fc_exports %>% autoplot(aus_exports) +
ggtitle("Forecasts for Australian Exports (NAIVE)")
For the Bricks series SNAIVE() method is more appropriate because the data have a seasonal pattern.
aus_bricks <- aus_production %>%
select(c(Quarter, Bricks)) %>%
na.omit()
fit <- aus_bricks %>%
model(SNAIVE(Bricks))
fit %>%
gg_tsresiduals()
fit %>%
forecast() %>%
autoplot(aus_bricks)+
ggtitle("Forecasts for Bricks (SNAIVE)")
For your retail time series (from Exercise 7 in Section 2.10):
set.seed(32)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries_train <- myseries |>
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")+
labs(title = "Turnover Over Time")
fit <- myseries_train |>
model(SNAIVE(Turnover))
fit |> gg_tsresiduals()
The residuals don’t look normal or uncorrelated at all. The
autocorrelation plot is way outside the limits, and the residuals are
showing heteroscedasticity.
fc <- fit |>
forecast(new_data = anti_join(myseries, myseries_train, by = "Month"))
autoplot(myseries, Turnover) +
autolayer(fc, colour = "blue")
Upon analyzing the plotted forecasts, it appears that the model lacks predictive accuracy. The test data set is observed to extend beyond the limits of the 95% confidence level in various areas.
#fit |> accuracy()
Training Data Accuracy
#fc |> accuracy(myseries)
Test Data Accuracy
A lower value for each metric generally indicates better accuracy. Looking at the provided data the model’s accuracy is lower on the test set than on the training set, as indicated by higher values across all accuracy metrics. This suggests potential overfitting to the training data.
How sensitive are the accuracy measures to the amount of training data used?
Generally, a lower metric value reflects enhanced accuracy. Since the metrics are all higher in the test set, the model’s accuracy is inferior on the test set relative to the training set. This points to a potential overfitting issue with the model regarding the training data. Overfitting implies that the model is excessively learning from the training data and failing to generalize effectively to the test set. The provided data does not specifically address how accuracy measures respond to the quantity of training data. More data would be helpful for a comprehensive assessment.