#filt for Australian pop
pop_australia <- global_economy |>
filter(Country == "Australia") |>
select(Country, Year, Population)
#viz identify patterns
pop_australia |>
autoplot(Population) +
labs(title = "Australian Population Over Time",
y = "Population",
x = "Year")
#seems like a clear increasing trend over time with relatively steady growth and no apparent seasonality in annual population data
#since a clear trend, RW(y ~ drift()) is the most appropriate method
#forecasting 10 years aheda
pop_australia_forecast <- pop_australia |>
model(RW(Population ~ drift())) |>
forecast(h = 10)
pop_australia_forecast |>
autoplot(pop_australia) +
scale_y_continuous(labels = comma) +
labs(title = "Australian Population Forecast",
subtitle = "Forecasting 2017 to 2027",
y = "Population",
x = "Year") +
theme_minimal()
brick_prod (aus_production)
brick_prod <- aus_production %>%
select(Bricks)
brick_prod %>%
autoplot(Bricks) +
labs(title = "Australian brick_prod Production Over Time",
subtitle = "Quarterly data from 1956 Q1 to 2010 Q2",
y = "brick_prod Production (millions)",
x = "Year")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
#reveals seasonal patterns so SNAIVE is appropriate
brick_prod_forecast <- brick_prod %>%
filter(!is.na(Bricks)) %>%
model(SNAIVE(Bricks)) %>%
forecast(h = "6 years")
brick_prod_forecast %>%
autoplot(brick_prod) +
scale_y_continuous(labels = comma) +
labs(title = "Australian brick_prod Production Forecast") +
theme_minimal()
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
NSW Lambs (aus_livestock)
lambs_slaught_nsw <- aus_livestock %>%
filter(Animal == "Lambs", State == "New South Wales")
lambs_slaught_nsw %>%
autoplot(Count) +
labs(title = "NSW Lambs Slaughter Count Over Time",
subtitle = "Monthly data from July 1976 to December 2018",
y = "Number of Lambs",
x = "Year")
#because of strong seasonality and absence of drift, SNAIVE() is the most appropriate method
lambs_slaught_nsw %>%
model(SNAIVE(Count)) %>%
forecast(h = "7 years") %>%
autoplot(lambs_slaught_nsw) +
scale_y_continuous(labels = comma) +
labs(title = "NSW Lambs Slaughter Forecast",
subtitle = "Seasonal Naïve Method - 7 Year Forecast",
y = "Number of Lambs",
x = "Year") +
theme_minimal()
Household wealth (hh_budget).
household_wealth <- hh_budget %>%
select(Wealth)
household_wealth %>%
autoplot() +
labs(title = "Australian Household Wealth Over Time",
subtitle = "Quarterly data from 1994 Q3 to 2016 Q1",
y = "Wealth (AUD billions)",
x = "Year")
## Plot variable not specified, automatically selected `.vars = Wealth`
#because of the presence of a strong trend, RW(Wealth ~ drift()) is the most appropriate method
household_wealth_forecast <- household_wealth %>%
filter(!is.na(Wealth)) %>%
model(RW(Wealth ~ drift())) %>%
forecast(h = 8)
household_wealth_forecast %>%
autoplot(hh_budget) +
scale_y_continuous(labels = dollar_format(scale = 1/1e9, suffix = "B")) +
labs(title = "Australian Household Wealth Forecast",
subtitle = "Random Walk with Drift Model - 8 Quarter Forecast (through Q1 2018)",
y = "Wealth (AUD billions)",
x = "Year") +
theme_minimal()
Australian takeaway food turnover (aus_retail). Plot the GDP per capita for each country over time.
food_takeway_aus <- aus_retail %>%
filter(Industry == "Cafes, restaurants and takeaway food services") %>%
select(Industry, Month, Turnover, State)
food_takeway_aus %>%
autoplot(Turnover) +
facet_wrap(~State, scales = "free_y") +
scale_y_continuous(labels = comma) +
labs(title = "Australian Cafes, Restaurants and Takeaway Food Services Turnover",
subtitle = "Monthly data by state from April 1982 to December 2018",
y = "Turnover ($AUD millions)",
x = "Year") +
theme_minimal()
#data shows clear seasonality so SNAIVE() is the most appropriate method
food_takeway_aus_forecast <- food_takeway_aus %>%
model(SNAIVE(Turnover)) %>%
forecast(h = 84) # Forecast 84 months ahead (7 years)
food_takeway_aus_forecast %>%
autoplot(food_takeway_aus) +
facet_wrap(~State, scales = "free_y") +
scale_y_continuous(labels = comma) +
labs(title = "Australian Food Services Turnover Forecast",
subtitle = "Seasonal Naïve Method - 84 Month Forecast (through December 2025)",
y = "Turnover ($AUD millions)",
x = "Year") +
theme_minimal()
a - Produce a time plot of the series
facebook_stock <- gafa_stock %>%
filter(Symbol == "FB") %>%
mutate(Day = row_number()) %>%
update_tsibble(index = Day, regular = TRUE)
facebook_stock %>%
autoplot(Adj_Close) +
scale_y_continuous(labels = dollar_format()) +
labs(title = "Daily Adjusted Facebook Stock",
y = "Adjusted Closing Price (USD)",
x = "Trading Day") +
theme_minimal()
#drift method
fb_plot <- facebook_stock %>%
model(RW(Adj_Close ~ drift())) %>%
forecast(h = 365) %>%
autoplot(facebook_stock) +
scale_y_continuous(labels = dollar_format()) +
labs(title = "Close Price Facebook Daily Adjusted ") +
theme_minimal()
fb_plot
#yend = 131.09 as it is tge last observation
fb_plot2 <- fb_plot +
geom_segment(aes(x = min(facebook_stock$Day),
y = min(facebook_stock$Adj_Close),
xend = max(facebook_stock$Day), yend = 131.09),
colour = "purple", linetype = "dashed")
fb_plot2
## Warning: Use of `facebook_stock$Day` is discouraged.
## ℹ Use `Day` instead.
## Warning: Use of `facebook_stock$Adj_Close` is discouraged.
## ℹ Use `Adj_Close` instead.
## Warning: Use of `facebook_stock$Day` is discouraged.
## ℹ Use `Day` instead.
## Warning in geom_segment(aes(x = min(facebook_stock$Day), y = min(facebook_stock$Adj_Close), : All aesthetics have length 1, but the data has 1258 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
d- Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?
I believe that the drift benchmark function would be the best to forecast this data set. This is because the drift method is able to extrapolate and move the trend forward, while Naive only uses the last value.
facebook_benchmark_comparison <- facebook_stock %>%
model(
`Naive` = NAIVE(Adj_Close),
`Mean` = MEAN(Adj_Close),
`Drift` = RW(Adj_Close ~ drift()),
`Random Walk` = RW(Adj_Close)
) %>%
forecast(h = 365)
facebook_benchmark_comparison %>%
autoplot(facebook_stock, level = NULL) +
scale_y_continuous(labels = dollar_format()) +
labs(title = "Facebook Stock Price - Benchmark Comparison",
color = "Forecast Method") +
theme_minimal() +
theme(legend.position = "bottom")
Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.
# Extract data of interest
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_rug()`).
# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)
What do you conclude?
The residuals seems to appear as white noise as the residual histogram shows a bell-shaped distribution centered at zero indicating that the residuals are approximately normally distributed with mean zero. The residual plot also shows a random scattering of points around zero with no resembling pattern. These characteristics indicate that the residuals resemble white noise, so the seasonal naïve method adequately captures the patterns in Australian beer production data.
Australian Exports series from global_economy
australian_ex <- global_economy |>
filter(Country == "Australia")
fit <- australian_ex |>
model(NAIVE(Exports))
fit |>
gg_tsresiduals() +
ggtitle("Australian Exports Residual Plot")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_rug()`).
fit |>
forecast() |>
autoplot(australian_ex) +
ggtitle("Australian Exports Annual")
The residuals show a that there is white noise as the bell-shaped histogram show that it is centered at zero and the residual plot show there is no pattern or trend. Because of this, it indicates that the NAIVE model has successfully captured all systematic information in the data.
Bricks series from aus_production
aus_bricks <- aus_production |>
select("Bricks") |>
filter(!is.na(Bricks))
fit <- aus_bricks |>
model(SNAIVE(Bricks))
fit |>
gg_tsresiduals() +
ggtitle("Australian Bricks Production Residual Plot")
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_rug()`).
fit |>
forecast() |>
autoplot(aus_bricks) +
ggtitle("Australian Bricks Production Quarterly")
The residual diagnostics reveal that the model is inadequate as there is a left skew which violates the normality. In addition, the residual only randomly scatter around the horizontal line around Q1 of 1975.This tells us that the residuals are not white noise, so the model has fail to capture all systematic patterns in the data.
set.seed(4469)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries_train <- myseries %>%
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")
fit <- myseries_train %>%
model(SNAIVE(Turnover))
d.Check the residuals.
fit %>% gg_tsresiduals() +
ggtitle("Australian Retail Turnover Residual Plots")
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_rug()`).
fc <- fit %>%
forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc %>% autoplot(myseries)
f. Compare the accuracy of your forecasts against the actual values.
fit %>% accuracy()
fc %>% accuracy(myseries)
The training accuracy shows reasonable fit with the RMSE = 5.20, MAE = 4.12, and MAPE = 10.16%. However, the test accuracy shows poor forecast performance at RMSE = 19.25, MAE = 16.25, and MAPE = 19.03%. The test MASE of 3.94 is greater than 1 and indicates that the forecasts perform worse than a naive seasonal forecast. In addition, the large jump suggests the seasonal naive model fails to capture changing patterns in the data, making it unsuitable for long-term forecasting of this series.