library(fpp3)
library(tidyverse)
library(kableExtra)
library(seasonal)
library(reactable)
library(tsibble)
options(scipen=100)
data("global_economy")
global_economy |>
select(Country, Population) |>
filter(Country == "Australia") |>
autoplot() +
labs(x = "Year", y = "Total Population") +
ggtitle("Total Population in Australia") +
theme(plot.title = element_text(hjust=0.3))
aus_pop <- global_economy |>
select(Country, Population) |>
filter(Country == "Australia")
aus_pop |>
model(NAIVE(Population)) |>
forecast(h=10) |>
autoplot(aus_pop) +
labs(x = "Year", y = "Total Population") +
ggtitle("10-Year Forecast for Australian Population") +
theme(plot.title = element_text(hjust=0.3))
data("aus_production")
aus_production |>
select(Bricks) |>
filter(!is.na(Bricks)) |>
autoplot() +
labs(x = "Year(Quarter)", y = "Bricks Production") +
ggtitle("Quarterly Production of Bricks in Australia") +
theme(plot.title = element_text(hjust=0.3))
aus_bricks <- aus_production |>
select(Bricks) |>
filter(!is.na(Bricks))
aus_bricks |>
model(SNAIVE(Bricks)) |>
forecast(h=10) |>
autoplot(aus_production) +
labs(x = "Year(Quarter)", y = "Bricks Production") +
ggtitle("10-Year Forecast of Bricks Production in Australia") +
theme(plot.title = element_text(hjust=0.3))
data("aus_livestock")
aus_livestock |>
select(Animal, State, Count) |>
filter(Animal == "Lambs", State == "New South Wales") |>
autoplot() +
labs(x = "Year", y = "Lamb Production") +
ggtitle("Lamb Production in New South Wales") +
theme(plot.title = element_text(hjust=0.3))
#select(Count)
#model(SNAIVE(Count)) |>
#forecast(h=10)
#autoplot(aus_live, show.legend = FALSE)
nsw_lamb_count <- aus_livestock |>
select(Animal, State, Count) |>
filter(Animal == "Lambs", State == "New South Wales")
nsw_lamb_count |>
model(NAIVE(Count)) |>
forecast(h=10) |>
autoplot(nsw_lamb_count) +
labs(x = "Month", y = "Lamb Production") +
ggtitle("10-Year Forecast of Lamb Production in New South Wales") +
theme(plot.title = element_text(hjust=0.3))
nsw_lambs <- aus_livestock %>% filter(Animal=='Lambs',State=='New South Wales') %>% select(Count)
nsw_lambs %>% model(NAIVE(Count)) %>% forecast(h=15) %>% autoplot(nsw_lambs)
data("hh_budget")
hh_budget |>
autoplot(Wealth) +
labs(x = "Year", y = "Household Wealth") +
ggtitle("Yearly Household Wealth Per Country") +
theme(plot.title = element_text(hjust=0.3))
hh_budget |>
model(RW(Wealth ~ drift())) |>
#model(NAIVE(Wealth)) |>
forecast(h = 10) |>
autoplot(hh_budget) +
facet_wrap(Country ~ . , scales = "free") +
labs(x = "Year", y = "Household Wealth ") +
ggtitle("10-Year Forecast Household Wealth Per Country") +
theme(plot.title = element_text(hjust=0.5))
data("aus_retail")
aus_retail |>
filter(Industry == "Takeaway food services") |>
autoplot() +
facet_wrap(State ~ ., scales = "free") +
#labs(x = "Month", y = "Turnover") +
ggtitle("Takeaway Food Services Turnover in Australia") +
theme(plot.title = element_text(hjust=0.07)) +
theme(legend.position = "none")
aus_food_turnover <- aus_retail |>
select(State, Industry, Turnover) |>
filter(Industry == "Takeaway food services")
aus_food_turnover |>
model(SNAIVE(Turnover)) |>
#model(RW(Turnover ~ drift())) |>
forecast(h=10) |>
autoplot() +
facet_wrap(~State, scales = "free") +
ggtitle("10-Year Forecast of Takeaway Food Services Turnover in Australia") +
theme(plot.title = element_text(hjust=0.5))
gafa_stock)
to do the following:data("gafa_stock")
fb_stock <- gafa_stock |>
filter(Symbol == "FB") |>
mutate(trading_day = row_number()) |>
update_tsibble(index = trading_day, regular = TRUE)
fb_stock |>
autoplot(Close) +
labs(x = "Trading Day", y = "Closing Stock Price") +
ggtitle("Trading Day Facebook Stock Closing Price") +
theme(plot.title = element_text(hjust=0.5))
fb_stock |>
model(RW(Close ~ drift())) |>
forecast(h=60) |>
autoplot(fb_stock) +
labs(x = "Trading Day", y = "Closing Stock Price") +
ggtitle("60-Day Trading Day Forecast of Facebook Stock Closing Price") +
theme(plot.title = element_text(hjust=0.5))
fb_stock |>
model(RW(Close ~ drift())) |>
forecast(h=60) |>
autoplot(fb_stock) +
labs(x = "Trading Day", y = "Closing Stock Price") +
ggtitle("60-Day Trading Day Forecast of Facebook Stock Closing Price") +
theme(plot.title = element_text(hjust=0.5)) +
geom_segment(aes(x = 1, y = 54.7, xend = 1258, yend = 131),
colour = "blue", linetype = "dashed")
# Used fb_stock[fb_stock$trading_day == "1", ] and fb_stock[fb_stock$trading_day == "1258", ] to get y values
fb_stock %>%
model(Mean = MEAN(Close),
#Seasonal_Naive = SNAIVE(Close),
Naive = NAIVE(Close),
Drift = NAIVE(Close ~ drift())) %>%
forecast(h = 60) %>%
autoplot(fb_stock) +
labs(x = "Trading Day", y = "Closing Stock Price") +
ggtitle("60-Day Trading Day Forecasts of Facebook Stock Closing Price") +
theme(plot.title = element_text(hjust=0.5))
I think the drift function is the best forecast for this data. The interval levels appear to be smaller compared to Mean and Naive, and therefore it would lend itself to better predict the Facebook closing stock price.
# Extract data of interest
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)
Based on the plots, there doesn’t appear to be any autocorrelation, which indicates that there is white noise. The residuals are not close to zero, and the histogram of the residuals does not show a normal distribution.
I will check the residuals residuals are distinguishable from a white noise series using the Box-Pierce and Ljung-Box tests:
aug <- recent_production |>
model(SNAIVE(Beer)) |>
augment()
autoplot(aug, .innov) +
labs(y = "Beer Production",
title = "Residuals from the Seasonal Naive Method") +
theme(plot.title = element_text(hjust=0.5))
The lag was set to 8, using the \(l = 2m\) method for quarterly seasonal data:
# Box-Pierce Test; lag = 2 * 4
aug |> features(.innov, box_pierce, lag = 8)
## # A tibble: 1 × 3
## .model bp_stat bp_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Beer) 29.7 0.000234
# Ljung-Box Test; lag = 2 * 4
aug |> features(.innov, ljung_box, lag = 8)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Beer) 32.3 0.0000834
The p-values for each test is less than 0.05, so we can conclude that the residuals are distinguishable from a white noise series.
global_economyaus_exports <- global_economy |>
select(Year, Country, Exports) |>
filter(Country == "Australia")
fit <- aus_exports |> model(NAIVE(Exports))
fit |> gg_tsresiduals()
fit |> forecast() |> autoplot(aus_exports)
While the histogram of residuals appear to show a normal distribution with its center near zero, the innovation residuals does not show autocorrelation due to the residuals not appearing to be close to the zero threshold.
aug_exports <- aus_exports |>
model(NAIVE()) |>
augment()
autoplot(aug_exports, .innov) +
labs(y = "Beer Production",
title = "Residuals from the Naive Method") +
theme(plot.title = element_text(hjust=0.5))
# Box-Pierce Test
aug_exports |> features(.innov, box_pierce, lag = 10)
## # A tibble: 1 × 4
## Country .model bp_stat bp_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 Australia NAIVE() 14.6 0.148
# Ljung-Box Test
aug_exports |> features(.innov, ljung_box, lag = 10)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 Australia NAIVE() 16.4 0.0896
Since the p-values of each test are greater than 0.05, we can conclude that the residuals are not distinguishable from a white noise series.
aus_productionfit <- aus_bricks |> model(SNAIVE(Bricks))
fit |> gg_tsresiduals()
fit |> forecast() |> autoplot(aus_bricks)
aug_bricks <- aus_bricks |>
model(SNAIVE()) |>
augment()
autoplot(aug_bricks, .innov) +
labs(y = "Brick Production",
title = "Residuals from the Seasonal Naive Method") +
theme(plot.title = element_text(hjust=0.5))
The innovation residuals starts off near the zero threshold, and then begins to have random fluctuations around 1975 Q1. The histogram residuals appear to show a skewness to the left. Based on the plots, there appears to be white noise in the data. We’ll use the Box-Pierce and Ljung-Box tests to see if we can conclude that the residuals are distinguishable from white noise:
# Box-Pierce Test; lag = 2 * 4
aug_bricks |> features(.innov, box_pierce, lag = 8)
## # A tibble: 1 × 3
## .model bp_stat bp_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE() 267. 0
# Ljung-Box Test; lag = 2 * 4
aug_bricks |> features(.innov, ljung_box, lag = 8)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE() 274. 0
With a p-value of 0, we can conclude that the residuals are distinguishable from the white noise series.
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries_train <- myseries |>
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, color = "red")
myseries_train).fit <- myseries_train |>
model(SNAIVE(Turnover))
fc <- fit |>
forecast(new_data = anti_join(myseries, myseries_train))
fc |> autoplot(myseries)
fit |>
accuracy() |>
reactable()
fc |>
accuracy(myseries) |>
reactable()
With the exception of ACF1 and MAPE, the errors in the test data increased in comparison to the training data. The MASE in the test set increased, which is interesting because the data is seasonal and would assume that the MASE in the test set would decrease as a result. This indicates that the accuracy using SNAIVE for this dataset may not be better than using another method.