## Australian Population // global_economy
global_economy %>%
filter(Country == "Australia") %>% # filter for Australia
model(RW = RW(GDP ~ drift())) %>% # use the drift method
forecast(h = 10) -> pop_fc # generate forecast for 10 years
# plot the forecast
autoplot(pop_fc, global_economy) +
labs(title ="10-Year Forecast for Australian Population") +
theme(
plot.title = element_text(hjust = 0.5, face = 'bold')
)
## Bricks // aus_production
aus_production %>%
filter(!is.na(Bricks)) %>% # filter out missing data
model(SNAIVE= SNAIVE(Bricks)) %>% # use the SNAIVE method
forecast(h = 8) -> bricks_fc # generate forecast for 8 quarters
# plot the forecast
autoplot(bricks_fc, aus_production) +
labs(title ="Forecast for Australian Brick Production") +
theme(
plot.title = element_text(hjust = 0.5, face = 'bold')
)
## NSW Lambs // aus_livestock
aus_livestock %>%
filter(State == "New South Wales") %>% # filter for NSW
filter(Animal == "Lambs") %>% # filter for lambs
model(SNAIVE = SNAIVE(Count)) %>% # use the SNAIVE method
forecast(h = 24) -> lambs_fc # generate forecast for 24 months
# plot the forecast
autoplot(lambs_fc, aus_livestock) +
labs(title = "Forecast for NSW Lamb Slaughter") +
theme(
plot.title = element_text(hjust = 0.5, face = 'bold')
)
## Household Wealth // hh_budget
hh_budget %>%
model(RW = RW(Wealth ~ drift())) %>% # use the drift method
forecast(h = 2) -> wealth_fc # forecast for 2 years
# plot the forecast
autoplot(wealth_fc, hh_budget) +
labs(title = "Forecast for Household Budget") +
theme(
plot.title = element_text(hjust = 0.5, face = 'bold')
)
## Australian Takeaway Food Turnover // aus_retail
aus_retail %>%
filter(Industry == "Cafes, restaurants and takeaway food services") %>% # filter for takeaway services
filter(State == "Australian Capital Territory") %>% # filter for Australian Capital Territory
model(RW = RW(Turnover ~ drift())) %>% # use the drift method
forecast(h = 12) -> takeaway_fc # forecast for 12 months
# plot the forecast
autoplot(takeaway_fc, aus_retail) +
labs(title = "Forecast for Australian Takeaway Food Turnover") +
theme(
plot.title = element_text(hjust = 0.5, face = 'bold')
)
# filter for FB stock
fb_stock <- gafa_stock %>%
filter(Symbol == "FB") %>%
# re-index based off trading days --> using example from textbook as guide
mutate(day = row_number()) %>%
update_tsibble(index = day, regular = TRUE)
# plot the series
autoplot(fb_stock, Close) +
labs(
title = "Time Series of Facebook Stock"
) +
xlab("Trading Day") +
ylab("Close Price ($)") +
theme(
plot.title = element_text(hjust = 0.5, face = 'bold')
)
fb_stock %>%
model(RW = RW(Close ~ drift())) %>% # use the drift method
forecast(h = 365) -> fbstock_fc # forecast for 365 days
# plot the forecast
autoplot(fbstock_fc, fb_stock) +
labs(title = "Forecast for Facebook Stock\nUsing Drift Method") +
xlab("Trading Day") +
ylab("Close Price ($)") +
theme(
plot.title = element_text(hjust = 0.5, face = 'bold')
)
# compute first/last and slope
n <- nrow(fb_stock)
y1 <- dplyr::first(fb_stock$Close)
yT <- dplyr::last(fb_stock$Close)
m <- (yT - y1) / (n - 1) # slope
# build the straight line
seg_in <- tibble(
day = c(1, n),
Close = c(y1, yT)
)
# build the manual straight line into the forecast
H <- 365
manual_fc <- tibble(
day = n + 1:H,
Close = yT + m * (1:H)
)
# plot
autoplot(fbstock_fc, fb_stock) +
geom_line(data = seg_in, aes(day, Close),
linetype = "dotted", linewidth = 0.7, colour = "red") +
geom_line(data = manual_fc, aes(day, Close),
linetype = "dashed", linewidth = 0.8, colour = "black") +
labs(
title = "FB Stock Drift Forecast Equals Straight-Line Extension",
subtitle = "Red dotted = line between first and last observation;\nBlack dashed = its forward extension",
x = "Trading Day", y = "Close Price ($)"
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust=0.5, face = 'italic')
)
fb_stock %>%
model(
mean = MEAN(Close), # mean
naive = NAIVE(Close), # naive
snaive = SNAIVE(Close), # snaive
RW = RW(Close ~ drift()) # drift
) %>%
forecast(h = 180) -> various_fc # forecast for 180 days
autoplot(various_fc, fb_stock) +
labs(
title = "Facebook Stock Forecasts"
) +
xlab("Trading Day") +
ylab("Closing Price ($)") +
theme(
plot.title = element_text(hjust = 0.5, face = 'bold')
)
Drift seems like the best option since the overall trend of the stock price is upward.
# Extract data of interest
recent_production <- aus_production %>%
filter(year(Quarter) >= 1992)
# Define and estimate a model
model <- recent_production %>%
model(SNAIVE(Beer)) # SNAIVE method
# Look at the residuals
model %>%
gg_tsresiduals()
# Look a some forecasts
model %>%
forecast() %>%
autoplot(recent_production) +
labs(
title = "Beer Production in Australia",
subtitle = "Since 1992"
) +
theme(
plot.title = element_text(hjust = 0.5, face = 'bold'),
plot.subtitle = element_text(hjust = 0.5, face = 'italic')
)
From the top plot, the residuals seem to be centered around zero and there is a clear seasonal trend. There is a significant spike at lag-4 on the ACF plot, which indicates autocorrelation. We can confirm that the residuals are not white noise by applying the Ljung-Box test:
augment(model) %>%
features(.innov, ljung_box, lag=10)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Beer) 37.8 0.0000412
The p-value is very small, so we can confirm that the residuals are not white noise.
Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.
# Define and estimate a model
australian_exports <- global_economy %>%
filter(Country == "Australia") %>% # filter for Australia
model(naive = NAIVE(Exports)) # apply NAIVE method
# Look at the residuals
australian_exports %>%
gg_tsresiduals()
# Look a some forecasts
australian_exports %>%
forecast(h = 10) %>% # look at next 10 years
autoplot(global_economy %>% filter(Country == "Australia")) +
labs(
title = "Australian Exports"
) +
theme(
plot.title = element_text(hjust = 0.5, face = 'bold')
)
From the top plot, the residuals are scattered about 0 with no obvious
trend. There is one spike outside the blue dotted line on the ACF plot,
but it is not large enough to say that these residuals are not white
noise. The residuals are normally distributed according to the
histogram.
Using the Ljung-Box test again:
augment(australian_exports) %>%
features(.innov, ljung_box, lag=10)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 Australia naive 16.4 0.0896
The p-value is 0.08963678, which is large. We can confirm that the residuals are white noise.
# Define and estimate a model
bricks <- aus_production %>%
filter(!is.na(Bricks)) %>% # filter out missing data
model(snaive = SNAIVE(Bricks)) # apply SNAIVE method; SNAIVE is used because there is strong seasonality in this data
# Look at the residuals
bricks %>%
gg_tsresiduals()
# Look a some forecasts
bricks %>%
forecast(h = 8) %>% # look at next 8 quarters
autoplot(aus_production) +
labs(
title = "Australian Brick Production"
) +
theme(
plot.title = element_text(hjust = 0.5, face = 'bold')
)
According to the top plot, there is no obvious trend with the residuals. From the ACF plot, we can see that there are many spikes outside the blue dotted line. This indicates that there is autocorrelation, so the data are not white noise. We can confirm this with the Ljung-Box test.
augment(bricks) %>%
features(.innov, ljung_box, lag=10)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 snaive 301. 0
The p-value is low, so we can confirm that the residuals are not white noise.
library(readxl)
retail <- readxl::read_excel("/Users/kristinlussi/Documents/MSDS/DATA 624/retail.xlsx", skip = 1)
myseries <- retail %>%
select(Month = `Series ID`, Turnover = A3349398A) %>%
mutate(Month = yearmonth(Month)) %>% # convert to yearmonth
as_tsibble(index = Month)
# part a - from text
myseries_train <- myseries |>
filter(year(Month) < 2011)
# part b - from text
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")
# part c - from text
fit <- myseries_train %>%
model(SNAIVE())
# part d - from text
fit %>%
gg_tsresiduals()
Do the residuals appear to be uncorrelated and normally
distributed?:
According to the ACF plot, there are significant spikes between lag-0 and lag-9. This indicates that there is autocorrelation. According to the top plot, the residuals are not centered around zero and the variation is not consistent overtime. The histogram is right-skewed, so the distribution is not normal. In conclusion, the residuals are not uncorrelated and not normally distributed.
# part e - from text
fc <- fit %>%
forecast(new_data = anti_join(myseries, myseries_train))
fc %>%
autoplot(myseries)
# part f - from text
fit %>%
accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE() Training 73.9 88.3 75.1 6.07 6.13 1 1 0.631
fc %>%
accuracy(myseries)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE() Test 160. 180. 160. 6.03 6.03 2.13 2.04 0.569
The RMSE more than doubles from training to testing, which shows that the model is not very accurate. The MASE being greater than 1 indicates that this model performs worse than a naive model.
Part g. How sensitive are the accuracy measures to the amount of training data used?:
The RMSE, MAE, and MASE nearly doubled from the training to testing set, which means that these measures are very sensitive to the amount of training data used. The model’s performance declines when it is applied to unseen data.