Since the trend of Australian population is steadily upward I will use a drift method in order to account for the forecast increase over time.
aus <- global_economy%>%
filter(Country=="Australia")
aus %>% model(RW(Population ~ drift()))%>%
forecast() %>%
autoplot(aus)+
labs(title = "Forecast of Australian Population")
### Bricks (aus_production)
Since Australian Brick Production seems to have some regular seasonality we will use the SNAIVE model
aus_production %>%
model(
classical_decomposition(Bricks, type = "multiplicative")
) %>%
components() %>%
autoplot() +
labs(title = "Classical multiplicative decomposition of Australian Brick Production")
## Warning: Removed 22 row(s) containing missing values (geom_path).
ausp<-aus_production %>% drop_na(Bricks)
ausp%>% model(SNAIVE(Bricks))%>%
forecast() %>%
autoplot(ausp)+
labs(title = "Forecast of Australian Brick Production")
### NSW Lambs (aus_livestock)
The Count of NSW Lambs Slaughtered seems to have some regular seasonality we will use the SNAIVE model
ausl<-aus_livestock %>%
filter(Animal == 'Lambs' & State == 'New South Wales')
ausl %>%
model(
classical_decomposition(Count, type = "multiplicative")
) %>%
components() %>%
autoplot() +
labs(title = "Classical multiplicative decomposition of NSW Lambs Slaughtered")
## Warning: Removed 6 row(s) containing missing values (geom_path).
#autoplot(aus_production,Bricks)
ausl %>% model(SNAIVE(Count))%>%
forecast() %>%
autoplot(ausl)+
labs(title = "Forecast of Lambs Slaughtered in New South Wales")
### Household wealth (hh_budget).
With what appears to be limited seasonality and an inconsistent trend across countries we’ll use NAIVE
autoplot(hh_budget,Wealth)
hh_budget%>%
model(NAIVE(Wealth))%>%
forecast() %>%
autoplot(hh_budget)+
labs(title = "Forecast of Household Wealth")
### Australian takeaway food turnover (aus_retail)
With a consistent trend upward over time we will use the drift method.
ausr<-aus_retail%>%filter(Industry=="Takeaway food services")
autoplot(ausr,Turnover)
ausr %>%filter(State=="Australian Capital Territory")%>% model(RW(Turnover ~ drift()))%>%
forecast() %>%
autoplot(ausr)+
labs(title = "Forecast of Australian Capital Territory Takeaway Food Services")
ausr %>%filter(State=="New South Wales")%>% model(RW(Turnover ~ drift()))%>%
forecast() %>%
autoplot(ausr)+
labs(title = "Forecast of New South Wales Takeaway Food Services")
ausr %>%filter(State=="Northern Territory")%>% model(RW(Turnover ~ drift()))%>%
forecast() %>%
autoplot(ausr)+
labs(title = "Forecast of Northern Territory Takeaway Food Services")
ausr %>%filter(State=="Queensland")%>% model(RW(Turnover ~ drift()))%>%
forecast() %>%
autoplot(ausr)+
labs(title = "Forecast of Queensland Takeaway Food Services")
ausr %>%filter(State=="South Australia")%>% model(RW(Turnover ~ drift()))%>%
forecast() %>%
autoplot(ausr)+
labs(title = "Forecast of South Australia Takeaway Food Services")
ausr %>%filter(State=="Tasmania")%>% model(RW(Turnover ~ drift()))%>%
forecast() %>%
autoplot(ausr)+
labs(title = "Forecast of Tasmania Takeaway Food Services")
ausr %>%filter(State=="Victoria")%>% model(RW(Turnover ~ drift()))%>%
forecast() %>%
autoplot(ausr)+
labs(title = "Forecast of Victoria Takeaway Food Services")
ausr %>%filter(State=="Western Australia")%>% model(RW(Turnover ~ drift()))%>%
forecast() %>%
autoplot(ausr)+
labs(title = "Forecast of Western Australia Takeaway Food Services")
fb<-gafa_stock%>% filter(Symbol=="FB")
autoplot(fb,Close)+
labs(title = "Closing Facebook Stock Price",y="Closing Price USD$",x="Date")
### b. Produce forecasts using the drift method and plot them.
fb<-tsibble::update_tsibble(fb, regular = TRUE)
fb %>% tsibble::fill_gaps() %>%model(RW(Close ~ drift()))%>%
forecast(h=100) %>%
autoplot(fb)+
labs(title = "Forecast of Facebook Closing Stock Price")
### c. Show that the forecasts are identical to extending the line drawn between the first and last observations.
fb %>% tsibble::fill_gaps() %>%model(RW(Close ~ drift()))%>%
forecast(h=100) %>%
autoplot(fb)+
geom_segment(
aes(
x = Date[1],y = Close[1],xend = Date[length(Date)],yend = Close[length(Close)]),
color = 'red'
)+
labs(title = "Forecast of Facebook Closing Stock Price")
### d. Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?
I think NAIVE is best because it seems to be close to the last Closing price available. Additionally, it typically performs well with economic data. However since stock prices are volitile it may be hard to predict. In the very short term it is likely that a drift method that looks back at a shorter amount of time would be the best forecast model.
fb %>% tsibble::fill_gaps() %>%model(NAIVE(Close))%>%
forecast(h=100) %>%
autoplot(fb)+
labs(title = "Naive Forecast of Facebook Closing Stock Price")
fb %>% tsibble::fill_gaps() %>%model(SNAIVE(Close))%>%
forecast(h=100) %>%
autoplot(fb)+
labs(title = "Seasonal Naive Forecast of Facebook Closing Stock Price")
## Warning: Removed 1 row(s) containing missing values (geom_path).
fb %>% tsibble::fill_gaps() %>%model(MEAN(Close))%>%
forecast(h=100) %>%
autoplot(fb)+
labs(title = "Mean Forecast of Facebook Closing Stock Price")
The residuals have a close to normal distribution but they seem to have bimodal peaks at the center of the distribution making it not quite normal. However, the mean of the residuals seems to be extremely close to 0 and the residuals seem to be pretty consistent over time with an outlier in the late 90s meaning residual variance appears constant. Due to the lack of normal distribution for the residuals SNAIVE models may be accurate for this data but models that assume normal distribution may not.
recent_production <- aus_production %>%
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production %>% model(SNAIVE(Beer))
# Look at the residuals
fit %>% gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
# Look a some forecasts
fit %>% forecast() %>% autoplot(recent_production)
Based on residuals I would go with NAIVE for the exports and SNAIVE for bricks.
ause<-global_economy%>%filter(Country=="Australia")
# Define and estimate a model
fit <- ause %>% model(NAIVE(Exports))
# Look at the residuals
fit %>% gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
# Look a some forecasts
fit %>% forecast(h=20) %>% autoplot(ause)
# Define and estimate a model
fit <- aus_production %>% drop_na()%>% model(NAIVE(Bricks, na.rm = TRUE))
# Look at the residuals
fit %>% gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
# Look a some forecasts
fit %>% forecast(h=20) %>% autoplot(aus_production, na.rm = TRUE)
## Warning: Removed 20 row(s) containing missing values (geom_path).
# Define and estimate a model
fit <- aus_production %>% drop_na() %>% model(SNAIVE(Bricks, na.rm = TRUE))
# Look at the residuals
fit %>% gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
# Look a some forecasts
fit %>% forecast(h=20) %>% autoplot(aus_production, na.rm = TRUE)
## Warning: Removed 20 row(s) containing missing values (geom_path).
set.seed(19865)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries_train <- myseries %>%
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")
fit <- myseries_train %>%
model(SNAIVE(Turnover))
fit %>% gg_tsresiduals()
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).
fc <- fit %>%
forecast(new_data = anti_join(myseries, myseries_train))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
fc %>% autoplot(myseries)
fit %>% accuracy()
## # A tibble: 1 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South… Other re… SNAIVE… Trai… 6.31 9.50 7.49 5.84 6.97 1 1 0.605
fc %>% accuracy(myseries)
## # A tibble: 1 x 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(… South… Other r… Test 21.8 33.6 25.6 8.21 10.1 3.42 3.53 0.907
We can clearly see from the plots and accuracy measures such as RMSE below that the more data used the more accurate the prediction will be.
myseries_train2 <- myseries %>%
filter(year(Month) < 2001)
myseries_train3 <- myseries %>%
filter(year(Month) < 1991)
fit2 <- myseries_train2 %>%
model(SNAIVE(Turnover))
fit3 <- myseries_train3 %>%
model(SNAIVE(Turnover))
fc2 <- fit2 %>%
forecast(new_data = anti_join(myseries, myseries_train2))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
fc2 %>% autoplot(myseries)
fc3 <- fit3 %>%
forecast(new_data = anti_join(myseries, myseries_train3))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
fc3 %>% autoplot(myseries)
fit %>% accuracy()
## # A tibble: 1 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South… Other re… SNAIVE… Trai… 6.31 9.50 7.49 5.84 6.97 1 1 0.605
fc %>% accuracy(myseries)
## # A tibble: 1 x 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(… South… Other r… Test 21.8 33.6 25.6 8.21 10.1 3.42 3.53 0.907
fit2 %>% accuracy()
## # A tibble: 1 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South… Other re… SNAIVE… Trai… 5.10 7.93 6.3 6.39 7.80 1 1 0.684
fc2 %>% accuracy(myseries)
## # A tibble: 1 x 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(… South… Other r… Test 75.0 83.6 75.0 34.9 34.9 11.9 10.5 0.940
fit3 %>% accuracy()
## # A tibble: 1 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South… Other re… SNAIVE… Trai… 3.87 4.92 4.00 7.07 7.33 1 1 0.356
fc3 %>% accuracy(myseries)
## # A tibble: 1 x 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(… South… Other r… Test 98.1 115. 98.3 52.2 52.3 24.6 23.4 0.962
```