Data 624 - HW3

1) Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:

Australian Population (global_economy)

Since the trend of Australian population is steadily upward I will use a drift method in order to account for the forecast increase over time.

aus <- global_economy%>%
  filter(Country=="Australia")
aus %>% model(RW(Population ~ drift()))%>% 
  forecast() %>% 
  autoplot(aus)+
  labs(title = "Forecast of Australian Population")

### Bricks (aus_production)

Since Australian Brick Production seems to have some regular seasonality we will use the SNAIVE model

aus_production %>%
  model(
    classical_decomposition(Bricks, type = "multiplicative")
  ) %>%
  components() %>%
  autoplot() +
  labs(title = "Classical multiplicative decomposition of Australian Brick Production")

## Warning: Removed 22 row(s) containing missing values (geom_path).

ausp<-aus_production %>% drop_na(Bricks)
ausp%>% model(SNAIVE(Bricks))%>% 
  forecast() %>% 
  autoplot(ausp)+
  labs(title = "Forecast of Australian Brick Production")

### NSW Lambs (aus_livestock)

The Count of NSW Lambs Slaughtered seems to have some regular seasonality we will use the SNAIVE model

ausl<-aus_livestock %>% 
  filter(Animal == 'Lambs' & State == 'New South Wales')


ausl %>%
  model(
    classical_decomposition(Count, type = "multiplicative")
  ) %>%
  components() %>%
  autoplot() +
  labs(title = "Classical multiplicative decomposition of NSW Lambs Slaughtered")

## Warning: Removed 6 row(s) containing missing values (geom_path).

#autoplot(aus_production,Bricks)

ausl %>% model(SNAIVE(Count))%>% 
  forecast() %>% 
  autoplot(ausl)+
  labs(title = "Forecast of Lambs Slaughtered in New South Wales")

### Household wealth (hh_budget).

With what appears to be limited seasonality and an inconsistent trend across countries we’ll use NAIVE

autoplot(hh_budget,Wealth)

hh_budget%>%
 model(NAIVE(Wealth))%>% 
  forecast() %>% 
  autoplot(hh_budget)+
  labs(title = "Forecast of Household Wealth")

### Australian takeaway food turnover (aus_retail)

With a consistent trend upward over time we will use the drift method.

ausr<-aus_retail%>%filter(Industry=="Takeaway food services")

autoplot(ausr,Turnover)

ausr %>%filter(State=="Australian Capital Territory")%>% model(RW(Turnover ~ drift()))%>% 
  forecast() %>% 
  autoplot(ausr)+
  labs(title = "Forecast of Australian Capital Territory Takeaway Food Services")

ausr %>%filter(State=="New South Wales")%>% model(RW(Turnover ~ drift()))%>% 
  forecast() %>% 
  autoplot(ausr)+
  labs(title = "Forecast of New South Wales Takeaway Food Services")

ausr %>%filter(State=="Northern Territory")%>% model(RW(Turnover ~ drift()))%>% 
  forecast() %>% 
  autoplot(ausr)+
  labs(title = "Forecast of Northern Territory Takeaway Food Services")

ausr %>%filter(State=="Queensland")%>% model(RW(Turnover ~ drift()))%>% 
  forecast() %>% 
  autoplot(ausr)+
  labs(title = "Forecast of Queensland Takeaway Food Services")

ausr %>%filter(State=="South Australia")%>% model(RW(Turnover ~ drift()))%>% 
  forecast() %>% 
  autoplot(ausr)+
  labs(title = "Forecast of South Australia Takeaway Food Services")

ausr %>%filter(State=="Tasmania")%>% model(RW(Turnover ~ drift()))%>% 
  forecast() %>% 
  autoplot(ausr)+
  labs(title = "Forecast of Tasmania Takeaway Food Services")

ausr %>%filter(State=="Victoria")%>% model(RW(Turnover ~ drift()))%>% 
  forecast() %>% 
  autoplot(ausr)+
  labs(title = "Forecast of Victoria Takeaway Food Services")

ausr %>%filter(State=="Western Australia")%>% model(RW(Turnover ~ drift()))%>% 
  forecast() %>% 
  autoplot(ausr)+
  labs(title = "Forecast of Western Australia Takeaway Food Services")

2) Use the Facebook stock price (data set gafa_stock) to do the following:

a. Produce a time plot of the series.

fb<-gafa_stock%>% filter(Symbol=="FB")

autoplot(fb,Close)+
  labs(title = "Closing Facebook Stock Price",y="Closing Price USD$",x="Date")

### b. Produce forecasts using the drift method and plot them.

fb<-tsibble::update_tsibble(fb, regular = TRUE)

fb %>% tsibble::fill_gaps() %>%model(RW(Close ~ drift()))%>% 
  forecast(h=100) %>% 
  autoplot(fb)+
  labs(title = "Forecast of Facebook Closing Stock Price")

### c. Show that the forecasts are identical to extending the line drawn between the first and last observations.

fb %>% tsibble::fill_gaps() %>%model(RW(Close ~ drift()))%>% 
  forecast(h=100) %>% 
  autoplot(fb)+
  geom_segment(
    aes(
      x = Date[1],y = Close[1],xend = Date[length(Date)],yend = Close[length(Close)]),
    color = 'red'
  )+
  labs(title = "Forecast of Facebook Closing Stock Price")

### d. Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

I think NAIVE is best because it seems to be close to the last Closing price available. Additionally, it typically performs well with economic data. However since stock prices are volitile it may be hard to predict. In the very short term it is likely that a drift method that looks back at a shorter amount of time would be the best forecast model.

fb %>% tsibble::fill_gaps() %>%model(NAIVE(Close))%>% 
  forecast(h=100) %>% 
  autoplot(fb)+
  labs(title = "Naive Forecast of Facebook Closing Stock Price")

fb %>% tsibble::fill_gaps() %>%model(SNAIVE(Close))%>% 
  forecast(h=100) %>% 
  autoplot(fb)+
  labs(title = "Seasonal Naive Forecast of Facebook Closing Stock Price")

## Warning: Removed 1 row(s) containing missing values (geom_path).

fb %>% tsibble::fill_gaps() %>%model(MEAN(Close))%>% 
  forecast(h=100) %>% 
  autoplot(fb)+
  labs(title = "Mean Forecast of Facebook Closing Stock Price")

3) Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

The residuals have a close to normal distribution but they seem to have bimodal peaks at the center of the distribution making it not quite normal. However, the mean of the residuals seems to be extremely close to 0 and the residuals seem to be pretty consistent over time with an outlier in the late 90s meaning residual variance appears constant. Due to the lack of normal distribution for the residuals SNAIVE models may be accurate for this data but models that assume normal distribution may not.

recent_production <- aus_production %>%
  filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production %>% model(SNAIVE(Beer))
# Look at the residuals
fit %>% gg_tsresiduals()

## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_bin).

# Look a some forecasts
fit %>% forecast() %>% autoplot(recent_production)

4) Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.

Based on residuals I would go with NAIVE for the exports and SNAIVE for bricks.

ause<-global_economy%>%filter(Country=="Australia")

# Define and estimate a model
fit <- ause %>% model(NAIVE(Exports))
# Look at the residuals
fit %>% gg_tsresiduals()

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_bin).

# Look a some forecasts
fit %>% forecast(h=20) %>% autoplot(ause)

# Define and estimate a model
fit <- aus_production %>% drop_na()%>% model(NAIVE(Bricks, na.rm = TRUE))
# Look at the residuals
fit %>% gg_tsresiduals()

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_bin).

# Look a some forecasts
fit %>% forecast(h=20) %>% autoplot(aus_production, na.rm = TRUE)

## Warning: Removed 20 row(s) containing missing values (geom_path).

# Define and estimate a model
fit <- aus_production %>% drop_na() %>% model(SNAIVE(Bricks, na.rm = TRUE))
# Look at the residuals
fit %>% gg_tsresiduals()

## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_bin).

# Look a some forecasts
fit %>% forecast(h=20) %>% autoplot(aus_production, na.rm = TRUE)

## Warning: Removed 20 row(s) containing missing values (geom_path).

7) For your retail time series (from Exercise 8 in Section 2.10):

a. Create a training dataset consisting of observations before 2011 using

set.seed(19865)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries_train <- myseries %>%
  filter(year(Month) < 2011)

b. Check that your data have been split appropriately by producing the following plot.

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

c. Fit a seasonal naïve model using SNAIVE() applied to your training data

fit <- myseries_train %>%
  model(SNAIVE(Turnover))

d. Check the Residuals.

fit %>% gg_tsresiduals()

## Warning: Removed 12 row(s) containing missing values (geom_path).

## Warning: Removed 12 rows containing missing values (geom_point).

## Warning: Removed 12 rows containing non-finite values (stat_bin).

e. Produce forecasts for the test data

fc <- fit %>%
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")

fc %>% autoplot(myseries)

Compare the accuracy of your forecasts against the actual values.

fit %>% accuracy()

## # A tibble: 1 x 12
##   State  Industry  .model  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>     <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South… Other re… SNAIVE… Trai…  6.31  9.50  7.49  5.84  6.97     1     1 0.605

fc %>% accuracy(myseries)

## # A tibble: 1 x 12
##   .model   State  Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>  <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(… South… Other r… Test   21.8  33.6  25.6  8.21  10.1  3.42  3.53 0.907

g. How sensitive are the accuracy measures to the amount of training data used?

We can clearly see from the plots and accuracy measures such as RMSE below that the more data used the more accurate the prediction will be.

myseries_train2 <- myseries %>%
  filter(year(Month) < 2001)

myseries_train3 <- myseries %>%
  filter(year(Month) < 1991)


fit2 <- myseries_train2 %>%
  model(SNAIVE(Turnover))

fit3 <- myseries_train3 %>%
  model(SNAIVE(Turnover))

fc2 <- fit2 %>%
  forecast(new_data = anti_join(myseries, myseries_train2))

## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")

fc2 %>% autoplot(myseries)

fc3 <- fit3 %>%
  forecast(new_data = anti_join(myseries, myseries_train3))

## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")

fc3 %>% autoplot(myseries)

fit %>% accuracy()

## # A tibble: 1 x 12
##   State  Industry  .model  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>     <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South… Other re… SNAIVE… Trai…  6.31  9.50  7.49  5.84  6.97     1     1 0.605

fc %>% accuracy(myseries)

## # A tibble: 1 x 12
##   .model   State  Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>  <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(… South… Other r… Test   21.8  33.6  25.6  8.21  10.1  3.42  3.53 0.907

fit2 %>% accuracy()

## # A tibble: 1 x 12
##   State  Industry  .model  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>     <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South… Other re… SNAIVE… Trai…  5.10  7.93   6.3  6.39  7.80     1     1 0.684

fc2 %>% accuracy(myseries)

## # A tibble: 1 x 12
##   .model   State  Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>  <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(… South… Other r… Test   75.0  83.6  75.0  34.9  34.9  11.9  10.5 0.940

fit3 %>% accuracy()

## # A tibble: 1 x 12
##   State  Industry  .model  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>     <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South… Other re… SNAIVE… Trai…  3.87  4.92  4.00  7.07  7.33     1     1 0.356

fc3 %>% accuracy(myseries)

## # A tibble: 1 x 12
##   .model   State  Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>  <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(… South… Other r… Test   98.1  115.  98.3  52.2  52.3  24.6  23.4 0.962

```