Data624 Time Series Features

Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate for each case:

- Australian Population (global_economy)
- Bricks (aus_production)
- NSW Lambs (aus_livestock)
- Household wealth (hh_budget)
- Australian takeaway food turnover (aus_retail)

# I used a drift forecast for the Australian series in the global_economy dataset. I also compared the drift forecast to a linear-model fit, just to eyeball what is different. The most obvious difference is visual: the tsibble/fable plot lets you see that we are working with a time series, with a clear separation between historical data and projection. The other obvious difference is that the fable forecast goes h = 10 steps into the future, whereas the ggplot fit does not extend beyond the sample.

global_economy <- tsibbledata::global_economy

# Drift forecast (h = 10) of the population for the rows coded 'AUS', drawn
# on top of the historical series.
global_economy_timeseries <- global_economy %>%
  filter(Code == "AUS") %>%
  model(Drift = RW(Population ~ drift())) %>%
  forecast(h = 10) %>%
  autoplot(global_economy) +
  ggtitle("Population in Australia", subtitle = "Forecast")

# Plain tibble holding only the rows coded 'AUS'.
# as_tibble() is the supported replacement for the deprecated as.tibble().
global_economy <- as_tibble(global_economy) %>%
  filter(Code == "AUS")

# Line through the first and last observations, i.e. the path a drift
# forecast extends. geom_abline() takes its intercept at x = 0, not at the
# first year on the axis, so the first observation is projected back to
# Year 0; passing the first-year population directly places the line far
# outside the plotted range.
# Assumes global_economy holds one row per year (the 'AUS' rows only).
first_year <- min(global_economy$Year)
last_year <- max(global_economy$Year)
first_pop <- global_economy$Population[global_economy$Year == first_year]
last_pop <- global_economy$Population[global_economy$Year == last_year]
drift_slope <- (last_pop - first_pop) / (last_year - first_year)
drift_intercept <- first_pop - drift_slope * first_year

# Scatter of the observations with a linear-model fit (red) and the
# first-to-last line (blue) for comparison.
global_economy_ggplot <- ggplot(global_economy, aes(x = Year, y = Population)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  geom_abline(slope = drift_slope, intercept = drift_intercept, color = "blue") +
  labs(
    title = "Population in Australia",
    subtitle = "Forecast"
  )

global_economy_timeseries

global_economy_ggplot

## `geom_smooth()` using formula 'y ~ x'

# Average change per year: range of the 'AUS' population divided by the span
# of years covered.
with(
  global_economy,
  diff(range(Population[Code == "AUS"])) / diff(range(Year))
)

## [1] 251271.2

############
  
# For Australia production I tried both a naive and seasonal naive and put them side-by-side.

aus_production <- tsibbledata::aus_production

# Naive forecast of Bricks (h = 20), fitted after dropping rows where Bricks
# is missing and plotted over the full aus_production data.
aus_production_naive <- aus_production %>%
  drop_na(Bricks) %>%
  model(naive = NAIVE(Bricks)) %>%
  forecast(h = 20) %>%
  autoplot(aus_production) +
  ggtitle("Australia Production", subtitle = "Naive")

# Seasonal naive counterpart, built the same way.
aus_production_snaive <- aus_production %>%
  drop_na(Bricks) %>%
  model(snaive = SNAIVE(Bricks)) %>%
  forecast(h = 20) %>%
  autoplot(aus_production) +
  ggtitle("Australia Production", subtitle = "Seasonal Naive")

# Put the two forecast plots next to each other.
ggarrange(aus_production_naive, aus_production_snaive)

## Warning: Removed 20 row(s) containing missing values (geom_path).

## Warning: Removed 20 row(s) containing missing values (geom_path).

aus_livestock <- tsibbledata::aus_livestock

# The exercise asks for NSW lambs, so select that series (not
# 'Cows and heifers') before fitting the drift model and forecasting 30
# steps ahead.
aus_livestock %>%
  filter(Animal == "Lambs", State == "New South Wales") %>%
  model(drift = RW(Count ~ drift())) %>%
  forecast(h = 30) %>%
  autoplot(aus_livestock) +
  labs(
    title = "Australia Livestock",
    subtitle = "Drift Forecast Method"
  )

# hh_budget

hh_budget <- tsibbledata::hh_budget

# Drift forecast of Wealth, 15 steps ahead. No filter is applied, so every
# series in hh_budget is modelled and plotted.
hh_budget %>%
  model(drift = RW(Wealth ~ drift())) %>%
  forecast(h = 15) %>%
  autoplot(hh_budget) +
  ggtitle("Wealth")

# We are not very confident about the level of US debt going into the future - I guess that is because the historical data goes up and down.
# Just for the sake of it, I plot the debt forecast for the next 200 years to see what the forecasting gives me - it's a huge prediction interval for the US   O_o


# Same drift model applied to Debt, pushed out to h = 200 to show how wide
# the intervals become over a very long horizon.
hh_budget %>%
  model(drift = RW(Debt ~ drift())) %>%
  forecast(h = 200) %>%
  autoplot(hh_budget) +
  ggtitle("Debt", subtitle = "Wild Projection Next 200 years")

## aus_retail

aus_retail <- tsibbledata::aus_retail

# Drift forecast (h = 25) of takeaway food services turnover, restricted to
# New South Wales.
aus_retail %>%
  filter(
    Industry == "Takeaway food services",
    State == "New South Wales"
  ) %>%
  model(drift = RW(Turnover ~ drift())) %>%
  forecast(h = 25) %>%
  autoplot(aus_retail) +
  ggtitle("Australia Retail", subtitle = "New South Wales")

Use the Facebook stock price (data set gafa_stock) to do the following:

Produce a time plot of the series.
Produce forecasts using the drift method and plot them.
Show that the forecasts are identical to extending the line drawn between the first and last observations.
Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

unique(gafa_stock[["Symbol"]]) # choose FB

## [1] "AAPL" "AMZN" "FB"   "GOOG"

# Keep only the Facebook rows.
facebook_stock <- filter(gafa_stock, Symbol == "FB")

facebook_stock$Date %>% time() %>% head()

## [1] 16072 16073 16076 16077 16078 16079

# Time plot of the closing price with a dashed red marker at x = 17735.
# NOTE(review): presumably a position on the Date axis (days since
# 1970-01-01) - confirm which event it is meant to flag.
autoplot(facebook_stock, Close) +
  geom_vline(xintercept = 17735, colour = "red", linetype = "dashed")

# The dates have gaps (non-trading days), so index the series by a
# consecutive row counter instead and mark it as regular.
facebook_stock <- update_tsibble(
  mutate(facebook_stock, day = row_number()),
  index = day,
  regular = TRUE
)

class(facebook_stock)

## [1] "tbl_ts"     "tbl_df"     "tbl"        "data.frame"

54.71 # Close on first day

## [1] 54.71

131.09 # Close on last day

## [1] 131.09

# Drift slope = (last - first) / (T - 1): T observations are separated by
# T - 1 steps, so dividing by T would understate the slope.
(131.09 - 54.71) / (length(facebook_stock$Date) - 1)

## [1] 0.06076372

# Line through the first and last closing prices. The slope divides by the
# number of steps (T - 1), and because geom_abline() takes its intercept at
# x = 0 while the series starts at the first `day` value, the first close is
# shifted back by one slope per day to get the intercept.
fb_close <- facebook_stock$Close
fb_slope <- (fb_close[length(fb_close)] - fb_close[1]) / (length(fb_close) - 1)
fb_intercept <- fb_close[1] - fb_slope * facebook_stock$day[1]

# Drift forecast with the first-to-last line overlaid; the forecast should
# continue along the dotted line.
facebook_stock %>%
  model(drift = RW(Close ~ drift())) %>%
  forecast(h = 30) %>%
  autoplot(facebook_stock) +
  geom_abline(
    slope = fb_slope,
    intercept = fb_intercept,
    color = "red",
    linetype = "dotted"
  ) +
  labs(
    title = "Facebook",
    subtitle = "Stock"
  )

######################################################

# Hold out everything after day 1095 so the benchmark forecasts can be
# compared with the values that were actually observed.
train <- facebook_stock %>%
  filter(day <= 1095)
test <- facebook_stock %>%
  filter(day > 1095)

# Fit the benchmark models. The series is indexed by a plain row counter, so
# SNAIVE() cannot infer a seasonal period and needs an explicit lag; 5
# approximates one trading week.
fb_fit <- train %>%
  model(
    Mean = MEAN(Close),
    `Naïve` = NAIVE(Close),
    `Seasonal naïve` = SNAIVE(Close ~ lag(5))
  )

# Generate forecasts
fc <- fb_fit %>% forecast(h = 200)

# Plot forecasts against actual values. Close is named explicitly because
# autolayer() otherwise picks the first measured column (Open), and labs() is
# chained with `+` so the title is attached to the plot instead of being
# evaluated as a separate statement.
fc %>%
  autoplot(train, level = NULL) +
  autolayer(test, Close, colour = "black") +
  labs(
    title = "FB Forecasts"
  )

Apply a seasonal naive method to the quarterly Australian beer production from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

# The exercise uses beer production from 1992 onwards, so restrict the
# series before fitting; the same subset is used as the plot background.
recent_production <- aus_production %>%
  filter_index("1992 Q1" ~ .)

seasonal_naive_method <- recent_production %>% model(SNAIVE(Beer))

# Residual diagnostics for the seasonal naive fit.
seasonal_naive_method %>% gg_tsresiduals() +
  labs(
    title = "Australian Beer Production",
    subtitle = "Seasonal Naive Method - Examining Residuals"
  )

## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_bin).

seasonal_naive_method %>% forecast() %>% autoplot(recent_production)

Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.

# Reload the tsibble and keep only the rows coded 'AUS'.
global_economy <- tsibbledata::global_economy %>%
  filter(Code == "AUS")

# Naive model of Exports.
naive_method <- model(global_economy, NAIVE(Exports))

# Residual diagnostics for the naive fit.
gg_tsresiduals(naive_method) +
  labs(
    title = "Australian Economy",
    subtitle = "Naive Method - Examining Residuals"
  )

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_bin).

# Forecast from the naive model (default horizon), drawn over the history.
autoplot(forecast(naive_method), global_economy) +
  ggtitle("Australian Economy", subtitle = "Naive Method - Forecast")

Data624 Time Series Features

Josef

9/25/2021