# Load the fpp3 meta-package (attaches tsibble, fable, feasts, dplyr, lubridate, ggplot2)
library(fpp3)
aus_pop = global_economy%>%
filter(Country == 'Australia' & Year >= 2000)%>%
select(Population)
aus_pop%>%
model(RW(Population ~ drift()))%>%
forecast()%>%
autoplot(aus_pop)+
labs(title = 'Australian Population Forecast')
aus_brick = aus_production%>%
na.omit()%>%
select(Bricks)
aus_brick%>%
model(NAIVE(Bricks))%>%
forecast()%>%
autoplot(aus_brick)+
labs(title = 'Australian Brick Production Forecast')
nsw_lambs = aus_livestock%>%
filter(Animal == 'Lambs' & State == 'New South Wales')%>%
select(Count)
nsw_lambs%>%
model(SNAIVE(Count))%>%
forecast()%>%
autoplot(nsw_lambs)+
labs(title = 'Lamb Slaughter Count in New South Wales')
usa_wealth = hh_budget%>%
filter(Country == 'USA')%>%
select(Wealth)
usa_wealth%>%
model(NAIVE(Wealth))%>%
forecast()%>%
autoplot(usa_wealth)+
labs(title = 'Forecast of Wealth in the US')
aus_turnover = aus_retail%>%
filter(Industry == 'Takeaway food services' & State == 'South Australia' )%>%
select(Turnover)
aus_turnover%>%
model(SNAIVE(Turnover))%>%
forecast()%>%
autoplot(aus_turnover)+
labs(title = 'South Australian Takeaway Food Turnover')
fb = gafa_stock%>%
filter(Symbol == 'FB')%>%
# make the irregular trading-day index regular; non-trading days become explicit gaps filled with NA
update_tsibble(regular=TRUE)%>%
fill_gaps()%>%
select(Open)
# plot using stock open price
fb%>%
ggplot(aes(x = Date, y = Open))+
geom_line()+
labs(title = 'Facebook Stock Open Price')
fb_forecast = fb%>%
model(RW(Open~drift()))%>%
forecast(h=50)
fb_forecast
## # A fable: 50 x 4 [1D]
## # Key: .model [1]
## .model Date Open .mean
## <chr> <date> <dist> <dbl>
## 1 RW(Open ~ drift()) 2019-01-01 N(135, 6.5) 135.
## 2 RW(Open ~ drift()) 2019-01-02 N(135, 13) 135.
## 3 RW(Open ~ drift()) 2019-01-03 N(135, 20) 135.
## 4 RW(Open ~ drift()) 2019-01-04 N(135, 26) 135.
## 5 RW(Open ~ drift()) 2019-01-05 N(135, 33) 135.
## 6 RW(Open ~ drift()) 2019-01-06 N(135, 39) 135.
## 7 RW(Open ~ drift()) 2019-01-07 N(135, 46) 135.
## 8 RW(Open ~ drift()) 2019-01-08 N(135, 53) 135.
## 9 RW(Open ~ drift()) 2019-01-09 N(135, 59) 135.
## 10 RW(Open ~ drift()) 2019-01-10 N(135, 66) 135.
## # ... with 40 more rows
fb_forecast%>%
autoplot(fb)+
labs(title = 'Facebook Stock Price Forecast')
# The drift forecast follows the line joining the first and last observations
end = nrow(fb)
# first point
x_start = fb$Date[1]
y_start = fb$Open[1]
# last point
x_end = fb$Date[end]
y_end = fb$Open[end]
fb%>%
model(RW(Open~drift()))%>%
forecast(h=50)%>%
autoplot(fb, level=NULL)+
geom_segment(aes(x = x_start,
y = y_start,
xend = x_end,
yend = y_end))+
labs(title = 'Facebook Stock Price Forecast')
I think the drift method is the best of the benchmark methods for forecasting the FB stock price, because it is able to capture the underlying upward trend.
fb%>%
model(NAIVE(Open))%>%
forecast(h=50)%>%
autoplot(fb)
fb%>%
model(SNAIVE(Open~lag()))%>%
forecast(h=50)%>%
autoplot(fb)
## Warning: Removed 2 row(s) containing missing values (geom_path).
fb%>%
model(MEAN(Open))%>%
forecast(h=50)%>%
autoplot(fb)
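To put the comparison in a single picture, the benchmark methods can also be fitted together in one model() call. This is only a sketch; the model labels (Drift, Naive, Mean) are arbitrary names chosen here, not part of the original code.
# Fit the benchmark methods side by side and overlay their point forecasts
fb %>%
  model(Drift = RW(Open ~ drift()),
        Naive = NAIVE(Open),
        Mean = MEAN(Open)) %>%
  forecast(h = 50) %>%
  autoplot(fb, level = NULL) +
  labs(title = 'Benchmark forecasts of the Facebook open price')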
From the histogram, we see that the distribution of the residuals is approximately normal with a mean near 0, and the residuals do not appear to be correlated. This suggests the SNAIVE model is a good fit, since the deviations from the model are not correlated.
# Extract data of interest
recent_production <- aus_production %>%
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production %>% model(SNAIVE(Beer))
# Look at the residuals
fit %>% gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
# Look at some forecasts
fit %>% forecast() %>% autoplot(recent_production)
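As a numeric check on the claim above that the residuals are uncorrelated, a Ljung-Box test can be applied to the innovation residuals. This is a sketch; lag = 8 (twice the seasonal period for quarterly data) is a choice made here.
# Ljung-Box test on the SNAIVE innovation residuals
fit %>%
  augment() %>%
  features(.innov, ljung_box, lag = 8)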
As there seems to be no seasonal trend, a NAIVE forecast should work better than SNAIVE for this dataset. Using the NAIVE method, we see that the residuals are normal and uncorrelated, which signals a good fit.
aus_export = global_economy%>%
filter(Country == 'Australia')%>%
select(Exports)
aus_export%>%
autoplot(Exports)
export_model = aus_export%>%
model(NAIVE(Exports))
export_model%>%
gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
export_model%>%
forecast()%>%
autoplot(aus_export)
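The same kind of Ljung-Box check can back up the "not correlated" assessment for the exports residuals. A sketch; lag = 10 is a common choice for annual data, not something specified in the original.
# Ljung-Box test on the NAIVE residuals of the annual exports series
export_model %>%
  augment() %>%
  features(.innov, ljung_box, lag = 10)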
Looking at the residuals, the NAIVE model seems to be a better fit for the Australian brick production than the SNAIVE model, despite the seasonal changes visible in the original plot.
aus_brick%>%
autoplot(Bricks)
brick_model = aus_brick%>%
model(NAIVE(Bricks))
brick_model%>%
gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
brick_model%>%
forecast()%>%
autoplot(aus_brick)
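Since the commentary above compares NAIVE against SNAIVE for the bricks series, the two methods can also be fitted together for a direct visual comparison. A sketch; the model labels and the two-year horizon are choices made here.
# Overlay NAIVE and SNAIVE point forecasts for the bricks series
aus_brick %>%
  model(Naive = NAIVE(Bricks),
        SNaive = SNAIVE(Bricks)) %>%
  forecast(h = '2 years') %>%
  autoplot(aus_brick, level = NULL) +
  labs(title = 'NAIVE vs SNAIVE forecasts of brick production')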
set.seed(8009)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries_train = myseries %>%
filter(year(Month) < 2011)
head(myseries_train)
## # A tsibble: 6 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 Western Australia Clothing, footwear and person~ A3349825J 1982 Apr 28.8
## 2 Western Australia Clothing, footwear and person~ A3349825J 1982 May 32.1
## 3 Western Australia Clothing, footwear and person~ A3349825J 1982 Jun 28.5
## 4 Western Australia Clothing, footwear and person~ A3349825J 1982 Jul 29
## 5 Western Australia Clothing, footwear and person~ A3349825J 1982 Aug 25.3
## 6 Western Australia Clothing, footwear and person~ A3349825J 1982 Sep 26.9
The red layer shows the training set, confirming that the pre-2011 filter worked.
autoplot(myseries, Turnover)+
autolayer(myseries_train, Turnover, colour = "red")
turnover_model = myseries_train %>%
model(SNAIVE(Turnover))
The residuals are approximately normal with a mean near 0. The variance of the residuals seems to have grown larger over time.
turnover_model%>%
gg_tsresiduals()
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).
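To complement the residual plots with a test, a Ljung-Box statistic can be computed on the training residuals. A sketch; lag = 24 (twice the seasonal period for monthly data) is an assumption made here.
# Ljung-Box test on the SNAIVE residuals of the retail training set
turnover_model %>%
  augment() %>%
  features(.innov, ljung_box, lag = 24)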
The model is able to capture the future trend somewhat accurately.
turnover_forecast = turnover_model%>%
forecast(new_data = anti_join(myseries, myseries_train))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
turnover_forecast%>%
autoplot(myseries)
turnover_model %>% accuracy()
## # A tibble: 1 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Western~ Clothin~ SNAIV~ Trai~ 4.57 11.9 8.93 5.29 9.64 1 1 0.771
turnover_forecast %>% accuracy(myseries)
## # A tibble: 1 x 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T~ West~ Clothin~ Test 4.31 11.8 9.25 2.00 5.55 1.04 0.992 0.392
By using less data to train the model, the forecast becomes less accurate on every accuracy metric.
myseries_train2 = myseries %>%
filter(year(Month) < 2000)
turnover_forecast2 = myseries_train2%>%
model(SNAIVE(Turnover))%>%
forecast(new_data = anti_join(myseries, myseries_train2))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
turnover_forecast2%>%
accuracy(myseries)
## # A tibble: 1 x 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T~ West~ Clothin~ Test 60.8 66.9 60.8 39.0 39.0 9.84 8.86 0.710
turnover_forecast2%>%
autoplot(myseries)
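To see the effect of the shorter training window in a single table, the two test-set accuracy results can be stacked. A sketch; the train_end column is a label added here and is not part of the accuracy() output.
# Stack the two test-set accuracy tables for a direct comparison
bind_rows(turnover_forecast %>% accuracy(myseries) %>% mutate(train_end = '2010 Dec'),
          turnover_forecast2 %>% accuracy(myseries) %>% mutate(train_end = '1999 Dec')) %>%
  select(train_end, RMSE, MAE, MAPE, MASE)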
I attempted to apply a Box-Cox transformation to the dataset from HW 2 3.3 by splitting the time series into two segments (increasing and decreasing). The results are much better than applying a single transformation to the entire series.
# Return the Guerrero-optimal Box-Cox lambda for a given column of a tsibble
get_lambda = function(data, feature){
data%>%
features(!!sym(feature), features = guerrero)%>%
pull(lambda_guerrero)
}
under_1980 = canadian_gas%>%
filter_index('1960 Jan' ~ '1980 Jan')
under_1980%>%
autoplot(box_cox(x = Volume,
lambda = get_lambda(under_1980,'Volume')))
over_1980 = canadian_gas%>%
filter_index('1980 Jan' ~ '2005 Feb')
over_1980%>%
autoplot(box_cox(x = Volume,
lambda = get_lambda(over_1980,'Volume')))
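For contrast with the split approach, the same transformation can be applied with a single lambda over the whole canadian_gas series, reusing the get_lambda helper defined above. A sketch for comparison only.
# Single Box-Cox transformation over the full series, for comparison with the split version
canadian_gas %>%
  autoplot(box_cox(x = Volume,
                   lambda = get_lambda(canadian_gas, 'Volume'))) +
  labs(title = 'Box-Cox transformed Canadian gas production (single lambda)')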