Homework #3

Questions

Exercise 1

AUSpop <- global_economy %>%
  select(Country, Year, Population) %>%
  filter(Country == "Australia")

AUSpoppl <- AUSpop %>%
  autoplot(Population/1e3) +
  labs(y = "Population (Thousands)" )
AUSpoppl

AUSgepl <- global_economy %>% 
  filter(Country == "Australia") %>%
  model(RW(Population ~ drift())) %>%
  forecast(h = 15) %>%
  autoplot(global_economy) +
  labs(title = "Australia Population")

AUSgepl

#drift

#1 pt 2 

AUSbricks <- aus_production %>%
  select(Quarter, Bricks)
autoplot(AUSbricks)

## Plot variable not specified, automatically selected `.vars = Bricks`

## Warning: Removed 20 row(s) containing missing values (geom_path).

AUSbrickpl <- aus_production %>% 
  filter(!is.na(Bricks)) %>%
  model(SNAIVE(Bricks)) %>%
  forecast(h = 15) %>%
  autoplot(aus_production) +
  labs(title = "Australian Bricks")

AUSbrickpl

## Warning: Removed 20 row(s) containing missing values (geom_path).

#SNAIVE

#1 pt 3

AUSlamb <- aus_livestock %>%
  filter(Animal == "Lambs") %>%
  filter(State == "New South Wales")
autoplot(AUSlamb)

## Plot variable not specified, automatically selected `.vars = Count`

AUSlambpl <- aus_livestock %>%
  filter(State == "New South Wales", Animal == "Lambs") %>%
  model(NAIVE(Count)) %>%
  forecast(h = 15) %>%
  autoplot(aus_livestock) +
  labs(title = "Lambs in New South Wales")

AUSlambpl

#NAIVE

#1 pt 4

HHAUS <- hh_budget %>%
  select(Country, Year, Wealth) %>%
  filter(Country == "Australia")
autoplot(HHAUS)

## Plot variable not specified, automatically selected `.vars = Wealth`

HHCAD <- hh_budget %>%
  select(Country, Year, Wealth) %>%
  filter(Country == "Canada")
autoplot(HHCAD)

## Plot variable not specified, automatically selected `.vars = Wealth`

HHUSA <- hh_budget %>%
  select(Country, Year, Wealth) %>%
  filter(Country == "USA")
autoplot(HHUSA)

## Plot variable not specified, automatically selected `.vars = Wealth`

HHJAP <- hh_budget %>%
  select(Country, Year, Wealth) %>%
  filter(Country == "Japan")
autoplot(HHJAP)

## Plot variable not specified, automatically selected `.vars = Wealth`

HHwealpl <- hh_budget %>%
  model(RW(Wealth ~ drift())) %>%
  forecast(h = 15) %>%
  autoplot(hh_budget) +
  labs(title = "Household Wealth")

HHwealpl

#Drift

#1 pt 5

FoodTO <- aus_retail %>%
  select(Month, State, Industry, Turnover) %>%
  filter(Industry == "Takeaway food services")
autoplot(FoodTO)

## Plot variable not specified, automatically selected `.vars = Turnover`

AUStakeoutpl <- aus_retail %>%
  filter(Industry == "Takeaway food services") %>%
  model(RW(Turnover ~ drift())) %>%
  forecast(h = 15) %>%
  autoplot(aus_retail) +
  labs(title = "Australian Takeaway Food Turnover") +
  facet_wrap(~State, scales = "free")

AUStakeoutpl

#Drift

Exercise 2

FBClose <- gafa_stock %>%
  filter(Symbol == "FB") %>%
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE)

FBClosepl <- FBClose %>%
  autoplot(Close) +
  labs(title= "Close Prices of Facebook", x = "Days", y = "$$$")

FBClosepl

#2 pt 2

FBdrift <- FBClose %>%
  model(RW(Close ~ drift())) %>%
  forecast(h = 150) %>%
  autoplot(FBClose) +
  labs(title = "Close Prices of Facebook", x = "Days", y = "$$$")

FBdrift

#2 pt 3

FBdriftwline <- FBClose %>%
  model(RW(Close ~ drift())) %>%
  forecast(h = 150) %>%
  autoplot(FBClose) +
  geom_line(data = slice(FBClose, range(cumsum(!is.na(Close)))), aes(y=Close), linetype = 'dashed') +
  labs(title = "Close Prices of Facebook", x = "Days", y = "$$$")

FBdriftwline

#2 pt 4

FBall <- FBClose %>%
  model(Mean = MEAN(Close),
        Naive = NAIVE(Close),
        Drift = NAIVE(Close ~ drift())) %>%
  forecast(h = 150) %>%
  autoplot(FBClose) +
  labs(title = "Close Prices of Facebook", y = "$$$")
FBall

Drift works the best of the methods. All three of the methods are very similar, but I feel like the drift best captures the upward trend.

Exercise 3

I think this is a good benchmark for this series. The data is very seasonal so this model is a very good choice.

Exercise 4

1 True (Classical assumption) 2 False (This isn’t always correct. If the residuals are all very small positive numbers, the mean of the residuals wouldn’t be zero. The mean of the residuals needs to be zero.) 3 False (MAPE doesn’t work well with datasets with a relatively small amount of data) 4 False (Making the model more complicated only makes it harder for you, we will use the classical assumptions to find the issue.) 5 True (The test set is what you use to determine accuracy)

Exercise 5

set.seed(69420)
AUSre <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

#5 pt 2

AUSretrain <- AUSre %>%
  filter(year(Month) < 2011)

autoplot(AUSre, Turnover) +
  autolayer(AUSretrain, Turnover, colour = "blue")

#5 pt 3

AUSfit <- AUSretrain %>%
  model(SNAIVE(Turnover))
AUSfit

## # A mable: 1 x 3
## # Key:     State, Industry [1]
##   State                        Industry                                 SNAIVE…¹
##   <chr>                        <chr>                                     <model>
## 1 Australian Capital Territory Pharmaceutical, cosmetic and toiletry g… <SNAIVE>
## # … with abbreviated variable name ¹`SNAIVE(Turnover)`

#5 pt 4

AUSfit %>% gg_tsresiduals()

## Warning: Removed 12 row(s) containing missing values (geom_path).

## Warning: Removed 12 rows containing missing values (geom_point).

## Warning: Removed 12 rows containing non-finite values (stat_bin).

#5 pt 5

AUStestdata <- AUSfit %>%
  forecast(new_data = anti_join(AUSre, AUSretrain))

## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")

AUStestdata %>% autoplot(AUSre)

#5 pt 6


AUStestcheck <- AUStestdata %>% accuracy(AUSre)

AUStestcheck

## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Tu… Aust… Pharma… Test  0.633  2.91  2.28  1.48  14.6  1.59  1.48 0.884
## # … with abbreviated variable name ¹Industry

Exercise 6

AUGpigs <- aus_livestock %>%
  filter(Animal == "Pigs", State == "New South Wales")
autoplot(AUGpigs)

## Plot variable not specified, automatically selected `.vars = Count`

#6 pt 2

Trainpigs <- AUGpigs %>%
  slice(1:486)
Trainpigs

## # A tsibble: 486 x 4 [1M]
## # Key:       Animal, State [1]
##       Month Animal State            Count
##       <mth> <fct>  <fct>            <dbl>
##  1 1972 Jul Pigs   New South Wales  97400
##  2 1972 Aug Pigs   New South Wales 114700
##  3 1972 Sep Pigs   New South Wales 109900
##  4 1972 Oct Pigs   New South Wales 108300
##  5 1972 Nov Pigs   New South Wales 122200
##  6 1972 Dec Pigs   New South Wales 106900
##  7 1973 Jan Pigs   New South Wales  96600
##  8 1973 Feb Pigs   New South Wales  96700
##  9 1973 Mar Pigs   New South Wales 121200
## 10 1973 Apr Pigs   New South Wales  99300
## # … with 476 more rows

Testpigs <- AUGpigs %>%
  slice(487:558)
Testpigs

## # A tsibble: 72 x 4 [1M]
## # Key:       Animal, State [1]
##       Month Animal State           Count
##       <mth> <fct>  <fct>           <dbl>
##  1 2013 Jan Pigs   New South Wales 71300
##  2 2013 Feb Pigs   New South Wales 69700
##  3 2013 Mar Pigs   New South Wales 79900
##  4 2013 Apr Pigs   New South Wales 74300
##  5 2013 May Pigs   New South Wales 87200
##  6 2013 Jun Pigs   New South Wales 71200
##  7 2013 Jul Pigs   New South Wales 86200
##  8 2013 Aug Pigs   New South Wales 78000
##  9 2013 Sep Pigs   New South Wales 71200
## 10 2013 Oct Pigs   New South Wales 78200
## # … with 62 more rows

#6 pt 3

Fitpig <- Trainpigs %>%
  model( Mean = MEAN(Count), `Naïve` = NAIVE(Count), `Seasonal naïve` = SNAIVE(Count), Drift = RW(Count ~ drift()))
Pigforecast <- Fitpig %>%
  forecast(h = 72)

Piggyfc <- Pigforecast %>%
  autoplot((AUGpigs),level = NULL) +
  labs(y = "Count", title = "Forecasts Pigs") +
  guides(colour = guide_legend(title = "Forecast"))
Piggyfc

Accpigs <- accuracy(Pigforecast, AUGpigs)
Accpigs

## # A tibble: 4 × 12
##   .model       Animal State .type      ME   RMSE    MAE    MPE  MAPE  MASE RMSSE
##   <chr>        <fct>  <fct> <chr>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 Drift        Pigs   New … Test   -4685.  8091.  6967.  -7.36  10.1 0.657 0.557
## 2 Mean         Pigs   New … Test  -39360. 39894. 39360. -55.9   55.9 3.71  2.75 
## 3 Naïve        Pigs   New … Test   -6138.  8941.  7840.  -9.39  11.4 0.740 0.615
## 4 Seasonal na… Pigs   New … Test   -5838. 10111.  8174.  -8.81  11.9 0.771 0.696
## # … with 1 more variable: ACF1 <dbl>

#6 pt 4

MPigs <- Trainpigs %>%
  model( Mean = MEAN(Count))
NPigs <- Trainpigs %>%
  model( Naive = NAIVE(Count))
SNPigs <- Trainpigs %>%
  model(`Seasonal naïve` = SNAIVE(Count))
DPigs <- Trainpigs %>%
  model( Drift = RW(Count ~ drift()))
MPigs %>% 
  gg_tsresiduals() +
  labs( title = "Mean Method Residuals")

NPigs %>% 
  gg_tsresiduals() +
  labs( title = "Naive Model Residuals")

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_bin).

SNPigs %>%
  gg_tsresiduals() +
  labs( title = "Seasonal Naive Residuals")

## Warning: Removed 12 row(s) containing missing values (geom_path).

## Warning: Removed 12 rows containing missing values (geom_point).

## Warning: Removed 12 rows containing non-finite values (stat_bin).

DPigs %>%
  gg_tsresiduals() +
  labs( title = "Drift Method Residuals")

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_bin).

The seasonal naive method is the best method for this data. My residuals look very close to white noise. It seems like there may be something causing the residuals to be slightly off from white noise.

Exercise 7

Bteam <- aus_production %>%
  select(Quarter, Bricks) %>%
  slice(1:198)
autoplot(Bteam)

## Plot variable not specified, automatically selected `.vars = Bricks`

Bteam %>%
  model(STL(box_cox(Bricks,0) ~ season(window=9), robust=TRUE)) %>%
  components() %>% autoplot() +
  labs(title = "STL decomposition of bricks multiplictive")

Bteam %>%
  model(STL(box_cox(Bricks,0) ~ season(window="periodic"), robust=TRUE)) %>%
  components() %>% autoplot() +
  labs(title = "STL decomposition of bricks multiplictive (periodic)")

Bteam %>%
  model(STL(box_cox(Bricks,1) ~ season(window=9), robust=TRUE)) %>%
  components() %>% autoplot() +
  labs(title = "STL decomposition of bricks additive")

Bteam %>%
  model(STL(box_cox(Bricks,1) ~ season(window="periodic"), robust=TRUE)) %>%
  components() %>% autoplot() +
  labs(title = "STL decomposition of bricks additive (periodic")

fixedBteam <- Bteam %>%
  model(STL(Bricks ~ trend(window=5) + season(window="periodic"), robust = TRUE)) %>%
  components() %>% autoplot() +
  labs(title = "STL decomposition of bricks (periodic)")
fixedBteam

stl_brick_fixed_st <- stl(Bteam, s.window = "periodic", robust = TRUE)

RegBteam <- Bteam %>%
  model(STL(Bricks ~ season(window=9), robust=TRUE)) %>%
  components() %>% autoplot() +
  labs(title = "STL decomposition of bricks")
RegBteam

stl_brick_changing_st <- stl(Bteam, s.window = 5, robust = TRUE)

#7 pt 2

dcmp <- Bteam %>%
  model(stl = STL(Bricks))
Compdcmp <- components(dcmp)

Compdcmp %>%
  as_tsibble() %>%
  autoplot(Bricks, colour="gray") +
  geom_line(aes(y=trend), colour = "#D55E00") +
  labs( y = "Bricks", title = "Bricks Trend" )

components(dcmp) %>%
  as_tsibble() %>%
  autoplot(Bricks, colour = "gray") +
  geom_line(aes(y=season_adjust), colour = "#0072B2") +
  labs(y = "Bricks", title = "Bricks Seasonally adjusted")

#7 pt 3

AUStrend <- Compdcmp %>%
  select(-c(.model, Bricks, trend, season_year, remainder))

AUStrend %>%
  model(NAIVE(season_adjust)) %>%
  forecast(h = "5 years") %>%
  autoplot(AUStrend) +
  labs(title = "Seasonally adjusted naive forecast", y = "Bricks")

#7 pt 4

AUSwall <- Bteam %>%
  model(stlf = decomposition_model(STL(Bricks ~ trend(window = 4), robust = TRUE), NAIVE(season_adjust)))
AUSwall %>%
  forecast() %>%
  autoplot(Bteam) +
  labs(title = "Decomposition with decomposition_model()")

#7 pt 5

gg_tsresiduals(AUSwall)

## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_bin).

gg_tsresiduals(dcmp)

#7 pt 6

AUSbrick2year <- aus_production %>%
  filter(year(Quarter) > 2003) %>%
  select(-c(Beer, Tobacco, Cement, Electricity, Gas))

AUSBrickomit <- na.omit(AUSbrick2year)

AUSBrick2year_snaive <- AUSBrickomit %>%
  model(SNAIVE(Bricks ~ lag("1 year"))) %>%
  forecast(h = "2 years") %>%
  autoplot(AUSBrickomit) +
  labs(title = "Forecast with SNAIVE since 2004")

AUSBrick2year_snaive

AUSBrickdcmp2 <- AUSBrickomit %>%
  model(stlf = decomposition_model(STL(Bricks ~ trend(window = 4), robust = TRUE), NAIVE(season_adjust)))
AUSBrickdcmp2 %>%
  forecast() %>%
  autoplot(AUSBrickomit) +
  labs(title = "Forecast with decomposition_model() since 2004")

The one using the decomposition model () was the best for this data. This is true because the Decomposition model accounts for periods in the future being much different from the current periods. The SNAIVE model has slight bias because the periods are very similar from those in the past. The decomposition model uses trend as well.

Exercise 8

tourism

## # A tsibble: 24,320 x 5 [1Q]
## # Key:       Region, State, Purpose [304]
##    Quarter Region   State           Purpose  Trips
##      <qtr> <chr>    <chr>           <chr>    <dbl>
##  1 1998 Q1 Adelaide South Australia Business  135.
##  2 1998 Q2 Adelaide South Australia Business  110.
##  3 1998 Q3 Adelaide South Australia Business  166.
##  4 1998 Q4 Adelaide South Australia Business  127.
##  5 1999 Q1 Adelaide South Australia Business  137.
##  6 1999 Q2 Adelaide South Australia Business  200.
##  7 1999 Q3 Adelaide South Australia Business  169.
##  8 1999 Q4 Adelaide South Australia Business  134.
##  9 2000 Q1 Adelaide South Australia Business  154.
## 10 2000 Q2 Adelaide South Australia Business  169.
## # … with 24,310 more rows

GCtourism <- tourism %>%
  filter(Region == "Gold Coast") %>%
  summarise(Trips = sum(Trips))

gctrain1 <- GCtourism %>%
  filter(year(Quarter) <= 2016)

gctrain2 <- GCtourism %>%
  filter(year(Quarter) <= 2015)

gctrain3 <- GCtourism %>%
  filter(year(Quarter) <= 2014)
 
gcfc1 <- gctrain1 %>%
  model(SNAIVE(Trips ~ lag("year"))) %>%
  forecast(h = "1 years")

gcfc1 %>%
  autoplot(gctrain1, level = NULL) +
  labs(title = "SNAIVE forecast")

gcfc2 <- gctrain2 %>%
  model(SNAIVE(Trips ~ lag("year"))) %>%
  forecast(h = "1 years")
 
gcfc2 %>%
  autoplot(gctrain2, level = NULL) +
  labs(title = "SNAIVE forecast")

gcfc3 <- gctrain3 %>%
  model(SNAIVE(Trips ~ lag("year"))) %>%
  forecast(h = "1 years")

gcfc3 %>%
  autoplot(gctrain3, level = NULL) +
  labs(title = "SNAIVE forecast")

gcfc1 %>%
  accuracy(GCtourism)

## # A tibble: 1 × 10
##   .model                  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>                   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 "SNAIVE(Trips ~ lag(\"… Test   75.1  167.  154.  6.36  15.1  2.66  2.36 -0.410

gcfc2 %>%
  accuracy(GCtourism)

## # A tibble: 1 × 10
##   .model                  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>                   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 "SNAIVE(Trips ~ lag(\"… Test   12.0  43.1  39.5  1.14  4.32 0.670 0.599 -0.792

gcfc3 %>%
  accuracy(GCtourism)

## # A tibble: 1 × 10
##   .model                   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(Trips ~ lag(\"y… Test   35.8  91.4  83.9  3.56  9.07  1.46  1.30 0.239

gcfc2 by far had the best MAPE with 4.32, compared to gcfc1 (15.1) and gcfc3 (9.07). A MAPE lower than 5% is “acceptably accurate”.

Exercise 9

Steve <- gafa_stock %>%
  filter(Symbol == "AAPL") %>%
  mutate(trading_day = row_number()) %>%
  update_tsibble(index = trading_day, regular = TRUE)

appstretch <- Steve %>%
  stretch_tsibble(.init = 10, .step = 1) %>%
  relocate(Date, Symbol, .id)

tval <- appstretch %>%
  model(RW(Close ~ drift())) %>%
  forecast(h=1) %>%
  accuracy(Steve)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 1259

tvalmean <- appstretch %>%
  model(MEAN(Close)) %>%
  forecast(h=1) %>%
  accuracy(Steve)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 1259

tval

## # A tibble: 1 × 11
##   .model       Symbol .type      ME  RMSE   MAE     MPE  MAPE  MASE RMSSE   ACF1
##   <chr>        <chr>  <chr>   <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 RW(Close ~ … AAPL   Test  -0.0158  2.10  1.41 -0.0139  1.06  1.00  1.00 0.0330

tvalmean

## # A tibble: 1 × 11
##   .model      Symbol .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>       <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Close) AAPL   Test   26.9  37.6  28.5  16.9  18.6  20.2  17.9 0.996

Exercise 10

prices2 <- prices %>%
  na.omit()
prices2 %>%
  autoplot(wheat)

price_fit <- prices2 %>%
  model(RW(log(wheat) ~ drift()))
price_fc <- price_fit %>%
  forecast(h=50)

price_fc %>% autoplot(prices2) +
  labs(title = "RW Forecast of wheat with Drift")

wheatbs <- price_fit %>% forecast(h=50, bootstrap = TRUE, times=500)
wheatbs

## # A fable: 50 x 4 [1Y]
## # Key:     .model [1]
##    .model                    year          wheat .mean
##    <chr>                    <dbl>         <dist> <dbl>
##  1 RW(log(wheat) ~ drift())  1994 t(sample[500])  113.
##  2 RW(log(wheat) ~ drift())  1995 t(sample[500])  111.
##  3 RW(log(wheat) ~ drift())  1996 t(sample[500])  110.
##  4 RW(log(wheat) ~ drift())  1997 t(sample[500])  110.
##  5 RW(log(wheat) ~ drift())  1998 t(sample[500])  109.
##  6 RW(log(wheat) ~ drift())  1999 t(sample[500])  108.
##  7 RW(log(wheat) ~ drift())  2000 t(sample[500])  107.
##  8 RW(log(wheat) ~ drift())  2001 t(sample[500])  106.
##  9 RW(log(wheat) ~ drift())  2002 t(sample[500])  105.
## 10 RW(log(wheat) ~ drift())  2003 t(sample[500])  105.
## # … with 40 more rows

autoplot(wheatbs, prices2)

bootstrap <- price_fit %>% generate (h=50,times=500, bootstrap=TRUE)
bootstrap

## # A tsibble: 25,000 x 4 [1Y]
## # Key:       .model, .rep [500]
##    .model                    year .rep   .sim
##    <chr>                    <dbl> <chr> <dbl>
##  1 RW(log(wheat) ~ drift())  1994 1      80.1
##  2 RW(log(wheat) ~ drift())  1995 1      74.0
##  3 RW(log(wheat) ~ drift())  1996 1      76.9
##  4 RW(log(wheat) ~ drift())  1997 1      75.0
##  5 RW(log(wheat) ~ drift())  1998 1      75.5
##  6 RW(log(wheat) ~ drift())  1999 1      53.4
##  7 RW(log(wheat) ~ drift())  2000 1      46.4
##  8 RW(log(wheat) ~ drift())  2001 1      45.6
##  9 RW(log(wheat) ~ drift())  2002 1      80.9
## 10 RW(log(wheat) ~ drift())  2003 1      86.3
## # … with 24,990 more rows

prices2 %>%
  ggplot(aes(x = year)) +
  geom_line(aes(y = wheat)) +
  geom_line(aes(y = .sim, color = as.factor(.rep)),
            data = bootstrap) +
  guides(color = "none")

When a normal distribution is an unreasonable assumption, and when residuals are uncorrelated with constant variance.