Data624

Libraries

library(tsibble)

## 
## Attaching package: 'tsibble'

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

library(tsibbledata)
library(ggfortify)

## Loading required package: ggplot2

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(USgas)
library(readr)
library(tidyr)
library(readxl)
library(httr)
library(feasts)

## Loading required package: fabletools

library(stats)
library(fpp3)

## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --

## v tibble    3.1.6     v fable     0.3.1
## v lubridate 1.8.0

## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date()     masks base::date()
## x dplyr::filter()       masks stats::filter()
## x tsibble::intersect()  masks base::intersect()
## x lubridate::interval() masks tsibble::interval()
## x dplyr::lag()          masks stats::lag()
## x tsibble::setdiff()    masks base::setdiff()
## x tsibble::union()      masks base::union()

library(seasonal)

## 
## Attaching package: 'seasonal'

## The following object is masked from 'package:tibble':
## 
##     view

Q.8.1a)

pigs_slaughtered = aus_livestock %>% filter(Animal=='Pigs',State=='Victoria')

pigs_fit = pigs_slaughtered %>% model(ETS(Count~error("A") + trend("N") + season("N")))

pig_fc = pigs_fit %>% forecast(h=4)

report(pigs_fit)

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

pig_fc

## # A fable: 4 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model                          Month             Count  .mean
##   <fct>  <fct>    <chr>                           <mth>            <dist>  <dbl>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\") +~ 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs   Victoria "ETS(Count ~ error(\"A\") +~ 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs   Victoria "ETS(Count ~ error(\"A\") +~ 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs   Victoria "ETS(Count ~ error(\"A\") +~ 2019 Apr N(95187, 1.1e+08) 95187.

pig_fc %>% autoplot(pigs_slaughtered) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(pigs_fit)) +
  labs(y="Count Slaughter", title="Slaughter Count: Pigs") +
  guides(colour = "none")

Q.8.1b)

Calculation by Hand

yh = pig_fc$.mean[1]

std_dev = sd(augment(pigs_fit)$.resid)

upperbound_95 = yh + (std_dev*1.96)
lowerbound_95 = yh - (std_dev*1.96)

paste0("upper bound (95% C.I): ",upperbound_95,';'," lower bound (95% C.I)): ",lowerbound_95)

## [1] "upper bound (95% C.I): 113502.102384467; lower bound (95% C.I)): 76871.0124775157"

Utilizing package

pig_intervals = pig_fc %>% select('Count') %>% hilo(95)

pig_intervals$`95%`[1]

## <hilo[1]>
## [1] [76854.79, 113518.3]95

# Comparison

pig_intervals$`95%`[1]

## <hilo[1]>
## [1] [76854.79, 113518.3]95

c(lowerbound_95,upperbound_95)

## [1]  76871.01 113502.10

The manual calculation of the 95% confidence interval has a slightly higher lower bound and slightly smaller upper bound.

Q.8.5a)

ge = global_economy

ge %>% select(Exports) %>% filter(Country == 'China') %>% autoplot(.vars=Exports)

The data increases over time with exports peaking in the mid 2000s than experiencing a sharp decline. Seasonality is not really exhibited and the data shows an upward trend.

Q.8.5b)

china_ex = ge %>% select(Exports) %>% filter(Country == 'China') 

china_fit = china_ex %>% model(ETS(Exports~error("A") + trend("N") + season("N")))

china_fc = china_fit %>% forecast(h=12)


china_fc %>% autoplot(china_ex) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(china_fit)) +
  labs(y="Exports", title="China: Exports") +
  guides(colour = "none")

report(china_fit)

## Series: Exports 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.9998999 
## 
##   Initial states:
##      l[0]
##  4.308241
## 
##   sigma^2:  3.7396
## 
##      AIC     AICc      BIC 
## 315.9713 316.4157 322.1526

Q.8.5c)

paste0("RMSE: ",accuracy(china_fit)$RMSE)

## [1] "RMSE: 1.90017231591602"

Q.8.5d)

china_fit2 = china_ex %>% model(ETS(Exports~error("A") + trend("A") + season("N")))

china_fc2 = china_fit %>% forecast(h=12)


china_fc2 %>% autoplot(china_ex) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(china_fit2)) +
  labs(y="Exports", title="China: Exports") +
  guides(colour = "none")

report(china_fit2)

## Series: Exports 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998999 
##     beta  = 0.0992376 
## 
##   Initial states:
##    l[0]       b[0]
##  4.0399 0.09995689
## 
##   sigma^2:  3.9037
## 
##      AIC     AICc      BIC 
## 320.3530 321.5069 330.6552

ETS(A,A,N) model captures the patterns of the underlying data better and provide slightly tighter forecasts.

Q.8.5e)

china_ex %>%
 model(
    `Holt's method` = ETS(Exports ~ error("A") +
                       trend("A") + season("N")),
    `Simple ETS` = ETS(Exports ~ error("A") +
                       trend("N") + season("N"))
  ) %>% forecast(h=12) %>%autoplot(china_ex,level=NULL)

paste0("RMSE: ",accuracy(china_fit)$RMSE)

## [1] "RMSE: 1.90017231591602"

paste0("RMSE_2: ",accuracy(china_fit2)$RMSE)

## [1] "RMSE_2: 1.90643623623462"

With this data the simple ETS(A,N,N) has slightly better RMSE, however the Holt’s method I feel better captures the patterns of the data and the predictions are more in line with the historical down trend.

Q.8.5f)

yhc = mean(china_fc$.mean)

ll_95 = yhc - (accuracy(china_fit)$RMSE*1.96)
ul_95 = yhc + (accuracy(china_fit)$RMSE*1.96)

yhc2 = mean(china_fc2$.mean)

ll_95_2 = yhc2 - (accuracy(china_fit2)$RMSE*1.96)
ul_95_2 = yhc2 + (accuracy(china_fit2)$RMSE*1.96)

int1 = c(ll_95,ul_95)
int2 = c(ll_95_2,ul_95_2)

#Manual
int1

## [1] 16.03303 23.48171

int2

## [1] 16.02076 23.49399

# From R

r_int_1 = china_fc %>% select('Exports') %>% hilo(95)

r_int_1$`95%`[1]

## <hilo[1]>
## [1] [15.96718, 23.54756]95

# Comparison

r_int_2 = china_fc2 %>% select('Exports') %>% hilo(95)

r_int_2$`95%`[1]

## <hilo[1]>
## [1] [15.96718, 23.54756]95

Q.8.6)

# Pre Box Cox Transformation

china_gdp = global_economy %>% select(GDP) %>% filter(Country == 'China')
china_gdp %>%
 model(
    `Holt's method` = ETS(GDP ~ error("A") +
                       trend("A") + season("N")),
    `Simple ETS` = ETS(GDP ~ error("A") +
                       trend("N") + season("N")),
    `Damped Holt's method` = ETS(GDP ~ error("A") +
                       trend("Ad", phi = 0.9) + season("N"))
  ) %>% forecast(h=12) %>%autoplot(china_gdp,level=NULL) + 
  labs(y="USD", title="GDP: China (Pre Transformation) ")

# Post Box Cox Transformation

china_gdp_lambda <- china_gdp %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

china_gdp %>%
 model(
    `Holt's method` = ETS(box_cox(GDP,china_gdp_lambda) ~ error("A") +
                       trend("A") + season("N")),
    `Simple ETS` = ETS(box_cox(GDP,china_gdp_lambda) ~ error("A") +
                       trend("N") + season("N")),
    `Damped Holt's method` = ETS(box_cox(GDP,china_gdp_lambda) ~ error("A") +
                       trend("Ad", phi = 0.9) + season("N"))
  ) %>% forecast(h=12) %>%autoplot(china_gdp,level=NULL) + 
  labs(y="USD", title="GDP: China (Post Transformation) ")

Q.8.7)

aus_production_gas  = aus_production  %>% select(Gas)
aus_production_gas %>%
 model(
    simple_additive = ETS(Gas ~ error('A')+trend("A")+season("N")),
    simple_mult = ETS(Gas ~ error("M")+trend("A")+season("N")),
    seasonality_add = ETS(Gas ~ error('A')+trend("A")+season("A")),
    seasonality_mult = ETS(Gas ~ error("M")+trend("A")+season("M")),
    mult_season_damp = ETS(Gas ~ error("M")+trend("Ad")+season("M"))
  ) %>% forecast(h=25) %>%autoplot(aus_production_gas,level=NULL) + 
  labs(y="Amount", title="Gas: Aus Production ")

We can use multiplicative seasonality for this dataset because the changes in seasonality are proportional to the level of the series over time. Non seasonal models would be poor representations here, thus using multiplicative models would be the most appropriate solution here. Dampening improves the forecast only slightly.

Q.8.8a)

set.seed(45678913)

myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

autoplot(as.ts(myseries %>% select(Month,Turnover)))

Multiplicative seasonality is necessary for this series because changes occur proportional the level of the series.

Q.8.8b.)

fit_retail = myseries %>% model(
  Multi_Season = ETS(Turnover ~ error("M") + trend("A") + season("M")),
  Multi_Season_Damp = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)

fc_retail = fit_retail %>% forecast(h = 40)

fc_retail %>%
  autoplot(myseries, level=NULL) +
  labs(y="AUD", title="Retail Turnover")

Q.8.8.c.)

accuracy(fit_retail)

## # A tibble: 2 x 12
##   State    Industry   .model .type      ME  RMSE   MAE     MPE  MAPE  MASE RMSSE
##   <chr>    <chr>      <chr>  <chr>   <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 Tasmania Hardware,~ Multi~ Trai~ -0.0788  1.36 0.949 -1.21    6.71 0.427 0.422
## 2 Tasmania Hardware,~ Multi~ Trai~  0.0477  1.35 0.948  0.0402  6.65 0.426 0.419
## # ... with 1 more variable: ACF1 <dbl>

I prefer the RMSE of the Multi_Season_Damp model.

Q.8.8.d.)

myseries %>% model(
  
  Multi_Season_Damp = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)%>% gg_tsresiduals()

Q.8.8.e.)

myseries_sub_2011 = myseries %>% filter(year(Month)<2011)

lambda = .24


sub_2011_fit = myseries_sub_2011 %>% model(SNAIVE(box_cox(Turnover,lambda)~drift()))

sub_2011_fit %>% gg_tsresiduals()

## Warning: Removed 12 row(s) containing missing values (geom_path).

## Warning: Removed 12 rows containing missing values (geom_point).

## Warning: Removed 12 rows containing non-finite values (stat_bin).

myseries_sub_2011_fc <- sub_2011_fit %>%
  forecast(new_data = anti_join(myseries, myseries_sub_2011))

## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")

myseries_sub_2011_fc %>% autoplot(myseries)

sub_2011_fit %>% accuracy()

## # A tibble: 1 x 12
##   State  Industry .model .type      ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>    <chr>  <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Tasma~ Hardwar~ SNAIV~ Trai~ -0.0184  2.56  1.72 -2.03  14.0 0.947 0.933 0.840

myseries_sub_2011_fc %>% accuracy(myseries)

## # A tibble: 1 x 12
##   .model   State Industry  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr> <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(~ Tasm~ Hardware~ Test  -13.9  14.9  13.9 -43.1  43.1  7.69  5.44 0.887

myseries_sub_2011_train <- myseries_sub_2011 %>%
  model(ETS(box_cox(Turnover,.5) ~ error("M") + trend("Ad", phi = 0.9) + season("M")))

fc_sub_2011 <- myseries_sub_2011_train %>%
  forecast(new_data = anti_join(myseries, myseries_sub_2011))

## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")

fc_sub_2011 %>% autoplot(myseries)

myseries_sub_2011_train %>% accuracy()

## # A tibble: 1 x 12
##   State  Industry .model .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>  <chr>    <chr>  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Tasma~ Hardwar~ "ETS(~ Trai~ 0.0723  1.18 0.820 0.195  7.67 0.452 0.430 0.0620

fc_sub_2011 %>% accuracy(myseries)

## # A tibble: 1 x 12
##   .model   State Industry  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr> <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(bo~ Tasm~ Hardware~ Test  -2.25  4.33  3.36 -8.48  11.4  1.85  1.58 0.856

Naive model is superior.

Q.8.9)

stl_retail_myseries = myseries_sub_2011 %>% model(STL(Turnover))

components(stl_retail_myseries) %>% autoplot()

stl_retail_myseries_trans = myseries_sub_2011 %>% model(STL(box_cox(Turnover,lambda)))

components(stl_retail_myseries_trans) %>% autoplot()

my_series_stl_trans_fit = myseries_sub_2011 %>% model(ETS(box_cox(Turnover,lambda)))

my_series_stl_trans_fit_fc <- my_series_stl_trans_fit %>%
  forecast(new_data = anti_join(myseries, myseries_sub_2011))

## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")

my_series_stl_trans_fit_fc %>% autoplot(myseries)

my_series_stl_trans_fit_fc %>% accuracy(myseries)

## # A tibble: 1 x 12
##   .model   State Industry  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr> <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(box~ Tasm~ Hardware~ Test   1.07  4.69  4.14  1.08  13.0  2.28  1.71 0.906

Data624_HW5

Hector Santana

3/13/2022