Libraries
library(tsibble)
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(tsibbledata)
library(ggfortify)
## Loading required package: ggplot2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(USgas)
library(readr)
library(tidyr)
library(readxl)
library(httr)
library(feasts)
## Loading required package: fabletools
library(stats)
library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.6 v fable 0.3.1
## v lubridate 1.8.0
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x lubridate::interval() masks tsibble::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
Q.8.1a)
pigs_slaughtered = aus_livestock %>% filter(Animal=='Pigs',State=='Victoria')
pigs_fit = pigs_slaughtered %>% model(ETS(Count~error("A") + trend("N") + season("N")))
pig_fc = pigs_fit %>% forecast(h=4)
report(pigs_fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
pig_fc
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") +~ 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") +~ 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs Victoria "ETS(Count ~ error(\"A\") +~ 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs Victoria "ETS(Count ~ error(\"A\") +~ 2019 Apr N(95187, 1.1e+08) 95187.
pig_fc %>% autoplot(pigs_slaughtered) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(pigs_fit)) +
labs(y="Count Slaughter", title="Slaughter Count: Pigs") +
guides(colour = "none")
Q.8.1b)
Calculation by Hand
yh = pig_fc$.mean[1]
std_dev = sd(augment(pigs_fit)$.resid)
upperbound_95 = yh + (std_dev*1.96)
lowerbound_95 = yh - (std_dev*1.96)
paste0("upper bound (95% C.I): ",upperbound_95,';'," lower bound (95% C.I)): ",lowerbound_95)
## [1] "upper bound (95% C.I): 113502.102384467; lower bound (95% C.I)): 76871.0124775157"
Utilizing package
pig_intervals = pig_fc %>% select('Count') %>% hilo(95)
pig_intervals$`95%`[1]
## <hilo[1]>
## [1] [76854.79, 113518.3]95
# Comparison
pig_intervals$`95%`[1]
## <hilo[1]>
## [1] [76854.79, 113518.3]95
c(lowerbound_95,upperbound_95)
## [1] 76871.01 113502.10
The manual calculation of the 95% confidence interval has a slightly higher lower bound and slightly smaller upper bound.
Q.8.5a)
ge = global_economy
ge %>% select(Exports) %>% filter(Country == 'China') %>% autoplot(.vars=Exports)
The data increases over time with exports peaking in the mid 2000s than experiencing a sharp decline. Seasonality is not really exhibited and the data shows an upward trend.
Q.8.5b)
china_ex = ge %>% select(Exports) %>% filter(Country == 'China')
china_fit = china_ex %>% model(ETS(Exports~error("A") + trend("N") + season("N")))
china_fc = china_fit %>% forecast(h=12)
china_fc %>% autoplot(china_ex) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(china_fit)) +
labs(y="Exports", title="China: Exports") +
guides(colour = "none")
report(china_fit)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9998999
##
## Initial states:
## l[0]
## 4.308241
##
## sigma^2: 3.7396
##
## AIC AICc BIC
## 315.9713 316.4157 322.1526
Q.8.5c)
paste0("RMSE: ",accuracy(china_fit)$RMSE)
## [1] "RMSE: 1.90017231591602"
Q.8.5d)
china_fit2 = china_ex %>% model(ETS(Exports~error("A") + trend("A") + season("N")))
china_fc2 = china_fit %>% forecast(h=12)
china_fc2 %>% autoplot(china_ex) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(china_fit2)) +
labs(y="Exports", title="China: Exports") +
guides(colour = "none")
report(china_fit2)
## Series: Exports
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998999
## beta = 0.0992376
##
## Initial states:
## l[0] b[0]
## 4.0399 0.09995689
##
## sigma^2: 3.9037
##
## AIC AICc BIC
## 320.3530 321.5069 330.6552
ETS(A,A,N) model captures the patterns of the underlying data better and provide slightly tighter forecasts.
Q.8.5e)
china_ex %>%
model(
`Holt's method` = ETS(Exports ~ error("A") +
trend("A") + season("N")),
`Simple ETS` = ETS(Exports ~ error("A") +
trend("N") + season("N"))
) %>% forecast(h=12) %>%autoplot(china_ex,level=NULL)
paste0("RMSE: ",accuracy(china_fit)$RMSE)
## [1] "RMSE: 1.90017231591602"
paste0("RMSE_2: ",accuracy(china_fit2)$RMSE)
## [1] "RMSE_2: 1.90643623623462"
With this data the simple ETS(A,N,N) has slightly better RMSE, however the Holt’s method I feel better captures the patterns of the data and the predictions are more in line with the historical down trend.
Q.8.5f)
yhc = mean(china_fc$.mean)
ll_95 = yhc - (accuracy(china_fit)$RMSE*1.96)
ul_95 = yhc + (accuracy(china_fit)$RMSE*1.96)
yhc2 = mean(china_fc2$.mean)
ll_95_2 = yhc2 - (accuracy(china_fit2)$RMSE*1.96)
ul_95_2 = yhc2 + (accuracy(china_fit2)$RMSE*1.96)
int1 = c(ll_95,ul_95)
int2 = c(ll_95_2,ul_95_2)
#Manual
int1
## [1] 16.03303 23.48171
int2
## [1] 16.02076 23.49399
# From R
r_int_1 = china_fc %>% select('Exports') %>% hilo(95)
r_int_1$`95%`[1]
## <hilo[1]>
## [1] [15.96718, 23.54756]95
# Comparison
r_int_2 = china_fc2 %>% select('Exports') %>% hilo(95)
r_int_2$`95%`[1]
## <hilo[1]>
## [1] [15.96718, 23.54756]95
Q.8.6)
# Pre Box Cox Transformation
china_gdp = global_economy %>% select(GDP) %>% filter(Country == 'China')
china_gdp %>%
model(
`Holt's method` = ETS(GDP ~ error("A") +
trend("A") + season("N")),
`Simple ETS` = ETS(GDP ~ error("A") +
trend("N") + season("N")),
`Damped Holt's method` = ETS(GDP ~ error("A") +
trend("Ad", phi = 0.9) + season("N"))
) %>% forecast(h=12) %>%autoplot(china_gdp,level=NULL) +
labs(y="USD", title="GDP: China (Pre Transformation) ")
# Post Box Cox Transformation
china_gdp_lambda <- china_gdp %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
china_gdp %>%
model(
`Holt's method` = ETS(box_cox(GDP,china_gdp_lambda) ~ error("A") +
trend("A") + season("N")),
`Simple ETS` = ETS(box_cox(GDP,china_gdp_lambda) ~ error("A") +
trend("N") + season("N")),
`Damped Holt's method` = ETS(box_cox(GDP,china_gdp_lambda) ~ error("A") +
trend("Ad", phi = 0.9) + season("N"))
) %>% forecast(h=12) %>%autoplot(china_gdp,level=NULL) +
labs(y="USD", title="GDP: China (Post Transformation) ")
Q.8.7)
aus_production_gas = aus_production %>% select(Gas)
aus_production_gas %>%
model(
simple_additive = ETS(Gas ~ error('A')+trend("A")+season("N")),
simple_mult = ETS(Gas ~ error("M")+trend("A")+season("N")),
seasonality_add = ETS(Gas ~ error('A')+trend("A")+season("A")),
seasonality_mult = ETS(Gas ~ error("M")+trend("A")+season("M")),
mult_season_damp = ETS(Gas ~ error("M")+trend("Ad")+season("M"))
) %>% forecast(h=25) %>%autoplot(aus_production_gas,level=NULL) +
labs(y="Amount", title="Gas: Aus Production ")
We can use multiplicative seasonality for this dataset because the changes in seasonality are proportional to the level of the series over time. Non seasonal models would be poor representations here, thus using multiplicative models would be the most appropriate solution here. Dampening improves the forecast only slightly.
Q.8.8a)
set.seed(45678913)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(as.ts(myseries %>% select(Month,Turnover)))
Multiplicative seasonality is necessary for this series because changes occur proportional the level of the series.
Q.8.8b.)
fit_retail = myseries %>% model(
Multi_Season = ETS(Turnover ~ error("M") + trend("A") + season("M")),
Multi_Season_Damp = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)
fc_retail = fit_retail %>% forecast(h = 40)
fc_retail %>%
autoplot(myseries, level=NULL) +
labs(y="AUD", title="Retail Turnover")
Q.8.8.c.)
accuracy(fit_retail)
## # A tibble: 2 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Tasmania Hardware,~ Multi~ Trai~ -0.0788 1.36 0.949 -1.21 6.71 0.427 0.422
## 2 Tasmania Hardware,~ Multi~ Trai~ 0.0477 1.35 0.948 0.0402 6.65 0.426 0.419
## # ... with 1 more variable: ACF1 <dbl>
I prefer the RMSE of the Multi_Season_Damp model.
Q.8.8.d.)
myseries %>% model(
Multi_Season_Damp = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)%>% gg_tsresiduals()
Q.8.8.e.)
myseries_sub_2011 = myseries %>% filter(year(Month)<2011)
lambda = .24
sub_2011_fit = myseries_sub_2011 %>% model(SNAIVE(box_cox(Turnover,lambda)~drift()))
sub_2011_fit %>% gg_tsresiduals()
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).
myseries_sub_2011_fc <- sub_2011_fit %>%
forecast(new_data = anti_join(myseries, myseries_sub_2011))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
myseries_sub_2011_fc %>% autoplot(myseries)
sub_2011_fit %>% accuracy()
## # A tibble: 1 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Tasma~ Hardwar~ SNAIV~ Trai~ -0.0184 2.56 1.72 -2.03 14.0 0.947 0.933 0.840
myseries_sub_2011_fc %>% accuracy(myseries)
## # A tibble: 1 x 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(~ Tasm~ Hardware~ Test -13.9 14.9 13.9 -43.1 43.1 7.69 5.44 0.887
myseries_sub_2011_train <- myseries_sub_2011 %>%
model(ETS(box_cox(Turnover,.5) ~ error("M") + trend("Ad", phi = 0.9) + season("M")))
fc_sub_2011 <- myseries_sub_2011_train %>%
forecast(new_data = anti_join(myseries, myseries_sub_2011))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
fc_sub_2011 %>% autoplot(myseries)
myseries_sub_2011_train %>% accuracy()
## # A tibble: 1 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Tasma~ Hardwar~ "ETS(~ Trai~ 0.0723 1.18 0.820 0.195 7.67 0.452 0.430 0.0620
fc_sub_2011 %>% accuracy(myseries)
## # A tibble: 1 x 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(bo~ Tasm~ Hardware~ Test -2.25 4.33 3.36 -8.48 11.4 1.85 1.58 0.856
Naive model is superior.
Q.8.9)
stl_retail_myseries = myseries_sub_2011 %>% model(STL(Turnover))
components(stl_retail_myseries) %>% autoplot()
stl_retail_myseries_trans = myseries_sub_2011 %>% model(STL(box_cox(Turnover,lambda)))
components(stl_retail_myseries_trans) %>% autoplot()
my_series_stl_trans_fit = myseries_sub_2011 %>% model(ETS(box_cox(Turnover,lambda)))
my_series_stl_trans_fit_fc <- my_series_stl_trans_fit %>%
forecast(new_data = anti_join(myseries, myseries_sub_2011))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
my_series_stl_trans_fit_fc %>% autoplot(myseries)
my_series_stl_trans_fit_fc %>% accuracy(myseries)
## # A tibble: 1 x 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(box~ Tasm~ Hardware~ Test 1.07 4.69 4.14 1.08 13.0 2.28 1.71 0.906