library(readxl)
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble 3.1.8 ✔ tsibble 1.1.2
## ✔ dplyr 1.0.9 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.2.0 ✔ feasts 0.2.2
## ✔ lubridate 1.8.0 ✔ fable 0.3.1
## ✔ ggplot2 3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(moments)
library(tsibble)
library(tsibbledata)
library(ggplot2)
library(dplyr)
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
library(fabletools)
library(fable)
library(ggfortify)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
##
## index
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(feasts)
pig <- aus_livestock %>%
filter(State == "Victoria", Animal == "Pigs")
pig_fit <- pig %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
pig_fit %>%
report()
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
pig_fore <- pig_fit %>%
forecast(h = 4)
piggy <- pig %>%
filter(year(Month) > 2009)
pig_fore %>%
autoplot(piggy)
pig_fore_mean <- pig_fore %>%
select(Animal, State, Month, Count, .mean)
pig_mean_pi <- pig_fore_mean %>%
mutate(pi_lower = .mean - 1.96 * 9353.11) %>%
mutate(pi_upper = .mean + 1.96 * 9353.11)
pig_mean_pi
## # A fable: 4 x 7 [1M]
## # Key: Animal, State [1]
## Animal State Month Count .mean pi_lower pi_upper
## <fct> <fct> <mth> <dist> <dbl> <dbl> <dbl>
## 1 Pigs Victoria 2019 Jan N(95187, 8.7e+07) 95187. 76854. 113519.
## 2 Pigs Victoria 2019 Feb N(95187, 9.7e+07) 95187. 76854. 113519.
## 3 Pigs Victoria 2019 Mar N(95187, 1.1e+08) 95187. 76854. 113519.
## 4 Pigs Victoria 2019 Apr N(95187, 1.1e+08) 95187. 76854. 113519.
pig_fore %>%
hilo(x, level = 95) %>%
select(Animal, State, Month, Count, .mean, '95%')
## # A tsibble: 4 x 6 [1M]
## # Key: Animal, State [1]
## Animal State Month Count .mean `95%`
## <fct> <fct> <mth> <dist> <dbl> <hilo>
## 1 Pigs Victoria 2019 Jan N(95187, 8.7e+07) 95187. [76854.79, 113518.3]95
## 2 Pigs Victoria 2019 Feb N(95187, 9.7e+07) 95187. [75927.17, 114445.9]95
## 3 Pigs Victoria 2019 Mar N(95187, 1.1e+08) 95187. [75042.22, 115330.9]95
## 4 Pigs Victoria 2019 Apr N(95187, 1.1e+08) 95187. [74194.54, 116178.6]95
The manual parameters turned out fine but I cannot get the hilo() to work. Question 2
aus_econ <- global_economy %>%
filter(Country == "Australia")
aus_train <- aus_econ %>%
filter(Year <= year(as.Date("2015-01-01")))
aus_econ %>%
autoplot(Exports) +
labs(title = "Australian Exports")
aus_expo <- aus_train %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
aus_expo %>%
forecast(h = 10)%>%
autoplot(aus_train) +
labs(title = "Aus Exports Forecast")
accuracy(aus_expo)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia "ETS(Export… Trai… 0.225 1.14 0.898 1.06 5.36 0.926 0.931 0.00134
aus_linear <- aus_train %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
accuracy(aus_linear)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia "ETS(Expo… Trai… -0.0260 1.11 0.887 -0.574 5.43 0.915 0.910 0.0836
aus_linear %>%
forecast(h = 10)%>%
autoplot(aus_train) +
labs(title = "Aus Exports Forecast Linear")
A.From the 80’s to teh 00’s there seemed to be a large amount of growth
in exports. There also seems to be a big spike every 15 years or so
E. and D. Both are not bad forecasting methods, but the linear trend model works out better for this data in my opinion. This is because there is a clear upward trend that I notice. The ANN does not account for this trend. My conclusison is supported by the RMSE values.
Question 3
china <- global_economy %>%
filter(Country == "China") %>%
select(Year, GDP)
china %>%
autoplot()
## Plot variable not specified, automatically selected `.vars = GDP`
china_guerro <- china %>%
features(GDP, features = guerrero)
china_box <- china %>%
autoplot(box_cox(GDP, china_guerro)) +
labs(y = "China's Transformed GDP")
china_box
china_method <- china %>%
model(
'Auto' = ETS(GDP),
'Simple' = ETS(GDP ~ error("A") + trend("N") + season("N")),
'H-W' = ETS(GDP ~ error("A") + trend("A") + season("N")),
'Damped H-W' = ETS(GDP ~ error("A") + trend("Ad") + season("N"))
)
china_method %>%
forecast(h = 20) %>%
autoplot(china)
I think the damped method is the best because China’s growth rate will most likely start to slow as they reach the late stages of first worldness. The damped method will be able to account for this.
Question 4
gas <- aus_production %>%
select(Quarter, Gas)
gas %>%
autoplot()
## Plot variable not specified, automatically selected `.vars = Gas`
gas_res <- gas %>%
model(ets = ETS(Gas),
'Multi Holt' = ETS(Gas ~ error("A") + trend("A") + season("M")),
'Damped Multi Holt'= ETS(Gas ~ error("A") + trend("Ad") + season("M")))
gas_res %>%
glance()
## # A tibble: 3 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets 0.00324 -831. 1681. 1682. 1711. 21.1 32.2 0.0413
## 2 Multi Holt 18.2 -899. 1816. 1817. 1847. 17.6 25.1 2.84
## 3 Damped Multi Holt 18.5 -901. 1821. 1822. 1855. 17.8 25.9 2.81
gas_auto <- gas %>%
model(ets = ETS(Gas))
gas_auto %>%
report()
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6528545
## beta = 0.1441675
## gamma = 0.09784922
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
##
## sigma^2: 0.0032
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
The data further in the past has much lower variance than the data in the future. This makes multiplicative seasonality necessary. I dont think damped trend helps because it doesnt appear the trend will slow its rate.
Question 5
set.seed(2345678)
myseed <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseed %>% autoplot(Turnover)
myseed_holt <- myseed %>%
model(
hw = ETS(Turnover ~ error("M") + trend("A") + season("M")))%>%
forecast(h = 36) %>%
autoplot(myseed) +
labs(title = "Three Year Forecast")
myseed_holt
myseed_damped <- myseed %>%
model(
damped = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.9) + season("M"))) %>%
forecast(h = 36) %>%
autoplot(myseed) +
labs(title = "Three Year Forecast Damped")
myseed_damped
c - e. for some reason I cant get anything I tried past here to run, maybe my computer cant process it for whatever reason. Visiaully though, they looks similar, but I think I prefer the damped because the trend doesnt look very constant. Im sure that they would have white noise if it would load. My best prediction given my data is that nothing will beat SNAIVE.
Question 6
trips <- tourism %>%
summarize(Trips = sum(Trips))
autoplot(trips)
## Plot variable not specified, automatically selected `.vars = Trips`
trips_stl <- trips %>%
model(STL(Trips ~ season(window = 4), robust = TRUE))
components(trips_stl) %>%
autoplot() +
labs(title = "STL Decomposition")
season_trips <- trips %>%
model(decomposition_model(STL(Trips),
ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))))
season_trips %>%
forecast(h = 7) %>%
autoplot(trips)
trips_ets <- trips %>%
model(ETS(Trips ~ error("A") + trend("A") + season("N")))
trips_ets %>%
forecast(h = 2)
## # A fable: 2 x 4 [1Q]
## # Key: .model [1]
## .model Quarter Trips .mean
## <chr> <qtr> <dist> <dbl>
## 1 "ETS(Trips ~ error(\"A\") + trend(\"A\") + s… 2018 Q1 N(27594, 1559325) 27594.
## 2 "ETS(Trips ~ error(\"A\") + trend(\"A\") + s… 2018 Q2 N(27920, 1652392) 27920.
trips_auto <- trips %>%
model(ETS(Trips))
trips_auto
## # A mable: 1 x 1
## `ETS(Trips)`
## <model>
## 1 <ETS(A,A,A)>
accuracy(trips_auto)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Trips) Training 105. 794. 604. 0.379 2.86 0.636 0.653 -0.00151
accuracy(trips_ets)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Trips ~ error(\"A… Trai… 123. 1217. 1002. 0.317 4.67 1.06 1.00 0.0895
accuracy(season_trips)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "decomposition_model(… Trai… 103. 763. 576. 0.367 2.72 0.607 0.629 -0.0174
trips_test <- trips %>%
model(
'holt' = ETS(Trips ~ error("A") + trend("A") + season("N")),
'Auto' = ETS(Trips))
trips_test %>%
forecast(h = "4 years") %>%
autoplot(trips)
season_trips %>%
gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
It is relatively flat until 2010 where there is an increasing trend
The seasonally adjusted game the best values
I think the best forecast is the seasonally adjusted using STL. It follows the trend and moves seasonally just like the data does. The other forecasts can follow the trend but they dont account for the variation.
Question 7
nz <- aus_arrivals %>%
filter(Origin == "NZ")
nz %>%
autoplot(Arrivals / 1e3)
nz_train <- nz %>%
slice(1:(n() - 8))
nz_train %>%
model(ETS(Arrivals ~ error("M") + trend("A") + season("M"))) %>%
forecast(h = "2 years") %>%
autoplot() +
autolayer(nz, Arrivals)
nz_fore <- nz_train %>%
model(
ets = ETS(Arrivals),
log = ETS(log(Arrivals)),
snaive = SNAIVE(Arrivals),
stl = decomposition_model(STL(log(Arrivals)), ETS(season_adjust))
) %>%
forecast(h = "2 years")
nz_fore %>%
autoplot(level = NULL) +
autolayer(filter(nz, year(Quarter) > 2000), Arrivals)
nz_fore %>%
autoplot(level = NULL) +
autolayer(nz, Arrivals)
nz_fore %>%
accuracy(nz)
## # A tibble: 4 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets NZ Test -3495. 14913. 11421. -0.964 3.78 0.768 0.771 -0.0260
## 2 log NZ Test 2467. 13342. 11904. 1.03 4.03 0.800 0.689 -0.0786
## 3 snaive NZ Test 9709. 18051. 17156. 3.44 5.80 1.15 0.933 -0.239
## 4 stl NZ Test -12535. 22723. 16172. -4.02 5.23 1.09 1.17 0.109
log_ets <- nz_train %>%
model(ETS(log(Arrivals)))
log_ets %>% gg_tsresiduals()
nz_comp <- nz %>%
slice(1:(n() - 3)) %>%
stretch_tsibble(.init = 36, .step = 3)
nz_comp %>%
model(
ets = ETS(Arrivals),
log_ets = ETS(log(Arrivals)),
snaive = SNAIVE(Arrivals),
stl = decomposition_model(STL(log(Arrivals)), ETS(season_adjust))
) %>%
forecast(h = 3) %>%
accuracy(nz)
## # A tibble: 4 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets NZ Test 4627. 15327. 11799. 2.23 6.45 0.793 0.797 0.283
## 2 log_ets NZ Test 4388. 15047. 11566. 1.99 6.36 0.778 0.782 0.268
## 3 snaive NZ Test 8244. 18768. 14422. 3.83 7.76 0.970 0.976 0.566
## 4 stl NZ Test 4252. 15618. 11873. 2.04 6.25 0.798 0.812 0.244
The variance is much lower in the earlier data and continues to increase in variance over time.
the log method appears to give the best forecasts and it passes the residual tests.
yes
Question 8
cement <- aus_production %>%
slice(1:(n() - 4)) %>%
stretch_tsibble(.init = 5 * 4)
cement_fore <- cement %>%
model(ETS(Cement), SNAIVE(Cement)) %>%
forecast(h = "1 year")