## Libraries
library(fastDummies)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(generics)
##
## Attaching package: 'generics'
## The following object is masked from 'package:caret':
##
## train
## The following object is masked from 'package:lubridate':
##
## as.difftime
## The following objects are masked from 'package:base':
##
## as.difftime, as.factor, as.ordered, intersect, is.element, setdiff,
## setequal, union
library(tsibble)
##
## Attaching package: 'tsibble'
## The following object is masked from 'package:lubridate':
##
## interval
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:generics':
##
## explain
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✓ tibble 3.1.6 ✓ feasts 0.2.2
## ✓ tidyr 1.2.0 ✓ fable 0.3.1
## ✓ tsibbledata 0.4.0
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks generics::intersect(), base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x fabletools::MAE() masks caret::MAE()
## x fabletools::RMSE() masks caret::RMSE()
## x tsibble::setdiff() masks generics::setdiff(), base::setdiff()
## x tsibble::union() masks generics::union(), base::union()
library(modeest)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 method overwritten by 'forecast':
## method from
## predict.default statip
##
## Attaching package: 'forecast'
## The following object is masked from 'package:modeest':
##
## naive
## The following objects are masked from 'package:generics':
##
## accuracy, forecast
library(latex2exp)
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
library(fable)
##Chapter 8 Exercises
Forecasts below, could not see the optimal values because report() function was not working.
The interval is from 76871 to 113502.
#a
vic_pigs <- aus_livestock %>% filter(Animal == "Pigs", State == "Victoria")
vic_pigs %>%
autoplot(Count) +
labs(title = 'Pigs Slaughtered in Victoria')
fit <- vic_pigs %>%
model(ses = ETS(Count ~ error('A') + trend('N') + season('N')))
"reports <- fit %>% report()" ##For some reason report() isn't working for me so it is really difficult to complete next 3 questions
## [1] "reports <- fit %>% report()"
fe <- forecast(fit, h= 4)
acc <- accuracy(fit)
acc
## # A tibble: 1 × 12
## Animal State .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Pigs Victoria ses Train… -30.4 9336. 7190. -1.06 8.35 0.776 0.751 0.0103
fe %>% autoplot(vic_pigs) +
labs(title = 'Victorian Pigs Slaughtered Forecast') + theme(plot.title = element_text(hjust = 0.5))
#b
y <- fe %>%
pull(Count) %>%
head(1)
sd1 <- augment(fit) %>%
pull(.resid) %>%
sd()
lo <- y - 1.96 * sd1
up <- y + 1.96 * sd1
interval1<- c(lo, up)
names(interval1) <- c('Lower', 'Upper')
interval1
## <distribution[2]>
## Lower Upper
## N(76871, 8.7e+07) N(113502, 8.7e+07)
Again could not find number for alpha.
"function(y, alpha, l0){
yhat <- l0
for(index in 1:length(y)){
yhat <- alpha*y[index] + (1 - alpha)*yhat
}
cat('Forecast, SES function: ',
as.character(yhat),
sep = '\n')
}" ## This is what I would use if I could see alpha value.
## [1] "function(y, alpha, l0){\n yhat <- l0\n for(index in 1:length(y)){\n yhat <- alpha*y[index] + (1 - alpha)*yhat \n }\n cat('Forecast, SES function: ',\n as.character(yhat),\n sep = '\n')\n}"
Same issues as 2.
## Not quite sure how to correct this
## Same as 2 and 3
The data shows that exports do not show any clear trend. There was a dip in exports during the mid-1980s, followed by a steady rise, and now the exports recently seem to be trending downwards in recent years.
Plot below.
Values in part #c below
Based on the plots and the lower RMSE levels I would say the ANN model performs better and is a better fit.
The interval is 11 to 34. Which is a bit tighter than the interval on the plot.
#a
algeria_economy <- global_economy %>%
filter(Country == "Algeria")
algeria_economy %>%
autoplot(Exports) +
labs(y = "% of GDP", title = "Exports: Algeria")
#b
fit <- algeria_economy %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
fc <- fit %>%
forecast(h = 5)
fc %>% autoplot(algeria_economy) +
labs(title = 'Algerian Exports Forecast, ANN') + theme(plot.title = element_text(hjust = 0.5))
#c
acc1 <- accuracy(fit)
acc1
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Algeria "ETS(Exports … Trai… -0.351 5.87 4.00 -3.86 15.1 0.963 0.981 0.0110
#d
fit2 <- algeria_economy %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
fc2 <- fit2 %>%
forecast(h = 5)
fc2 %>% autoplot(algeria_economy) +
labs(title = 'Algerian Exports Forecast, AAN') + theme(plot.title = element_text(hjust = 0.5))
acc2 <- accuracy(fit2)
acc2
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Algeria "ETS(Exports ~… Trai… 0.173 5.89 4.05 -2.12 15.1 0.975 0.985 0.0617
#f
y1 <- fc %>%
pull(Exports) %>%
head(1)
sd2 <- augment(fit) %>%
pull(.resid) %>%
sd()
lower <- y1 - 1.96 * sd2
upper <- y1 + 1.96 * sd2
interval2<- c(lower, upper)
names(interval2) <- c('Lower', 'Upper')
interval2
## <distribution[2]>
## Lower Upper
## N(11, 36) N(34, 36)
[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
Seems as though the Holt’s method is trying to follow the trend of the data while the damped method, dampens the trend’s effect. Interesting plot where the Holt’s follows the line as is while the damped method rounds it out and flattens it a bit.
#ETS with Holt's and damped trend
chi_economy <- global_economy %>%
filter(Country == "China")
chi_economy %>%
model(
`Holt's method` = ETS(GDP ~ error("A") +
trend("A") + season("N")),
`Damped Holt's method` = ETS(GDP ~ error("A") +
trend("Ad", phi = 0.9) + season("N"))
) %>%
forecast(h = 15) %>%
autoplot(chi_economy, level = NULL) +
labs(title = "Chinese GDP",
y = "USD") + guides(colour = guide_legend(title = "Forecast"))
#ETS with Box-Cox
lambda <- chi_economy %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
box <- chi_economy %>% mutate(gdp = box_cox(GDP, lambda))
box %>%
model(
`Holt's method` = ETS(GDP ~ error("A") +
trend("A") + season("N")),
`Damped Holt's method` = ETS(GDP ~ error("A") +
trend("Ad", phi = 0.9) + season("N")))
## # A mable: 1 x 3
## # Key: Country [1]
## Country `Holt's method` `Damped Holt's method`
## <fct> <model> <model>
## 1 China <ETS(A,A,N)> <ETS(A,Ad,N)>
Multiplicative is necessary because the seasonal trend is constantly increasing.
The trend being damped does not seem to improve these forecasts.
aus_production %>% autoplot(Gas)
fit3 <- ets(aus_production$Gas)
fit3
## ETS(M,A,N)
##
## Call:
## ets(y = aus_production$Gas)
##
## Smoothing parameters:
## alpha = 0.1111
## beta = 0.1019
##
## Initial states:
## l = 5.7619
## b = 0.0725
##
## sigma: 0.1631
##
## AIC AICc BIC
## 2137.521 2137.804 2154.443
fo <- forecast(fit3, h = 8)
fo
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 219 223.0607 176.4492 269.6722 151.77455 294.3468
## 220 221.3845 174.0428 268.7262 148.98160 293.7874
## 221 219.7083 170.4274 268.9892 144.33972 295.0769
## 222 218.0321 165.3109 270.7533 137.40202 298.6623
## 223 216.3560 158.5771 274.1348 127.99089 304.7210
## 224 214.6798 150.2645 279.0951 116.16509 313.1945
## 225 213.0036 140.5028 285.5043 102.12327 323.8839
## 226 211.3274 129.4521 293.2027 86.10987 336.5449
accuracy(fo)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.07874544 16.35735 11.92955 -1.321798 13.49607 0.8199909
## ACF1
## Training set 0.08876701
#Damped
fit4 <- aus_production %>%
model(
`Damped Holt's method` = ETS(Gas ~ error("A") +
trend("Ad", phi = 0.9) + season("M")))
fo2 <- forecast(fit4, h = 8)
fo2 %>% autoplot(aus_production)
fo2
## # A fable: 8 x 4 [1Q]
## # Key: .model [1]
## .model Quarter Gas .mean
## <chr> <qtr> <dist> <dbl>
## 1 Damped Holt's method 2010 Q3 sample[5000] 255.
## 2 Damped Holt's method 2010 Q4 sample[5000] 211.
## 3 Damped Holt's method 2011 Q1 sample[5000] 200.
## 4 Damped Holt's method 2011 Q2 sample[5000] 239.
## 5 Damped Holt's method 2011 Q3 sample[5000] 256.
## 6 Damped Holt's method 2011 Q4 sample[5000] 212.
## 7 Damped Holt's method 2012 Q1 sample[5000] 201.
## 8 Damped Holt's method 2012 Q2 sample[5000] 240.
The trend seems to have a constant increase, followed by a sharp decrease.
Seen below
Based on the values the Holt’s Winter Damped method has the best performance and fit.
The residuals show that the damped method is white noise.
Yes, the damped method performs the best and beats the seasonal naive approach.
set.seed(100000000)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries %>% autoplot(Turnover)
#b
fit5 <- myseries %>%
model(
'Holt Winters Multiplicative Method' = ETS(Turnover ~ error('M') + trend('A') + season('M')),
'Holt Winters Damped Method' = ETS(Turnover ~ error('M') + trend('Ad') + season('M'))
)
ft <- forecast(fit5, h=12)
ft %>% autoplot(myseries, level=NULL)
#c
accuracy(fit5)
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australia… Newspap… Holt … Trai… -0.0590 0.691 0.506 -1.55 7.61 0.468 0.450
## 2 Australia… Newspap… Holt … Trai… 0.00322 0.688 0.499 -0.552 7.46 0.461 0.448
## # … with 1 more variable: ACF1 <dbl>
#d
fit5 %>%
select('Holt Winters Damped Method') %>%
gg_tsresiduals()
#e
train <- myseries %>%
filter(year(Month) < 2011)
test <- myseries %>%
filter(year(Month) > 2010)
fit6 <- train %>% model(
'Holt Winters Multiplicative Method' = ETS(Turnover ~ error('M') + trend('A') + season('M')),
'Holt Winters Damped Method' = ETS(Turnover ~ error('M') + trend('Ad') + season('M')),
'Seasonal Naive' = SNAIVE(Turnover)
)
ft2 <- forecast(fit6, h=95)
ft2 %>% autoplot(myseries) +
labs(title = 'Retail Forecasts') + theme(plot.title = element_text(hjust = 0.5))
accuracy(ft2, test)
## # A tibble: 3 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Holt Win… Aust… Newspap… Test -3.20 3.32 3.20 -61.6 61.6 NaN NaN 0.337
## 2 Holt Win… Aust… Newspap… Test -4.96 5.13 4.96 -93.7 93.7 NaN NaN 0.580
## 3 Seasonal… Aust… Newspap… Test -3.51 3.62 3.51 -66.4 66.4 NaN NaN 0.463
This one I had trouble with mutate() the attempted code is below.
## ERROR IN mutate()
"lambda <- train %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
box2 <- train %>% mutate(turnover = box_cox(Turnover, lambda))
fit7 <- box2 %>%
model(
'STL Box-Cox' = STL(turnover ~ season(window = 'periodic'), robust = TRUE))
fits <- box2 %>%
model ('ETS Box-Cox' = ETS(turnover))
ft3 <- forecast(fit7, h=36) ##ERROR here
plot(ft3)
ft4 <- forecast(fits,h=36)
accuracy(fit7,test)
accuracy(fits, test)"
## [1] "lambda <- train %>%\n features(Turnover, features = guerrero) %>%\n pull(lambda_guerrero)\nbox2 <- train %>% mutate(turnover = box_cox(Turnover, lambda))\n\nfit7 <- box2 %>%\n model(\n 'STL Box-Cox' = STL(turnover ~ season(window = 'periodic'), robust = TRUE))\nfits <- box2 %>%\n model ('ETS Box-Cox' = ETS(turnover))\nft3 <- forecast(fit7, h=36) ##ERROR here\nplot(ft3)\nft4 <- forecast(fits,h=36)\naccuracy(fit7,test)\naccuracy(fits, test)"
From the data we see a fairly normal trend, a dip in trips around 2010, and recently a steady increase in trips since that dip.
Decomposition below.
Forecasts below.
Forecast below.
Below.
The ETS performs worse than both the RMSE models with STL decomposition. The best in-sample fits came from the one without the damped trend.
Based on the plot the RMSE without the damped trend seems to be the most reasonable although all three are similar.
Appears to be white noise.
#a
trips <- tourism %>%
summarise(trips = sum(Trips))
trips %>%
autoplot(trips)
#b
decomp <- trips %>%
model(STL(trips)) %>% components()
decomp %>%
as_tsibble() %>%
autoplot(season_adjust)
#c
trips %>%
model(
decomposition_model(STL(trips),
ETS(season_adjust ~ error("A") +
trend("Ad") +
season("N")
)
)
) %>%
forecast(h = "2 years") %>%
autoplot(trips)
#d
trips %>%
model(
decomposition_model(STL(trips),
ETS(season_adjust ~ error("A") +
trend("A") +
season("N")
)
)
) %>%
forecast(h = "2 years") %>%
autoplot(trips)
#e
trips %>%
model(
ETS(trips)
) %>%
forecast(h = "2 years") %>%
autoplot(trips)
#f
fit8 <- trips %>%
model(
STL_AAdN = decomposition_model(STL(trips),
ETS(season_adjust ~ error("A") +
trend("Ad") +
season("N"))),
STL_AAN = decomposition_model(STL(trips),
ETS(season_adjust ~ error("A") +
trend("A") +
season("N"))),
ETS = ETS(trips)
)
accuracy(fit8)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 STL_AAdN Training 103. 763. 576. 0.367 2.72 0.607 0.629 -0.0174
## 2 STL_AAN Training 99.7 763. 574. 0.359 2.71 0.604 0.628 -0.0182
## 3 ETS Training 105. 794. 604. 0.379 2.86 0.636 0.653 -0.00151
#h
choice <- fit8 %>%
select(STL_AAN)
choice %>%
gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
There seems to be a constant increase in the trend.
Below.
As pointed out before there is a constant increase in the trend of the data.
All below
The ETS model performs the best and passes the residual test.
In this case the additive to log transformation model performs the best followed by the ETS model.
#a
nz_arrivals <- aus_arrivals %>% filter(Origin == "NZ")
autoplot(nz_arrivals, Arrivals) +
ggtitle("New Zealand Arrivals")
#b
train1 <- nz_arrivals %>%
slice(1:(nrow(nz_arrivals)-8))
model <- train1 %>%
model(
ETS(Arrivals ~ error("M") +
trend("A") +
season("M"))
) %>% forecast(h="2 years") %>%
autoplot(level = NULL) +
autolayer(nz_arrivals, Arrivals) +
labs(title = "Forecast vs. actual data")
#d
comparison <- anti_join(nz_arrivals, train1,
by = c("Quarter",
"Origin",
"Arrivals"))
fit9 <- train1 %>%
model(
'ETS model' = ETS(Arrivals),
'Additive to Log' = ETS(log(Arrivals) ~ error("A") + trend("A") + season("A")),
'Seasonal Naïve Method' = SNAIVE(Arrivals),
'STL to log' = decomposition_model(STL(log(Arrivals)),
ETS(season_adjust)))
fore<- fit9 %>%
forecast(h="2 years")
fore %>%
autoplot(level = NULL) +
autolayer(comparison, Arrivals) +
guides(colour=guide_legend(title="Forecast")) +
labs(title = "New Zealand Arrivals Forecast vs. Actual Data.")
#e
fore %>% accuracy(nz_arrivals)
## # A tibble: 4 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Additive … NZ Test -7093. 17765. 12851. -2.17 4.20 0.864 0.918 0.0352
## 2 ETS model NZ Test -3495. 14913. 11421. -0.964 3.78 0.768 0.771 -0.0260
## 3 Seasonal … NZ Test 9709. 18051. 17156. 3.44 5.80 1.15 0.933 -0.239
## 4 STL to log NZ Test -12535. 22723. 16172. -4.02 5.23 1.09 1.17 0.109
choice2 <- fit9 %>% select('ETS model')
choice2 %>%
gg_tsresiduals()
#f
nz <- nz_arrivals %>%
slice(1:(n() - 3)) %>%
stretch_tsibble(.init = 36, .step = 3)
nz %>%
model(
'ETS model' = ETS(Arrivals),
'Additive to Log' = ETS(log(Arrivals) ~ error("A") + trend("A") + season("A")),
'Seasonal Naïve Method' = SNAIVE(Arrivals),
'STL to log transformed' = decomposition_model(STL(log(Arrivals)),
ETS(season_adjust))
) %>% forecast(h = 3) %>% accuracy(nz_arrivals)
## # A tibble: 4 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Additive to Log NZ Test -375. 14347. 11021. 0.200 5.83 0.741 0.746 0.309
## 2 ETS model NZ Test 4627. 15327. 11799. 2.23 6.45 0.793 0.797 0.283
## 3 Seasonal Naïve… NZ Test 8244. 18768. 14422. 3.83 7.76 0.970 0.976 0.566
## 4 STL to log tra… NZ Test 4252. 15618. 11873. 2.04 6.25 0.798 0.812 0.244
a. Apply cross-validation techniques to produce 1 year ahead ETS and seasonal naïve forecasts for Portland cement production (from aus_production). Use a stretching data window with initial size of 5 years, and increment the window by one observation.##Struggled with this one, results are incorrect
#a
port_ets <- aus_production %>% model(ses = ETS(Bricks))
## Warning: 1 error encountered for ses
## [1] ETS does not support missing values.
port_fc <- forecast(port_ets, h=5)
port_fc %>% autoplot(aus_production)
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 5 row(s) containing missing values (geom_path).
## Warning: Removed 20 row(s) containing missing values (geom_path).
port_naive <- aus_production %>% model(snaive = SNAIVE(Bricks))
port_fc2 <- forecast(port_naive, h=5)
port_fc2 %>% autoplot(aus_production)
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 5 row(s) containing missing values (geom_path).
## Warning: Removed 20 row(s) containing missing values (geom_path).
#b
accuracy(port_ets)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ses Training NaN NaN NaN NaN NaN NaN NaN NA
accuracy(port_naive)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 snaive Training 4.21 48.3 35.5 0.742 8.84 1 1 0.796
For beer, the ETS model has the lowest RMSE and we can see from the plots that is the best performing model.
For bricks, the ETS model is the best as well.
• Cost of drug subsidies for diabetes (ATC2 == “A10”) and corticosteroids (ATC2 == “H02”) from PBS.
For diabetes and corticosteroids, the STL decomposition models performed the best in RMSE and seemingly had the best plots.
• Total food retailing turnover for Australia from aus_retail.
For food turnover, the best performing model is the STL decomposition model.
##Beer Production
fr <- aus_production %>%
slice(1:(nrow(aus_production)-12)) %>%
model(
'ETS' = ETS(Beer),
'Seasonal Naïve' = SNAIVE(Beer),
'STL Decomposition' = decomposition_model(STL(log(Beer)),
ETS(season_adjust))
) %>%
forecast(h = "3 years")
fr %>%
autoplot(filter_index(aus_production, "2000 Q1" ~ .),level=NULL) +
guides(colour=guide_legend(title="Forecast")) +
ggtitle("Australian Beer production")
fr %>% accuracy(aus_production)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS Test 0.127 9.62 8.92 0.00998 2.13 0.563 0.489 0.376
## 2 Seasonal Naïve Test -2.92 10.8 9.75 -0.651 2.34 0.616 0.549 0.325
## 3 STL Decomposition Test -2.85 9.87 8.95 -0.719 2.16 0.565 0.502 0.283
## Bricks
bricks <- aus_production %>%
filter(!is.na(Bricks))
bricks %>% autoplot(Bricks)
br_fc <- bricks %>%
slice(1:(nrow(bricks)-12)) %>%
model(
'ETS' = ETS(Bricks),
'Seasonal Naïve' = SNAIVE(Bricks),
'STL Decomposition' = decomposition_model(STL(log(Bricks)),
ETS(season_adjust))
) %>%
forecast(h = "3 years")
br_fc %>%
autoplot(filter_index(bricks, "1980 Q1" ~ .),level=NULL) +
guides(colour=guide_legend(title="Forecast")) +
ggtitle("Australian Bricks production")
br_fc %>% accuracy(bricks)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS Test 2.27 17.5 13.2 0.474 3.31 0.365 0.354 0.339
## 2 Seasonal Naïve Test 32.6 36.5 32.6 7.85 7.85 0.898 0.739 -0.322
## 3 STL Decomposition Test 9.71 18.7 14.9 2.29 3.65 0.411 0.378 0.0933
## Drugs
drugs <- PBS %>%
filter(ATC2 %in% c("A10", "H02")) %>%
group_by(ATC2) %>%
summarise(Cost = sum(Cost))
drugs %>%
autoplot(Cost) +
facet_grid(vars(ATC2), scales = "free_y") +
ggtitle("Cost of subsidies for diabetes and corticosteroids")
diabetes <- drugs %>%
filter(ATC2 %in% "A10")
lambda1 <- diabetes %>%
features(Cost, features = guerrero) %>%
pull(lambda_guerrero)
f1 <- diabetes %>%
filter(Month < max(Month) - 35) %>%
model(
'ETS' = ETS(Cost),
'Seasonal Naïve' = SNAIVE(Cost),
'STL Decomposition' = decomposition_model(STL(box_cox(log(Cost), lambda1)),
ETS(season_adjust))
) %>%
forecast(h = "3 years")
# Corticosteroids
cor <- drugs %>%
filter(ATC2 %in% "H02")
lambda2 <- cor %>%
features(Cost, features = guerrero) %>%
pull(lambda_guerrero)
f2 <- cor %>%
filter(Month < max(Month) - 35) %>%
model(
'ETS' = ETS(Cost),
'Seasonal Naïve' = SNAIVE(Cost),
'STL Decomposition' = decomposition_model(STL(box_cox(log(Cost), lambda2)),
ETS(season_adjust))
) %>%
forecast(h = "3 years")
f <- bind_rows(f1,f2)
f %>% autoplot(drugs, level=NULL) +
guides(colour=guide_legend(title="Forecast")) +
ggtitle("3-year Forecasts")
f %>% accuracy(drugs)
## # A tibble: 6 × 11
## .model ATC2 .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS A10 Test 1.38e6 2.36e6 1.85e6 5.83 8.74 1.89 2.00 0.177
## 2 ETS H02 Test 2.70e4 7.65e4 6.45e4 1.99 7.05 1.09 1.07 -0.0990
## 3 Seasonal N… A10 Test 4.32e6 5.18e6 4.33e6 19.8 19.9 4.42 4.40 0.638
## 4 Seasonal N… H02 Test -1.48e4 8.55e4 7.16e4 -1.31 7.88 1.21 1.20 0.0226
## 5 STL Decomp… A10 Test 9.41e4 1.57e6 1.29e6 -0.269 6.63 1.32 1.33 -0.153
## 6 STL Decomp… H02 Test 2.27e4 6.83e4 5.63e4 1.66 6.24 0.952 0.960 -0.219
## Food
food <- aus_retail %>%
filter(Industry=="Food retailing") %>%
summarise(Turnover=sum(Turnover))
food %>%
autoplot(Turnover) +
labs(title = "Food Turnover, Australia") +
theme(plot.title = element_text(hjust = 0.5))
lambda <- food %>% features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
food_fc <- food %>%
filter(Month < max(Month) - 35) %>%
model(ETS = ETS(Turnover),
SNAIVE = SNAIVE(Turnover),
STL_Decomp = decomposition_model(STL(box_cox(log(Turnover),
lambda)),
ETS(season_adjust))) %>% forecast(h = "3 years")
food_fc %>% autoplot(filter_index(food, "2005 Jan" ~ .), level = NULL) +
labs(title = "Food Turnover Forecasts") +
theme(plot.title = element_text(hjust = 0.5))
food_fc %>% accuracy(food)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS Test -151. 194. 170. -1.47 1.65 0.639 0.634 0.109
## 2 SNAIVE Test 625. 699. 625. 5.86 5.86 2.35 2.29 0.736
## 3 STL_Decomp Test -8.34 155. 132. -0.135 1.24 0.496 0.506 0.256
For trips, it appears to give a forecast that fits to the data set.
For closing prices, the forecast seems okay, it might not be able to show volatility of daily stocks.
For pelts, the model is not great. A wide range for confidence intervals and a constant line for predictions.
The pelt series, the lynx data is where the ETS model does not seem to perform well. Looking at the data I would guess it is because there is no clear trend, and the data almost rises and falls seasonally or periodically.
#a
##Trips
trip <- tourism %>%
summarise(Trips = sum(Trips))
ets1 <- trip %>% model(ETS(Trips))
"report(ets1)"
## [1] "report(ets1)"
ets1 %>%
forecast() %>%
autoplot(trip) +
ggtitle("Australian Trips ETS Forecast")
##Closing Prices
gafa_stock %>%
autoplot(Close) +
facet_grid(vars(Symbol), scales = "free_y")
df <- gafa_stock %>%
group_by(Symbol) %>%
mutate(trading_day = row_number()) %>%
ungroup() %>%
as_tsibble(index = trading_day, regular = TRUE)
df_fit <- df %>%
model(
ETS(Close)
)
"report(df_fit)"
## [1] "report(df_fit)"
df_fit %>%
forecast(h=50) %>%
autoplot(df %>% group_by_key() %>% slice((n() - 100):n())) +
labs(title = "Forecasted Closing Prices") +
theme(plot.title = element_text(hjust = 0.5))
## `mutate_if()` ignored the following grouping variables:
## • Column `Symbol`
#Pelt
pelt %>% model(ETS(Lynx))
## # A mable: 1 x 1
## `ETS(Lynx)`
## <model>
## 1 <ETS(A,N,N)>
pelt %>%
model(ETS(Lynx)) %>% forecast(h=10) %>%
autoplot(pelt) +
ggtitle("Pelt Trading Forecasts")
Shown below. They are fairly close using the time series gas.
## Using ts "gas"
ets2 <- ets(gas, model ="MAM")
ets_fc <- forecast(ets2, h=1)
Holt <- hw(gas, seasonal = "multiplicative", h=1)
ets_fc$mean
## Sep
## 1995 54227.29
Holt$mean
## Sep
## 1995 55764.04
Shown below. Again using the time series gas, we see the results are fairly similar.
##Using gas again
gas_ets <- ets(gas, model = "ANN")
gas_fc <- forecast(gas_ets, 1)
gas_fc
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Sep 1995 60054.66 56440.21 63669.11 54526.83 65582.49
sigma <- 2820.374
alpha <- 0.9999
h <- 2
conf <- sigma*(1+alpha*(h-1))
gas_fc$mean
## Sep
## 1995 60054.66
gas_fc$mean[1] - conf
## [1] 54414.19
gas_fc$lower[1, '95%']
## 95%
## 54526.83
gas_fc$mean[1] + conf
## [1] 65695.13
gas_fc$upper[1, '95%']
## 95%
## 65582.49
##Chapter 9 Exercises 1. Figure 9.32 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers. a. Explain the differences among these figures. Do they all indicate that the data are white noise?
They do all indicate white noise. All of them are within the bounds of the 95% interval. The reason the bounds narrow is because of the increase in the numbers added to the time series.
Figure 9.32: Left: ACF for a white noise series of 36 numbers. Middle: ACF for a white noise series of 360 numbers. Right: ACF for a white noise series of 1,000 numbers. b. Why are the critical values at different distances from the mean of zero? Why are the autocorrelations different in each figure when they each refer to white noise?
The differing random numbers in the time series causes the critical values to be at different distances.
Looking at the ACF, we see that all of the lags are outside of the intervals, and the PACF the first lag shows non-stationarity. The data very clearly must be differenced.
gafa_stock %>%
filter(Symbol == 'AMZN') %>%
autoplot(Close) +
labs(title='Amazon Closing Prices')
gafa_stock %>%
filter(Symbol == 'AMZN') %>%
gg_tsdisplay(Close, plot_type = 'partial')
## Warning: Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.
## Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.
Shown below.
Shown below.
Shown below.
#a
turkey <- global_economy %>%
filter(Country == 'Turkey') %>%
select(Country, GDP)
lambda <- turkey %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
turkey %>%
mutate(GDP = box_cox(GDP, lambda)) %>%
features(GDP, unitroot_ndiffs)
## # A tibble: 1 × 2
## Country ndiffs
## <fct> <int>
## 1 Turkey 1
#b
tasmania <- aus_accommodation %>%
filter(State == 'Tasmania') %>%
select(State, Takings)
lambda <- tasmania %>%
features(Takings, features = guerrero) %>%
pull(lambda_guerrero)
tasmania %>%
mutate(Takings = box_cox(Takings, lambda)) %>%
features(Takings, unitroot_ndiffs)
## # A tibble: 1 × 2
## State ndiffs
## <chr> <int>
## 1 Tasmania 1
#c
lambda <- souvenirs %>%
features(Sales, features = guerrero) %>%
pull(lambda_guerrero)
souvenirs %>%
mutate(Sales = box_cox(Sales, lambda)) %>%
features(Sales, unitroot_ndiffs)
## # A tibble: 1 × 1
## ndiffs
## <int>
## 1 1
(1-B)^1(yt)
Shown below.
set.seed(111111112)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
lambda <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
myseries %>%
mutate(Turnover = box_cox(Turnover, lambda)) %>%
features(Turnover, unitroot_ndiffs)
## # A tibble: 1 × 3
## State Industry ndiffs
## <chr> <chr> <int>
## 1 Australian Capital Territory Liquor retailing 1
Below.
Shown below by approaching phi closer to 0 and 1. The plot
Below.
Again, shown below this time with phi = 0.9 and phi = 0.2.
Below.
Below.
Graphs below (one is in part e). Based on the ACF, the part f AR(2) model is non-stationary as hinted in the question. The ARMA model in part e appears to look like white noise.
#a
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + e[i]
simulate <- tsibble(idx = seq_len(100), y = y, index = idx)
#b
simulate %>% autoplot()
## Plot variable not specified, automatically selected `.vars = y`
ggAcf(y)
y <- ts(numeric(100))
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.8*y[i-1] + e[i]
autoplot(y) + ggtitle("phi=0.8")
y <- ts(numeric(100))
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.4*y[i-1] + e[i]
autoplot(y) + ggtitle("phi=0.4")
#c
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*e[i-1] + e[i]
#d
simulate2 <- tsibble(idex = seq_len(100), y = y, index = idex)
simulate2 %>%
autoplot()
## Plot variable not specified, automatically selected `.vars = y`
for(i in 2:100)
y[i] <- 0.9*e[i-1] + e[i]
simulate3 <- tsibble(idex = seq_len(100), y = y, index = idex)
simulate3 %>%
autoplot()
## Plot variable not specified, automatically selected `.vars = y`
for(i in 2:100)
y[i] <- 0.2*e[i-1] + e[i]
simulate4 <- tsibble(idex = seq_len(100), y = y, index = idex)
simulate4 %>%
autoplot()
## Plot variable not specified, automatically selected `.vars = y`
#e
phi <- 0.6
theta <- 0.6
sigma <- 1
y_new <- ts(numeric(100))
e <- rnorm(1000, sigma)
for(i in 2:100)
y_new[i] <- phi*y_new[i-1] + theta*e[i-1] + e[i]
#f
phi_1 <- -0.8
phi_2 <- 0.3
sigma <- 1
y_new2 <- ts(numeric(100))
e <- rnorm(100, sigma)
for(i in 3:100)
y_new2[i] <- phi_1*y_new2[i-1] + phi_2*y_new2[i-2] + e[i]
#g
simulate5 <- tsibble(idex = seq_len(100), y = y_new, index = idex)
simulate5 %>% autoplot()
## Plot variable not specified, automatically selected `.vars = y`
autoplot(ACF(simulate4))
## Response variable not specified, automatically selected `var = y`
autoplot(ACF(simulate5))
## Response variable not specified, automatically selected `var = y`
ARIMA(0,2,1) is selected. Residuals show that model is white noise. Plots below.
(1−B)2(yt) = c + (1+θ1B)*et
AIC is slightly higher, the forecast is pretty similar. The RMSE for part b is slightly lower (nearly identical).
Shown below.
The model performs well, lower AIC than the original.
#a
f <- aus_airpassengers %>%
model(
search = ARIMA(Passengers, stepwise = FALSE)
)
f
## # A mable: 1 x 1
## search
## <model>
## 1 <ARIMA(0,2,1)>
glance(f)
## # A tibble: 1 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 search 4.31 -97.0 198. 198. 202. <cpl [0]> <cpl [1]>
f %>%
gg_tsresiduals()
augment(f) %>%
features(.innov, ljung_box, lag = 10, dof = 3)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 search 6.70 0.461
f %>%
forecast(h=10) %>%
autoplot(aus_airpassengers)
accuracy(f)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 search Training 0.345 2.01 1.22 0.953 4.81 0.700 0.807 -0.174
#b
#c
f2 <- aus_airpassengers %>%
model(
arima1 = ARIMA(Passengers ~ pdq(0,1,0))
)
glance(f2)
## # A tibble: 1 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima1 4.27 -98.2 200. 201. 204. <cpl [0]> <cpl [0]>
accuracy(f2)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima1 Training 0.000126 2.02 1.31 -2.46 5.79 0.754 0.813 -0.0239
f2 %>%
gg_tsresiduals()
augment(f2) %>%
features(.innov, ljung_box, lag = 10, dof = 3)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 arima1 6.77 0.453
f2 %>%
forecast(h=10) %>%
autoplot(aus_airpassengers)
#d
f3 <- aus_airpassengers %>%
model(
arima2 = ARIMA(Passengers ~ pdq(0,1,2))
)
glance(f3)
## # A tibble: 1 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima2 4.43 -97.9 204. 205. 211. <cpl [0]> <cpl [2]>
accuracy(f3)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima2 Training 0.00392 2.01 1.28 -2.33 5.67 0.740 0.809 0.00355
f3 %>%
gg_tsresiduals()
augment(f3) %>%
features(.innov, ljung_box, lag = 10, dof = 3)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 arima2 6.10 0.528
f3 %>%
forecast(h=10) %>%
autoplot(aus_airpassengers)
#e
f4 <- aus_airpassengers %>%
model(
arima3 = ARIMA(Passengers ~ pdq(0,2,1))
)
glance(f4)
## # A tibble: 1 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima3 4.31 -97.0 198. 198. 202. <cpl [0]> <cpl [1]>
accuracy(f4)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima3 Training 0.345 2.01 1.22 0.953 4.81 0.700 0.807 -0.174
f4 %>%
gg_tsresiduals()
augment(f4) %>%
features(.innov, ljung_box, lag = 10, dof = 3)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 arima3 6.70 0.461
f4 %>%
forecast(h=10) %>%
autoplot(aus_airpassengers)
Below
Below
Below
The best model appears to be the ARIMA from part c, it has the lower AIC and AICc.
The residuals show that the model is white noise.
Shown below, the plots show that the forecasts look reasonable.
Plot below, the ETS model does not look to have reasonable forecasts.
#a
us <- global_economy %>%
filter(Code == 'USA') %>%
select(Country, GDP)
us%>% autoplot(GDP)
lambda <- us %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
us <- us %>%
mutate(GDP = box_cox(GDP, lambda))
#b
arima <- us %>%
model(
arima = ARIMA(GDP, stepwise = FALSE, approx = FALSE)
)
glance(arima)
## # A tibble: 1 × 9
## Country .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 United States arima 5479. -325. 657. 657. 663. <cpl [1]> <cpl [0]>
#c
arima2 <- us %>%
model(
arima2 = ARIMA(GDP ~ pdq(1,2,2))
)
glance(arima2)
## # A tibble: 1 × 9
## Country .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 United States arima2 5845. -321. 650. 651. 658. <cpl [1]> <cpl [2]>
#d
arima2 %>%
gg_tsresiduals()
augment(arima2) %>%
features(.innov, ljung_box, lag = 10, dof = 3)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 United States arima2 3.83 0.799
#e
arima2 %>%
forecast(h=10) %>%
autoplot(us)
#f
us %>% model(
ETS(GDP)) %>% forecast(h = 10) %>% autoplot(global_economy)
I selected Japan and the plot shows exponential growth then a steady decline at about 2000. There is also a steep decline in the early 2000s.
Differencing and plots below.
The first lag is still strong, indicating it could still not be white noise.
Similar to the ACF, the PACF graph indicates the data could still be white noise. The lags do seem to decat.
Based on the ACF and PACF I would say an ARIMA(0, d, q), I am unsure of the values though.
The ARIMA is ARIMA(0,1,1)(1,1,1), I think the codes model will perform better.
#a
japan <- aus_arrivals %>% filter(Origin == "Japan")
japan %>% autoplot(Arrivals)
#b
japan %>% ACF(Arrivals) %>%
autoplot() + labs(subtitle = "Japanese Arrivals")
japan %>% PACF(Arrivals) %>%
autoplot() + labs(subtitle = "Japanese Arrivals")
japan %>% ACF(difference(Arrivals)) %>%
autoplot() + labs(subtitle = "Changes in Japanese Arrivals")
japan %>%
mutate(diff_arrival = difference(Arrivals)) %>%
features(diff_arrival, ljung_box, lag = 10)
## # A tibble: 1 × 3
## Origin lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 Japan 346. 0
#c
japan %>% ACF(difference(Arrivals)) %>%
autoplot() + labs(subtitle = "Changes in Japanese Arrivals")
#d
japan %>% PACF(difference(Arrivals)) %>%
autoplot() + labs(subtitle = "Changes in Japanese Arrivals")
#f
arima <- japan %>% model(arima = ARIMA(Arrivals))
arima
## # A mable: 1 x 2
## # Key: Origin [1]
## Origin arima
## <chr> <model>
## 1 Japan <ARIMA(0,1,1)(1,1,1)[4]>
The trend is steadily increasing, and the seasonality is highly volatile.
Below.
No the residuals show that it is not stationary, differencing below.
The model with the lowest AICc values was the ARIMA(2,0,1)(2,1,1)[12] with drift.
The residuals appear to be white noise, although some lags are still outside of the intervals.
Probably 1-2 years, based on the intervals in the plot.
#a
tp <- us_employment %>% filter(Series_ID == "CEU0500000001")
tp %>% model(stl = STL(Employed ~ trend(window = 7) +
season(window = "periodic"),
robust = TRUE)) %>%
components() %>%
autoplot()
#b
lambda <- tp %>%
features(Employed, features = guerrero) %>%
pull(lambda_guerrero)
tp <- tp %>%
mutate(Employed = box_cox(Employed, lambda))
#c
tp %>%
mutate(diff_employed = difference(Employed)) %>%
features(diff_employed, ljung_box, lag = 10)
## # A tibble: 1 × 3
## Series_ID lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 CEU0500000001 204. 0
tp %>% ACF(difference(Employed)) %>%
autoplot() + labs(subtitle = "Changes in Employed")
tp %>% PACF(difference(Employed)) %>%
autoplot() + labs(subtitle = "Changes in Employed")
#d
arima_new <- tp %>% model(arima = ARIMA((Employed)))
arima_new
## # A mable: 1 x 2
## # Key: Series_ID [1]
## Series_ID arima
## <chr> <model>
## 1 CEU0500000001 <ARIMA(2,0,1)(2,1,1)[12] w/ drift>
glance(arima_new)
## # A tibble: 1 × 9
## Series_ID .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 CEU0500000001 arima 44.9 -3182. 6380. 6380. 6419. <cpl [26]> <cpl [13]>
arima_new2 <- tp %>% model(arima = ARIMA(Employed ~ 0 + pdq(0, 1, 1) + PDQ(0, 1, 1)))
arima_new2
## # A mable: 1 x 2
## # Key: Series_ID [1]
## Series_ID arima
## <chr> <model>
## 1 CEU0500000001 <ARIMA(0,1,1)(0,1,1)[12]>
glance(arima_new2)
## # A tibble: 1 × 9
## Series_ID .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 CEU0500000001 arima 55.1 -3278. 6562. 6562. 6576. <cpl [0]> <cpl [13]>
#e
arima_new %>% gg_tsresiduals()
#f
a <- 12*15
arima_new %>%
forecast(h=a) %>%
autoplot(tp)
Box-Cox transformation below.
Differencing shown below.
The model with the lowest AIC value is ARIMA(2,1,2)(1,1,1)[4].
The residuals resemble white noise.
Forecasts below.
The forecasts perform very similarly, although the ARIMA has a slightly lower RMSE.
#a
aus_production %>% autoplot(Gas)
lambda <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_new <- aus_production %>%
mutate(Gas = box_cox(Gas, lambda))
aus_new %>% autoplot(Gas)
#b
aus_new %>% ACF(Gas) %>%
autoplot() + labs(subtitle = "Gas")
aus_new %>% PACF(Gas) %>%
autoplot() + labs(subtitle = "Gas")
aus_new %>% features(Gas, ljung_box, lag = 10)
## # A tibble: 1 × 2
## lb_stat lb_pvalue
## <dbl> <dbl>
## 1 1960. 0
aus_new %>%
mutate(diff_gas = difference(Gas)) %>%
features(diff_gas, ljung_box, lag = 10)
## # A tibble: 1 × 2
## lb_stat lb_pvalue
## <dbl> <dbl>
## 1 947. 0
aus_new %>% ACF(difference(Gas)) %>%
autoplot() + labs(subtitle = "Changes in Gas")
aus_new %>% PACF(difference(Gas)) %>%
autoplot() + labs(subtitle = "Changes in Gas")
#c
arima_n <- aus_new %>% model(arima = ARIMA((Gas)))
arima_n
## # A mable: 1 x 1
## arima
## <model>
## 1 <ARIMA(2,1,2)(1,1,1)[4]>
glance(arima_n)
## # A tibble: 1 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima 0.00610 242. -469. -469. -446. <cpl [6]> <cpl [6]>
arima_n2 <- aus_new %>% model(arima = ARIMA(Gas ~ 0 + pdq(0, 1, 1) + PDQ(0, 1, 1)))
arima_n2
## # A mable: 1 x 1
## arima
## <model>
## 1 <ARIMA(0,1,1)(0,1,1)[4]>
glance(arima_n2)
## # A tibble: 1 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima 0.00641 235. -464. -463. -453. <cpl [0]> <cpl [5]>
#d
arima_n %>% gg_tsresiduals()
#e
arima_n %>%
forecast(h=24) %>%
autoplot(aus_new)
accuracy(arima_n)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima Training 0.000248 0.0761 0.0573 0.0694 1.44 0.454 0.392 0.0336
#f
ets_n <- aus_new %>% model(ETS(Gas))
glance(ets_n)
## # A tibble: 1 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Gas) 0.00638 -31.8 81.6 82.5 112. 0.00614 0.0112 0.0602
ets_n %>%
forecast(h=24) %>%
autoplot(aus_new)
accuracy(ets_n)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Gas) Training -0.000240 0.0784 0.0602 0.0460 1.55 0.477 0.404 -0.00703
The forecasts and plots below, I think the previous model is the better approach based on the plots and the accuracy() reports.
aus_gas <- aus_production %>% select("Quarter", "Gas")
aus_stl <- stl(aus_gas, t.window = 13, s.window = "periodic")
sadj <- seasadj(aus_stl)
model <- stlf(sadj, method = "arima")
stl_fc <- forecast(model)
plot(stl_fc)
accuracy(stl_fc)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.001806907 3.474857 2.513441 -4.664653 9.987273 0.4508603
## ACF1
## Training set -0.01193205
Below.
Forecasts below.
They both seem reasonable.
#a
arima_t <- tourism %>% model(arima = ARIMA(Trips))
## Warning in sqrt(diag(best$var.coef)): NaNs produced
#b
arima_t %>% forecast(h=24) %>% autoplot(tourism)
t_fc <- arima_t %>% forecast(h=24)
#c
Mel <- t_fc %>% filter(Region == "Melbourne")
Mel
## # A fable: 96 x 7 [1Q]
## # Key: Region, State, Purpose, .model [4]
## Region State Purpose .model Quarter Trips .mean
## <chr> <chr> <chr> <chr> <qtr> <dist> <dbl>
## 1 Melbourne Victoria Business arima 2018 Q1 N(616, 3668) 616.
## 2 Melbourne Victoria Business arima 2018 Q2 N(683, 4394) 683.
## 3 Melbourne Victoria Business arima 2018 Q3 N(695, 4559) 695.
## 4 Melbourne Victoria Business arima 2018 Q4 N(677, 4723) 677.
## 5 Melbourne Victoria Business arima 2019 Q1 N(638, 5268) 638.
## 6 Melbourne Victoria Business arima 2019 Q2 N(706, 5575) 706.
## 7 Melbourne Victoria Business arima 2019 Q3 N(718, 5801) 718.
## 8 Melbourne Victoria Business arima 2019 Q4 N(701, 6027) 701.
## 9 Melbourne Victoria Business arima 2020 Q1 N(664, 6653) 664.
## 10 Melbourne Victoria Business arima 2020 Q2 N(729, 7033) 729.
## # … with 86 more rows
Snow <- t_fc %>% filter(Region == "Snowy Mountains")
Snow
## # A fable: 96 x 7 [1Q]
## # Key: Region, State, Purpose, .model [4]
## Region State Purpose .model Quarter Trips .mean
## <chr> <chr> <chr> <chr> <qtr> <dist> <dbl>
## 1 Snowy Mountains New South Wales Business arima 2018 Q1 N(16, 100) 16.2
## 2 Snowy Mountains New South Wales Business arima 2018 Q2 N(16, 100) 16.2
## 3 Snowy Mountains New South Wales Business arima 2018 Q3 N(16, 100) 16.2
## 4 Snowy Mountains New South Wales Business arima 2018 Q4 N(16, 100) 16.2
## 5 Snowy Mountains New South Wales Business arima 2019 Q1 N(16, 100) 16.2
## 6 Snowy Mountains New South Wales Business arima 2019 Q2 N(16, 100) 16.2
## 7 Snowy Mountains New South Wales Business arima 2019 Q3 N(16, 100) 16.2
## 8 Snowy Mountains New South Wales Business arima 2019 Q4 N(16, 100) 16.2
## 9 Snowy Mountains New South Wales Business arima 2020 Q1 N(16, 100) 16.2
## 10 Snowy Mountains New South Wales Business arima 2020 Q2 N(16, 100) 16.2
## # … with 86 more rows
Below.
Reasonably accurate.
#a
set.seed(111111112)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
model <- myseries %>% model(arima = ARIMA(Turnover))
#b
model %>%
forecast(h=24) %>%
autoplot(myseries)
my_fc <- model %>%
forecast(h=24)
my_fc
## # A fable: 24 x 6 [1M]
## # Key: State, Industry, .model [1]
## State Industry .model Month Turnover .mean
## <chr> <chr> <chr> <mth> <dist> <dbl>
## 1 Australian Capital Territory Liquor retail… arima 2019 Jan N(13, 0.31) 13.1
## 2 Australian Capital Territory Liquor retail… arima 2019 Feb N(13, 0.37) 12.6
## 3 Australian Capital Territory Liquor retail… arima 2019 Mar N(15, 0.43) 14.6
## 4 Australian Capital Territory Liquor retail… arima 2019 Apr N(14, 0.49) 13.5
## 5 Australian Capital Territory Liquor retail… arima 2019 May N(13, 0.53) 13.3
## 6 Australian Capital Territory Liquor retail… arima 2019 Jun N(13, 0.58) 12.8
## 7 Australian Capital Territory Liquor retail… arima 2019 Jul N(13, 0.61) 13.0
## 8 Australian Capital Territory Liquor retail… arima 2019 Aug N(14, 0.65) 13.6
## 9 Australian Capital Territory Liquor retail… arima 2019 Sep N(14, 0.67) 13.7
## 10 Australian Capital Territory Liquor retail… arima 2019 Oct N(14, 0.7) 14.3
## # … with 14 more rows
Below.
Below, ARIMA(2,0,1)
The residuals show it is not white noise. The acf has strong lags that in both directions, while the pacf has lags that are slowly decreasing in intensity.
Below.
They are very different although may be a step behind. I think the drop-off from the last two years of data may have something to do with that.
#a
pelt %>% autoplot(Hare)
#b
auto.arima(pelt$Hare)
## Series: pelt$Hare
## ARIMA(2,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 mean
## 1.3811 -0.7120 -0.5747 45143.349
## s.e. 0.1016 0.0784 0.1240 3242.191
##
## sigma^2 = 583278195: log likelihood = -1046.07
## AIC=2102.15 AICc=2102.85 BIC=2114.7
#c
pelt %>% ACF((Hare)) %>%
autoplot() + labs(subtitle = "Changes in Hare Pelts")
pelt %>% PACF((Hare)) %>%
autoplot() + labs(subtitle = "Changes in Hare Pelts")
pelt %>%
mutate(hare = (Hare)) %>%
features(hare, ljung_box, lag = 10)
## # A tibble: 1 × 2
## lb_stat lb_pvalue
## <dbl> <dbl>
## 1 129. 0
#d
c = 30993
phi1 = 0.82
phi2 = -0.29
phi3 = -0.01
phi4 = -0.22
Hares_1931 <- 19520
Hares_1932 <- 82110
Hares_1933 <- 89760
Hares_1934 <- 81660
Hares_1935 <- 15760
Hares_1936 = c + phi1*(Hares_1935) + phi2*(Hares_1934) + phi3*(Hares_1933) + phi4*(Hares_1932)
Hares_1937 = c + phi1*Hares_1936 + phi2*(Hares_1935) + phi3*(Hares_1934) + phi4*(Hares_1933)
Hares_1938 = c + phi1*Hares_1937 + phi2*(Hares_1936) + phi3*(Hares_1935) + phi4*(Hares_1934)
c(Hares_1936, Hares_1937, Hares_1938)
## [1] 1273.00 6902.66 18161.21
#e
hare_arima <- pelt %>% model(arima = ARIMA(Hare))
hare_arima %>%
forecast(h=24) %>%
autoplot(pelt)
hare_fc <- hare_arima %>%
forecast(h=24)
hare_fc
## # A fable: 24 x 4 [1Y]
## # Key: .model [1]
## .model Year Hare .mean
## <chr> <dbl> <dist> <dbl>
## 1 arima 1936 N(6634, 5.8e+08) 6634.
## 2 arima 1937 N(12878, 9.6e+08) 12878.
## 3 arima 1938 N(27999, 1.1e+09) 27999.
## 4 arima 1939 N(44437, 1.1e+09) 44437.
## 5 arima 1940 N(56373, 1.1e+09) 56373.
## 6 arima 1941 N(61156, 1.2e+09) 61156.
## 7 arima 1942 N(59263, 1.3e+09) 59263.
## 8 arima 1943 N(53244, 1.3e+09) 53244.
## 9 arima 1944 N(46278, 1.3e+09) 46278.
## 10 arima 1945 N(40943, 1.3e+09) 40943.
## # … with 14 more rows
Below.
Below. ARIMA(0,2,1) c. Explain why this model was chosen using the ACF and PACF of the differenced series.
The residuals show white noise, with the acf lags slowly decreasing and the pacf has a very strong first lag.
Below.
They are very similar, this may be because the data is fairly constant and predictable in its trend
#a
swiss <- global_economy %>% filter(Country == "Switzerland")
swiss %>% autoplot(Population)
#b
auto.arima(swiss$Population)
## Series: swiss$Population
## ARIMA(0,2,1)
##
## Coefficients:
## ma1
## 0.8765
## s.e. 0.0962
##
## sigma^2 = 130226144: log likelihood = -602.65
## AIC=1209.3 AICc=1209.53 BIC=1213.35
#c
swiss %>% ACF((Population)) %>%
autoplot() + labs(subtitle = "Changes in Swiss Population")
swiss %>% PACF((Population)) %>%
autoplot() + labs(subtitle = "Changes in Swiss Population")
swiss %>%
mutate(pop = (Population)) %>%
features(pop, ljung_box, lag = 10)
## # A tibble: 1 × 3
## Country lb_stat lb_pvalue
## <fct> <dbl> <dbl>
## 1 Switzerland 289. 0
#d
c= 0.0053
phi1= 1.64
phi2= -1.17
phi3= 0.45
pop2013 = 8.09
pop2014= 8.19
pop2015 = 8.28
pop2016 = 8.37
pop2017 = 8.47
pop2018= c + pop2017 + phi1*(pop2017 - pop2016) + phi2*(pop2016 - pop2015) + phi3*(pop2015 - pop2014)
pop2019= c + pop2018 + phi1*(pop2018 - pop2017) + phi2*(pop2017 - pop2016) + phi3*(pop2016 - pop2015)
pop2020= c + pop2019 + phi1*(pop2019 - pop2018) + phi2*(pop2018 - pop2017) + phi3*(pop2017 - pop2016)
c(pop2018, pop2019, pop2020)
## [1] 8.57450 8.67468 8.76701
#e
pop_arima <- swiss %>% model(arima = ARIMA(Population))
pop_arima %>%
forecast(h=24) %>%
autoplot(swiss)
pop_fc <- pop_arima %>%
forecast(h=24)
pop_fc
## # A fable: 24 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Population .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Switzerland arima 2018 N(8561084, 1.2e+08) 8561084.
## 2 Switzerland arima 2019 N(8649542, 1e+09) 8649542.
## 3 Switzerland arima 2020 N(8732611, 3.1e+09) 8732611.
## 4 Switzerland arima 2021 N(8811287, 6.4e+09) 8811287.
## 5 Switzerland arima 2022 N(8886382, 1.1e+10) 8886382.
## 6 Switzerland arima 2023 N(9e+06, 1.7e+10) 8958558.
## 7 Switzerland arima 2024 N(9e+06, 2.4e+10) 9028354.
## 8 Switzerland arima 2025 N(9096210, 3.2e+10) 9096210.
## 9 Switzerland arima 2026 N(9162484, 4e+10) 9162484.
## 10 Switzerland arima 2027 N(9227469, 5e+10) 9227469.
## # … with 14 more rows
Below.
Below.
The residuals show they are white noise.
Below.
Below.
The residuals show that they might not be white noise.
Below.
I prefer the ARIMA because the residuals show white noise and the forecasts appear to be practically identical.
#a
library(Quandl)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
##
## index
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
## Registered S3 method overwritten by 'httr':
## method from
## print.response rmutil
y <- as_tsibble(Quandl::Quandl("NSE/OIL", collapse="monthly", start_date="2013-01-01", type="ts"), index = Date)
#b
plot(y)
y %>% autoplot(value)
y %>% ACF(value)
## # A tsibble: 126 x 3 [1M]
## # Key: key [7]
## key lag acf
## <chr> <lag> <dbl>
## 1 Close 1M 0.927
## 2 Close 2M 0.849
## 3 Close 3M 0.770
## 4 Close 4M 0.698
## 5 Close 5M 0.628
## 6 Close 6M 0.552
## 7 Close 7M 0.498
## 8 Close 8M 0.450
## 9 Close 9M 0.420
## 10 Close 10M 0.396
## # … with 116 more rows
y %>% PACF(value)
## # A tsibble: 126 x 3 [1M]
## # Key: key [7]
## key lag pacf
## <chr> <lag> <dbl>
## 1 Close 1M 0.927
## 2 Close 2M -0.0697
## 3 Close 3M -0.0446
## 4 Close 4M 0.000637
## 5 Close 5M -0.0334
## 6 Close 6M -0.0865
## 7 Close 7M 0.121
## 8 Close 8M -0.0107
## 9 Close 9M 0.0855
## 10 Close 10M 0.0240
## # … with 116 more rows
arima_y <- y %>% model(arima = ARIMA(value))
#c
arima_y[1,] %>%
gg_tsresiduals()
arima_y[2,] %>%
gg_tsresiduals()
arima_y[3,] %>%
gg_tsresiduals()
arima_y[4,] %>%
gg_tsresiduals()
arima_y[5,] %>%
gg_tsresiduals()
arima_y[6,] %>%
gg_tsresiduals()
arima_y[7,] %>%
gg_tsresiduals()
#d
arima_y %>%
forecast(h=36) %>%
autoplot(y)
y_fc <- arima_y %>%
forecast(h=36)
y_fc
## # A fable: 252 x 5 [1M]
## # Key: key, .model [7]
## key .model index value .mean
## <chr> <chr> <mth> <dist> <dbl>
## 1 Close arima 2019 Feb N(175, 1230) 175.
## 2 Close arima 2019 Mar N(175, 2459) 175.
## 3 Close arima 2019 Apr N(175, 3689) 175.
## 4 Close arima 2019 May N(175, 4918) 175.
## 5 Close arima 2019 Jun N(175, 6148) 175.
## 6 Close arima 2019 Jul N(175, 7378) 175.
## 7 Close arima 2019 Aug N(175, 8607) 175.
## 8 Close arima 2019 Sep N(175, 9837) 175.
## 9 Close arima 2019 Oct N(175, 11067) 175.
## 10 Close arima 2019 Nov N(175, 12296) 175.
## # … with 242 more rows
#e
ets_y <- y %>% model(ets = ETS(value))
#f
ets_y[1,] %>%
gg_tsresiduals()
ets_y[2,] %>%
gg_tsresiduals()
ets_y[3,] %>%
gg_tsresiduals()
ets_y[4,] %>%
gg_tsresiduals()
ets_y[5,] %>%
gg_tsresiduals()
ets_y[6,] %>%
gg_tsresiduals()
ets_y[7,] %>%
gg_tsresiduals()
#g
ets_y %>%
forecast(h=36) %>%
autoplot(y)
ye_fc <- arima_y %>%
forecast(h=36)
ye_fc
## # A fable: 252 x 5 [1M]
## # Key: key, .model [7]
## key .model index value .mean
## <chr> <chr> <mth> <dist> <dbl>
## 1 Close arima 2019 Feb N(175, 1230) 175.
## 2 Close arima 2019 Mar N(175, 2459) 175.
## 3 Close arima 2019 Apr N(175, 3689) 175.
## 4 Close arima 2019 May N(175, 4918) 175.
## 5 Close arima 2019 Jun N(175, 6148) 175.
## 6 Close arima 2019 Jul N(175, 7378) 175.
## 7 Close arima 2019 Aug N(175, 8607) 175.
## 8 Close arima 2019 Sep N(175, 9837) 175.
## 9 Close arima 2019 Oct N(175, 11067) 175.
## 10 Close arima 2019 Nov N(175, 12296) 175.
## # … with 242 more rows
#h
accuracy(arima_y)
## # A tibble: 7 × 11
## key .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Close arima Trai… -5.00 3.48e1 2.38e1 -2.03 6.35 0.254 0.312 0.0541
## 2 High arima Trai… -4.60 3.47e1 2.44e1 -1.90 6.38 0.257 0.308 0.139
## 3 Last arima Trai… -5.00 3.46e1 2.34e1 -2.03 6.33 0.252 0.311 0.0413
## 4 Low arima Trai… -4.63 3.31e1 2.28e1 -1.95 6.23 0.244 0.296 0.0779
## 5 Open arima Trai… -4.83 3.57e1 2.50e1 -2.00 6.62 0.265 0.319 0.101
## 6 Total Tra… arima Trai… 66.2 1.79e6 8.35e5 -139. 153. 0.736 0.665 0.00321
## 7 Turnover … arima Trai… -4.19 7.07e3 2.99e3 -134. 146. 0.694 0.686 0.00611
accuracy(ets_y)
## # A tibble: 7 × 11
## key .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Close ets Trai… -4.96e0 3.48e1 2.38e1 -2.02 6.36 0.254 0.312 0.0563
## 2 High ets Trai… -5.14e0 3.58e1 2.50e1 -2.04 6.51 0.263 0.318 0.116
## 3 Last ets Trai… -5.00e0 3.46e1 2.35e1 -2.04 6.34 0.253 0.311 0.0504
## 4 Low ets Trai… -4.59e0 3.31e1 2.28e1 -1.94 6.24 0.244 0.296 0.0785
## 5 Open ets Trai… -4.78e0 3.57e1 2.50e1 -2.00 6.63 0.266 0.319 0.101
## 6 Total Tr… ets Trai… -4.65e4 1.64e6 9.05e5 -108. 143. 0.798 0.609 0.281
## 7 Turnover… ets Trai… -1.53e3 7.36e3 4.21e3 -166. 189. 0.980 0.715 0.305