library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.4 v tsibble 1.0.1.9999
## v dplyr 1.0.7 v tsibbledata 0.3.0.9000
## v tidyr 1.1.3 v feasts 0.2.2
## v lubridate 1.7.10 v fable 0.3.1
## v ggplot2 3.3.5
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(glue)
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
Figure 9.32 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers.
The figures indicate random noise, as best as I can tell they are within the dashed lines of the criital values
The larger the sample the smaller the critical value becomes (as it is over the square root of the sample size). Apparent autocorrelation will frequently occur in small sample sets by chance, while larger sets increase the rareness of runs.
A classic example of a non-stationary series are stock prices. Plot the daily closing prices for Amazon stock (contained in gafa_stock), along with the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced.
amznStock <-gafa_stock%>% filter(Symbol=='AMZN')
amznStock %>% autoplot( vars(Close))
amznStock %>% gg_tsdisplay(plot="partial", y=Close)
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.
It is rather clear that the plot isn’t horizontal but has a strong trend.
amznStock %>% gg_tsdisplay(difference(Close), plot_type='partial')
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
Running a simple difference we see a much more horizontal plot, though the increasing trend causes the differences to be magnified versus the initial stock price. This suggests a box-cox transformation might be a good idea.
For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data.
turkishGDP <- global_economy%>%filter(Country=="Turkey")
lambda<-BoxCox.lambda((turkishGDP%>% select(GDP))[[1]])
turkishGDP<-turkishGDP%>% mutate(boxGDP = box_cox(GDP,lambda=lambda))
turkishGDP%>% gg_tsdisplay(boxGDP,plot_type = "partial")+labs(title=glue("no difference lambda:{lambda}"))
turkishGDP <- global_economy%>%filter(Country=="Turkey")%>%mutate(difGDP=difference(GDP))
lambda<-BoxCox.lambda((turkishGDP%>% select(difGDP))[[1]])
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
turkishGDP<-turkishGDP%>% mutate(boxGDP = box_cox(difGDP,lambda))
turkishGDP%>% gg_tsdisplay(boxGDP,plot_type = "partial")+labs(title=glue("single difference lambda:{lambda}"))
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
turkishGDP <- global_economy%>%filter(Country=="Turkey")%>%mutate(difGDP=difference(GDP,differences=2))
lambda<-BoxCox.lambda((turkishGDP%>% select(difGDP))[[1]])
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
turkishGDP<-turkishGDP%>% mutate(boxGDP = box_cox(difGDP,lambda))
turkishGDP%>% gg_tsdisplay(boxGDP,plot_type = "partial")+labs(title=glue("double difference lambda:{lambda}"))
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_point).
A single difference with a box cox lambda of .122 should be decent
tasmaniaTakings <-aus_accommodation%>%filter(State=="Tasmania")
lambda<-BoxCox.lambda((tasmaniaTakings%>% as.tibble%>% select(Takings ))[[1]])
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
tasmaniaTakings<-tasmaniaTakings%>% mutate(boxTakings = box_cox(Takings ,lambda=lambda))
tasmaniaTakings%>% gg_tsdisplay(boxTakings ,plot_type = "partial")+labs(title=glue("no difference lambda:{lambda}"))
tasmaniaTakings <-aus_accommodation%>%filter(State=="Tasmania")%>%mutate(difTakings=difference(Takings ))
lambda<-BoxCox.lambda((tasmaniaTakings%>% as.tibble%>% select(difTakings ))[[1]])
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
turkishGDP<-tasmaniaTakings %>% mutate(boxTakings = box_cox(difTakings ,lambda))
turkishGDP%>% gg_tsdisplay(boxTakings ,plot_type = "partial")+labs(title=glue("single difference lambda:{lambda}"))
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
tasmaniaTakings <- aus_accommodation%>%filter(State=="Tasmania")%>%mutate(difTakings =difference(Takings ,differences=2))
lambda<-BoxCox.lambda((tasmaniaTakings %>% as.tibble%>% select(difTakings ))[[1]])
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
tasmaniaTakings<-tasmaniaTakings %>% mutate(boxTakings = box_cox(difTakings ,lambda))
tasmaniaTakings%>% gg_tsdisplay(boxTakings ,plot_type = "partial")+labs(title=glue("double difference lambda:{lambda}"))
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_point).
A double difference seems preferable.
ourSouvenirs <-souvenirs
lambda<-BoxCox.lambda((ourSouvenirs%>% as.tibble%>% select(Sales ))[[1]])
ourSouvenirs<-ourSouvenirs%>% mutate(boxSales = box_cox(Sales ,lambda=lambda))
ourSouvenirs%>% gg_tsdisplay(boxSales ,plot_type = "partial")+labs(title=glue("no difference lambda:{lambda}"))
ourSouvenirs <-souvenirs%>%mutate(difSales=difference(Sales ))
lambda<-BoxCox.lambda((ourSouvenirs%>% as.tibble%>% select(difSales ))[[1]])
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
ourSouvenirs<-ourSouvenirs %>% mutate(boxSales = box_cox(difSales ,lambda))
ourSouvenirs%>% gg_tsdisplay(boxSales ,plot_type = "partial")+labs(title=glue("single difference lambda:{lambda}"))
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
ourSouvenirs <- ourSouvenirs%>%mutate(difSales =difference(Sales ,differences=2))
lambda<-BoxCox.lambda((ourSouvenirs %>% as.tibble%>% select(difSales ))[[1]])
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
ourSouvenirs<-ourSouvenirs %>% mutate(boxSales = box_cox(difSales ,lambda))
ourSouvenirs%>% gg_tsdisplay(boxSales ,plot_type = "partial")+labs(title=glue("double difference lambda:{lambda}"))
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_point).
A single difference seems correct, but again box cox doesn’t seem to entirely correct the seasonal trend differences.
For your retail data (from Exercise 8 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.
set.seed(12345678-4321)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
lambda<-BoxCox.lambda((myseries%>% select(Turnover))[[1]])
myseries<-myseries%>% mutate(boxTurnover = box_cox(Turnover,lambda=lambda))%>%mutate(difTurnover =difference(boxTurnover ,differences=1))
myseries%>% gg_tsdisplay(difTurnover,plot_type = "partial") +labs(Title="Single difference")
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
myseries <- myseries %>% mutate(difTurnover =difference(boxTurnover ,differences=2))
myseries%>% gg_tsdisplay(difTurnover,plot_type = "partial") +labs(Title="Double difference")
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_point).
Neither single or double difference with box cox can avoid all auto correlations, but a single difference seems to suffice to make it reasonably stationary, though there does seem to be an uncorrectable difference in seasonal patterns. ## 9.6
imulate and plot some data from simple ARIMA models.
Use the following R code to generate data from an AR(1) model with
Ï• 1 = 0.6 and
σ 2 = 1 . The process starts with
y 1 = 0 .
set.seed(6789)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
Produce a time plot for the series. How does the plot change as you change
Ï• 1 ?
sim%>%autoplot()
## Plot variable not specified, automatically selected `.vars = y`
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 1*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
sim%>%autoplot()+labs(title='Theta=1')
## Plot variable not specified, automatically selected `.vars = y`
Write your own code to generate data from an MA(1) model with
θ 1 = 0.6 and
σ 2 = 1 .
sim%>%autoplot()
## Plot variable not specified, automatically selected `.vars = y`
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 1*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
sim%>%autoplot()+labs(title='Theta=1')
## Plot variable not specified, automatically selected `.vars = y`
Produce a time plot for the series. How does the plot change as you change
θ 1 ?
Generate data from an ARMA(1,1) model with
Ï• 1 = 0.6 ,
θ 1 = 0.6 and
σ 2 = 1 .
Generate data from an AR(2) model with
ϕ 1 = − 0.8 ,
Ï• 2 = 0.3 and
σ 2 = 1 . (Note that these parameters will give a non-stationary series.)
Graph the latter two series and compare them.
Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.
ausAir<-aus_airpassengers%>%model(ARIMA(Passengers))
report(ausAir)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8963
## s.e. 0.0594
##
## sigma^2 estimated as 4.308: log likelihood=-97.02
## AIC=198.04 AICc=198.32 BIC=201.65
ausAir[[1]][[1]]$fit%>%checkresiduals()
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.
ausAir%>%forecast(h=10)%>% autoplot(aus_airpassengers)
Looks good
\[ y'_{t} = y_{t} - y_{t-1} = y_t - By_{t} = (1 - B)y_{t}\: \]
ausAir<-aus_airpassengers%>%model(arimaDefault = ARIMA(Passengers), arima010 = ARIMA(Passengers ~ 1 +pdq(0,1,0)) )
report(ausAir)
## Warning in report.mdl_df(ausAir): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 2 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arimaDefault 4.31 -97.0 198. 198. 202. <cpl [0]> <cpl [1]>
## 2 arima010 4.27 -98.2 200. 201. 204. <cpl [0]> <cpl [0]>
ausAir[[2]][[1]]$fit%>%checkresiduals()
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.
ausAir%>%forecast(h=10)%>% autoplot(aus_airpassengers)
d) Plot forecasts from an ARIMA(2,1,2) model with drift and compare these to parts a and c. Remove the constant and see what happens.
ausAir<-aus_airpassengers%>%model(arimaDefault = ARIMA(Passengers), arima010 = ARIMA(Passengers ~ 1 +pdq(0,1,0)), arima212= ARIMA(Passengers ~ 1 +pdq(2,1,2)) )
report(ausAir )
## Warning in report.mdl_df(ausAir): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 3 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arimaDefault 4.31 -97.0 198. 198. 202. <cpl [0]> <cpl [1]>
## 2 arima010 4.27 -98.2 200. 201. 204. <cpl [0]> <cpl [0]>
## 3 arima212 4.03 -96.2 204. 207. 215. <cpl [2]> <cpl [2]>
ausAir[[3]][[1]]$fit%>%checkresiduals()
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.
ausAir%>%forecast(h=10)%>% autoplot(aus_airpassengers)
ausAir<-aus_airpassengers%>%model(arimaDefault = ARIMA(Passengers), arima212= ARIMA(Passengers ~ 1 +pdq(2,1,2)), arima212noConstant= ARIMA(Passengers ~ -1 ) )
report(ausAir )
## Warning in report.mdl_df(ausAir): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 3 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arimaDefault 4.31 -97.0 198. 198. 202. <cpl [0]> <cpl [1]>
## 2 arima212 4.03 -96.2 204. 207. 215. <cpl [2]> <cpl [2]>
## 3 arima212noConstant 4.31 -97.0 198. 198. 202. <cpl [0]> <cpl [1]>
ausAir[[3]][[1]]$fit%>%checkresiduals()
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.
ausAir%>%forecast(h=10)%>% autoplot(aus_airpassengers)
e) Plot forecasts from an ARIMA(0,2,1) model with a constant. What happens?
```r
ausAir<-aus_airpassengers%>%model(arimaDefault = ARIMA(Passengers), arima010 = ARIMA(Passengers ~ 1 +pdq(0,1,0)) )
report(ausAir)
## Warning in report.mdl_df(ausAir): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 2 x 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arimaDefault 4.31 -97.0 198. 198. 202. <cpl [0]> <cpl [1]>
## 2 arima010 4.27 -98.2 200. 201. 204. <cpl [0]> <cpl [0]>
ausAir[[2]][[1]]$fit%>%checkresiduals()
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.
ausAir%>%forecast(h=10)%>% autoplot(aus_airpassengers)
## 9.8
For the United States GDP series (from global_economy):
```r
usaGDP <- global_economy%>%filter(Country=="United States")
lambda<-BoxCox.lambda((usaGDP%>% select(GDP))[[1]])
usaGDP<-usaGDP%>% mutate(boxGDP = box_cox(GDP,lambda=lambda))
usaGDP%>% gg_tsdisplay(boxGDP,plot_type = "partial")+labs(title=glue("USA GDP lambda:{lambda}"))
b) fit a suitable ARIMA model to the transformed data using ARIMA();
usaGDPmodel<-usaGDP%>% model(ARIMA(boxGDP))
report(usaGDPmodel)
## Series: boxGDP
## Model: ARIMA(1,1,0) w/ drift
##
## Coefficients:
## ar1 constant
## 0.4586 118.2764
## s.e. 0.1198 9.5123
##
## sigma^2 estimated as 5488: log likelihood=-325.37
## AIC=656.74 AICc=657.19 BIC=662.87
usaGDPmodel<-usaGDP%>% model(ARIMA(boxGDP~0+pdq(1,1,1)+PDQ(0,1,1)))
report(usaGDPmodel)
## Series: boxGDP
## Model: ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## 0.9905 -0.6583
## s.e. 0.0126 0.1460
##
## sigma^2 estimated as 6094: log likelihood=-329.45
## AIC=664.9 AICc=665.35 BIC=671.02
usaGDPmodel<-usaGDP%>% model(ARIMA(boxGDP~1+pdq(1,0,1)+PDQ(0,0,1)))
report(usaGDPmodel)
## Series: boxGDP
## Model: ARIMA(1,0,1) w/ mean
##
## Coefficients:
## ar1 ma1 constant
## 1 0.7673 0.0192
## s.e. 0 0.0668 0.0342
##
## sigma^2 estimated as 23800: log likelihood=-373.96
## AIC=755.92 AICc=756.67 BIC=764.16
usaGDPmodel<-usaGDP%>% model(ARIMA(boxGDP))
usaGDPmodel[[2]][[1]]$fit %>% checkresiduals()
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.
e) produce forecasts of your fitted model. Do the forecasts look reasonable?
fit<-usaGDPmodel%>%forecast(h=10)
fit%>%autoplot(usaGDP)
f) compare the results with what you would obtain using ETS() (with no transformation).
usaGDPmodel<-usaGDP%>% model( ets=ETS(GDP))
fit<-usaGDPmodel%>%forecast(h=10)
fit%>%autoplot(usaGDP)