#9.1
Figure 9.32 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers.
They all seem to indicate white noise as they are all close to a mean of zero. The major difference among the figures is that the confidence interval seems to get smaller and smaller as we increase the numbers as well as the bars seem to be getting smaller.
The critical values differ because the sample size affects the range of what is considered normal variability. Auto-correlations differ due to randomness in white noise, which leads to slight variations across different samples.
#9.2
A classic example of a non-stationary series are stock prices. Plot the daily closing prices for Amazon stock (contained in gafa_stock), along with the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced.
amzn_stock <- gafa_stock %>%
filter(Symbol == "AMZN")
amzn_stock %>% gg_tsdisplay(Close, plot_type = "partial") +
labs(y = "$US",
title = "Amazon daily closing price")
## Warning: Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.
## Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.
In this plot we can see that this violates all the rules for stationary
there is an upward trend as well as an increasing variance. For the ACF
graph it is showing values way above the mean of 0 and it is slowly
decaying. For the PACF shows a lag at 1 so there is strong
autocorrelation.
#9.3
For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data.
Turkey <- global_economy %>%
filter(Country == "Turkey")
Turkey %>% gg_tsdisplay(GDP, plot_type = "partial") +
labs(title = "Turkey's GDP")
# Transformed Data
lambda <-Turkey %>%
filter(Country == "Turkey") %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
Turkey |>
features(GDP, unitroot_ndiffs)
## # A tibble: 1 × 2
## Country ndiffs
## <fct> <int>
## 1 Turkey 1
Turkey %>%
gg_tsdisplay(difference(box_cox(GDP, lambda)), plot_type = 'partial') +
labs(title = paste0("Difference Turkish GDP with lambda = ", round(lambda, 4)))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
accom_tasmania <- aus_accommodation %>%
filter(State == "Tasmania")
accom_tasmania %>% gg_tsdisplay(Takings, plot_type = "partial") +
labs(title = "Tasmania Accommodation Takings")
#Transformation
lambda <- aus_accommodation %>%
filter(State == "Tasmania") %>%
features(Takings, features = guerrero) %>%
pull(lambda_guerrero)
accom_tasmania |>
features(Takings, unitroot_ndiffs)
## # A tibble: 1 × 2
## State ndiffs
## <chr> <int>
## 1 Tasmania 1
Turkey %>%
gg_tsdisplay(difference(box_cox(GDP, lambda)), plot_type = 'partial') +
labs(title = paste0("Difference Tasmania's Takings with lambda = ", round(lambda, 4)))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
souvenirs %>% autoplot(Sales)
#Transformation
lambda <- souvenirs %>%
features(Sales, features = guerrero) %>%
pull(lambda_guerrero)
souvenirs |>
features(Sales, unitroot_ndiffs)
## # A tibble: 1 × 1
## ndiffs
## <int>
## 1 1
souvenirs %>%
gg_tsdisplay(difference(box_cox(Sales, lambda)), plot_type = 'partial') +
labs(title = paste0("Difference in Sales with lambda = ", round(lambda, 4)))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
#9.5
For your retail data (from Exercise 7 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
lambda <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
myseries |>
features(Turnover, unitroot_ndiffs)
## # A tibble: 1 × 3
## State Industry ndiffs
## <chr> <chr> <int>
## 1 Northern Territory Clothing, footwear and personal accessory retailing 1
myseries %>%
gg_tsdisplay(difference(box_cox(Turnover, lambda)), plot_type = 'partial') +
labs(title = paste0("Difference in Turnover with lambda = ", round(lambda, 4)))
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
#9.6
Simulate and plot some data from simple ARIMA models.
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
#phi_1 = - 1
for(i in 2:100)
y[i] <- -1*y[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "phi_1 = -1")
## Plot variable not specified, automatically selected `.vars = y`
# phi_1 = - 0.8
for(i in 2:100)
y[i] <- -0.8*y[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "phi_1 = -0.8")
## Plot variable not specified, automatically selected `.vars = y`
# phi_1 = - 0.6
for(i in 2:100)
y[i] <- -0.6*y[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "phi_1 = -0.6")
## Plot variable not specified, automatically selected `.vars = y`
# phi_1 = - 0.4
for(i in 2:100)
y[i] <- -0.4*y[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "phi_1 = -0.4")
## Plot variable not specified, automatically selected `.vars = y`
# phi_1 = - 0.2
for(i in 2:100)
y[i] <- -0.2*y[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "phi_1 = -0.2")
## Plot variable not specified, automatically selected `.vars = y`
# phi_1 = 0
for(i in 2:100)
y[i] <- 0*y[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "phi_1 = 0")
## Plot variable not specified, automatically selected `.vars = y`
# phi_1 = 0.2
for(i in 2:100)
y[i] <- 0.2*y[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "phi_1 = 0.2")
## Plot variable not specified, automatically selected `.vars = y`
# phi_1 = 0.4
for(i in 2:100)
y[i] <- 0.4*y[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "phi_1 = 0.4")
## Plot variable not specified, automatically selected `.vars = y`
# phi_1 = 0.8
for(i in 2:100)
y[i] <- 0.8*y[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "phi_1 = 0.8")
## Plot variable not specified, automatically selected `.vars = y`
# phi_1 = 1
for(i in 2:100)
y[i] <- 1*y[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "phi_1 = 1")
## Plot variable not specified, automatically selected `.vars = y`
We can see that as we decrease the value of phi_1 the series becomes
more stationary.
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*e[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
#theta_1 = - 1
for(i in 2:100)
y[i] <- -1*e[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "theta_1 = -1")
## Plot variable not specified, automatically selected `.vars = y`
# theta_1 = - 0.8
for(i in 2:100)
y[i] <- -0.8*e[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "theta_1 = -0.8")
## Plot variable not specified, automatically selected `.vars = y`
# theta_1 = - 0.6
for(i in 2:100)
y[i] <- -0.6*e[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "theta_1 = -0.6")
## Plot variable not specified, automatically selected `.vars = y`
# theta_1 = - 0.4
for(i in 2:100)
y[i] <- -0.4*e[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "theta_1 = -0.4")
## Plot variable not specified, automatically selected `.vars = y`
# theta_1 = - 0.2
for(i in 2:100)
y[i] <- -0.2*e[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "theta_1 = -0.2")
## Plot variable not specified, automatically selected `.vars = y`
# theta_1 = 0
for(i in 2:100)
y[i] <- 0*e[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "theta_1 = 0")
## Plot variable not specified, automatically selected `.vars = y`
# theta_1 = 0.2
for(i in 2:100)
y[i] <- 0.2*e[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "theta_1 = 0.2")
## Plot variable not specified, automatically selected `.vars = y`
# theta_1 = 0.4
for(i in 2:100)
y[i] <- 0.4*e[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "theta_1 = 0.4")
## Plot variable not specified, automatically selected `.vars = y`
# theta_1 = 0.6
for(i in 2:100)
y[i] <- 0.6*e[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "theta_1 = 0.6")
## Plot variable not specified, automatically selected `.vars = y`
# theta_1 = 0.8
for(i in 2:100)
y[i] <- 0.8*e[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "theta_1 = 0.8")
## Plot variable not specified, automatically selected `.vars = y`
# theta_1 = 1
for(i in 2:100)
y[i] <- 1*e[i-1] + e[i]
tsibble(idx = seq_len(100), y = y, index = idx) %>%
autoplot() + labs(title = "theta_1 = 1")
## Plot variable not specified, automatically selected `.vars = y`
As we decrease the value of theta_1 the series becomes more
stationary.
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + 0.6*e[i-1] + e[i]
ARMA <- tsibble(idx = seq_len(100), y = y, index = idx)
f)Generate data from an AR(2) model with ϕ1=−0.8, ϕ2=0.3and σ2=1.(Note that these parameters will give a non-stationary series.
y <- numeric(100)
e <- rnorm(100)
for(i in 3:100)
y[i] <- -0.8*y[i-1] + 0.3*y[i-2] + e[i]
AR2 <- tsibble(idx = seq_len(100), y = y, index = idx)
ARMA %>% autoplot() + labs(title = "ARMA(1,1) Model")
## Plot variable not specified, automatically selected `.vars = y`
AR2 %>% autoplot() + labs(title = "AR(2) Model")
## Plot variable not specified, automatically selected `.vars = y`
#9.7
Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.
fit <- aus_airpassengers %>%
filter(Year < 2012) %>%
model(ARIMA(Passengers))
report(fit)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8756
## s.e. 0.0722
##
## sigma^2 estimated as 4.671: log likelihood=-87.8
## AIC=179.61 AICc=179.93 BIC=182.99
fit %>%
forecast(h = 10) %>%
autoplot(aus_airpassengers)
fit %>% gg_tsresiduals() + labs(title = "Residuals from ARIMA(0,1,1) Model")
\yt = -0.87e(t-1) + e(t)\ \(1 - B)yt = -0.87e(t)\
fit_drift <- aus_airpassengers %>%
filter(Year < 2012) %>%
model(ARIMA(Passengers~ pdq(0,1,0)))
fit_drift %>%
forecast(h = 10) %>%
autoplot(aus_airpassengers) +
labs(title = "Forecasts from ARIMA(0,1,0) Model with Drift")
fit_drift %>% gg_tsresiduals() +
labs(title = "Residuals from ARIMA(0,1,0) Model with Drift")
fit_212 <- aus_airpassengers %>%
filter(Year < 2012) %>%
model(ARIMA(Passengers~ pdq(2,1,2)))
fit_212 %>%
forecast(h = 10) %>%
autoplot(aus_airpassengers) +
labs(title = "Forecasts from ARIMA(2,1,2) Model with Drift")
fit_212 %>% gg_tsresiduals() +
labs(title = "Residuals from ARIMA(2,1,2) Model with Drift")
fit_021 <- aus_airpassengers %>%
filter(Year < 2012) %>%
model(ARIMA(Passengers~ pdq(0,2,1)))
fit_021 %>%
forecast(h = 10) %>%
autoplot(aus_airpassengers) +
labs(title = "Forecasts from ARIMA(0,2,1) Model with Drift")
fit_021 %>% gg_tsresiduals() +
labs(title = "Residuals from ARIMA(0,2,1) Model with Drift")
#9.8
For the United States GDP series (from global_economy):
US_GDP <- global_economy %>%
filter(Country == "United States")
lambda <- US_GDP %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
US_GDP %>%
gg_tsdisplay(GDP, plot_type = "partial") +
labs(title = "US GDP")
ARIMA_b <- global_economy %>%
filter(Code == "USA") %>%
model(ARIMA(box_cox(GDP, lambda)))
report(ARIMA_b)
## Series: GDP
## Model: ARIMA(1,1,0) w/ drift
## Transformation: box_cox(GDP, lambda)
##
## Coefficients:
## ar1 constant
## 0.4586 118.1822
## s.e. 0.1198 9.5047
##
## sigma^2 estimated as 5479: log likelihood=-325.32
## AIC=656.65 AICc=657.1 BIC=662.78
global_economy %>%
filter(Code == "USA") %>%
gg_tsdisplay(box_cox(GDP,lambda), plot_type='partial') +
labs(title = "Transformed United States GDP")
global_economy %>%
filter(Code == "USA") %>%
features(box_cox(GDP,lambda), unitroot_ndiffs)
## # A tibble: 1 × 2
## Country ndiffs
## <fct> <int>
## 1 United States 1
ARIMA_d <- global_economy %>%
filter(Code == "USA") %>%
model(ARIMA(box_cox(GDP, lambda) ~ pdq(0,1,1)))
report(ARIMA_d)
## Series: GDP
## Model: ARIMA(0,1,1) w/ drift
## Transformation: box_cox(GDP, lambda)
##
## Coefficients:
## ma1 constant
## 0.4026 219.6195
## s.e. 0.1098 13.6953
##
## sigma^2 estimated as 5689: log likelihood=-326.37
## AIC=658.73 AICc=659.18 BIC=664.86
ARIMA_d %>%
forecast(h = 10) %>%
autoplot(US_GDP)
ETS <- global_economy %>%
filter(Code == "USA") %>%
model(ETS(GDP))
report(ETS)
## Series: GDP
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.9990876
## beta = 0.5011949
##
## Initial states:
## l[0] b[0]
## 448093333334 64917355687
##
## sigma^2: 7e-04
##
## AIC AICc BIC
## 3190.787 3191.941 3201.089
ETS %>%
forecast(h = 10) %>%
autoplot(US_GDP)