library(fpp3)
library(tidyverse)
library(ggrepel)
library(seasonal)
library(fabletools)
library(corrplot)
library(GGally)
library(patchwork)
library(caret)
theme_set(theme_bw())

9.1 Figure 9.32 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers.

  1. Explain the differences among these figures. Do they all indicate that the data are white noise?

The above figure has different correlation values, but also has different ranges for which the correlations are significant when compared to 0, which is signified by the dashed blue lines. Based on the figure, it appears that all 3 sets of data are white noise since none of the correlations lie outside of range for significance.

  1. Why are the critical values at different distances from the mean of zero? Why are the autocorrelations different in each figure when they each refer to white noise?

The critical values are at different distances from the mean of zero because of the number of samples in each dataset. The more observations within the data, the smaller this range is. Even though each of the figures above refer to white noise, the autocorrelations are different because each dataset is unique and have their own seasonal correlations, such that the figures with smaller autocorrelations have smaller seasonal correlations.

9.2 A classic example of a non-stationary series are stock prices. Plot the daily closing prices for Amazon stock (contained in gafa_stock), along with the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced.

amzn_stock = gafa_stock |> 
  filter(Symbol == "AMZN") |>
  mutate(day = row_number()) |>
  update_tsibble(index = day, regular = TRUE)

amzn_stock |>
  autoplot(Close) +
  labs(title = "Daily Closing Prices for Amazon Stocks")

amzn_stock |>
  ACF(Close, lag_max = 20) |>
  autoplot() +
  labs(title = "ACF Plot for Amazon Stock Prices")

amzn_stock |>
  PACF(Close, lag_max = 20) |>
  autoplot() +
  labs(title = "PACF Plot for Amazon Stock Prices")

The ACF plot above shows that the data is non-stationary due to the correlations slowly decreasing, rather than a quick decrease to near zero. On the other hand, the PACF plot above has a major spike at the first lag, followed by a steep drop off in correlation, as well as a correlation outside the critical values at a high lag value, which is unlikely for a stationary dataset.

9.3 For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data.

  1. Turkish GDP from global_economy.
turkish_econ = global_economy |> filter(Country == "Turkey") |> select(GDP)

turkish_econ |> autoplot() + labs(title = "Annual Turkish GDP")

BoxCoxTrans(turkish_econ$GDP)
## Box-Cox Transformation
## 
## 58 data points used to estimate Lambda
## 
## Input data summary:
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 8.022e+09 3.786e+10 9.900e+10 2.527e+11 3.028e+11 9.506e+11 
## 
## Largest/Smallest: 118 
## Sample Skewness: 1.19 
## 
## Estimated Lambda: 0 
## With fudge factor, Lambda = 0 will be used for transformations

Based on the Box-Cox transformation above, the lambda value of 0 suggests a log transformation is appropriate.

turkish_log = turkish_econ |> mutate(GDP = log(GDP))
turkish_log |> autoplot() + labs(title = "Annual Turkish GDP with Log Transformation")

turkish_log |> ACF(lag_max = 28) |> autoplot() + 
  labs(title = "ACF Plot of Annual Turkish GDP with Log Transformation")

The ACF plot above shows a slowly decreasing trend. This, combined with the the time plot showing an increasing trend, suggests that the data is non-stationary. As a result, differencing is required.

turkish_log |> 
  autoplot(difference(GDP)) +
  labs(title = "Time Plot of Annual Turkish GDP with Log Transformation and Difference of One")

turkish_log |> ACF(difference(GDP), lag_max = 28) |>
  autoplot() +
  labs(title = "ACF Plot of Annual Turkish GDP with Log Transformation and Difference of One")

The above time plot and ACF plot of the transformed data with a difference of one shows that the time series is now stationary since the time plot is roughly horizontal with constant variance, and the ACF plot looks like white noise. This is verified with the KPSS tests below, which shows a value of 1 for the number of differences needed.

turkish_log |> features(GDP, unitroot_ndiffs)
  1. Accommodation takings in the state of Tasmania from aus_accommodation.
tasmania_accomm = aus_accommodation |>
  filter(State == "Tasmania") |> select(Takings)

tasmania_accomm |> autoplot() + labs(title = "Time Plot of Tasmania Accommodation Takings")

BoxCoxTrans(tasmania_accomm$Takings)
## Box-Cox Transformation
## 
## 74 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   16.11   29.00   39.27   41.27   52.27   84.52 
## 
## Largest/Smallest: 5.25 
## Sample Skewness: 0.482 
## 
## Estimated Lambda: 0.3

Based on the Box-Cox transformation above, the lambda value of 0.3 (when rounded to 0.5) suggests a square root transformation is appropriate.

tasmania_sqrt = tasmania_accomm |> mutate(Takings = sqrt(Takings))

tasmania_sqrt |> autoplot() + 
  labs(title = "Time Plot of Tasmania Accommodation Takings with Square Root Transformation")

tasmania_sqrt |> ACF(lag_max = 20) |> autoplot() + 
  labs(title = "ACF Plot of Tasmania Accommodation Takings with Square Root Transformation")

Based on both the time plot and ACF plot above, the data appears to be seasonal, but non-stationary. As a result, a seasonal difference is looked at first.

tasmania_sqrt |>
  autoplot(difference(Takings, 4))  + 
  labs(title = "Time Plot of Tasmania Accommodation Takings with\nSquare Root Transformation and Seasonal Difference of 4")

tasmania_sqrt |>
  ACF(difference(Takings, 4))  |>
  autoplot() + 
  labs(title = "ACF Plot of Tasmania Accommodation Takings with\nSquare Root Transformation and Seasonal Difference of 4")

The above time plot and ACF plot of the transformed data with a seasonal difference of four shows that the time series is now stationary since the time plot is roughly horizontal with constant variance, and the ACF plot not having a slowly declining trend. This is verified with the KPSS tests below, which shows a value of 1 for the number of seasonal differences, and 0 for the number of differences needed after applying the seasonal differences.

tasmania_sqrt |> features(Takings, unitroot_nsdiffs)
tasmania_sqrt |> mutate(diff_takings = difference(Takings, 12)) |>
  features(diff_takings, unitroot_ndiffs)
  1. Monthly sales from souvenirs.
souvenirs |> autoplot() + labs(title = "Time Plot of Monthly Souvenirs")

BoxCoxTrans(souvenirs$Sales)
## Box-Cox Transformation
## 
## 84 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1665    5884    8772   14316   16889  104661 
## 
## Largest/Smallest: 62.9 
## Sample Skewness: 3.37 
## 
## Estimated Lambda: -0.2 
## With fudge factor, Lambda = 0 will be used for transformations

Based on the Box-Cox transformation above, the lambda value of -0.2 (rounded to 0) suggests a log transformation is appropriate.

souvenir_log = souvenirs |> mutate(Sales = log(Sales))

souvenir_log |> autoplot() + labs(title = "Time Plot of Monthly Souvenirs with Log Transformation")

souvenir_log |> ACF(lag_max = 36) |> 
  autoplot() + labs(title = "Time Plot of Monthly Souvenirs with Log Transformation")

Based on both the time plot and ACF plot above, the data appears to be seasonal, but non-stationary. As a result, a seasonal difference is looked at first.

souvenir_log |> autoplot(difference(Sales, 12)) +
  labs(title = "Time Plot of Monthly Souvenirs with Log Transformation and Seasonal Difference")

souvenir_log |> ACF(difference(Sales, 12)) |> autoplot() +
  labs(title = "ACF Plot of Monthly Souvenirs with Log Transformation and Seasonal Difference")

The above time plot and ACF plot of the transformed data with a seasonal difference of twelve shows that the time series is now stationary since the time plot is roughly horizontal with constant variance, and the ACF plot not having a slowly declining trend. This is verified with the KPSS tests below, which shows a value of 1 for the number of seasonal differences, and 0 for the number of differences needed after applying the seasonal differences.

souvenir_log |> features(Sales, unitroot_nsdiffs)
souvenir_log |> mutate(diff_sales = difference(Sales, 12)) |>
  features(diff_sales, unitroot_ndiffs)

9.5 For your retail data (from Exercise 7 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.

set.seed(8271)
retail_data <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

retail_data |> autoplot(Turnover) +
  labs(title = "Time Plot for Turnover of Household Goods Retailing in New South Wales")

BoxCoxTrans(retail_data$Turnover)
## Box-Cox Transformation
## 
## 441 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   211.3   424.6   639.2   743.5  1030.0  1887.0 
## 
## Largest/Smallest: 8.93 
## Sample Skewness: 0.603 
## 
## Estimated Lambda: 0.2

Based on the Box-Cox transformation above, the lamdba value of 0.2 (when rounded to 0) suggests a log transformation would be appropriate.

retail_log = retail_data |> mutate(Turnover = log(Turnover))
  
retail_log |>  
  autoplot(Turnover) +
  labs(title = "Time Plot for Turnover of Household Goods Retailing in\nNew South Wales with Log Transformation")

retail_log |> ACF(Turnover, lag_max = 36) |> autoplot() +
  labs(title = "ACF Plot for Turnover of Household Goods Retailing in\nNew South Wales with Log Transformation")

The time plot and ACF plot with the transformed data shows that the data is non-stationary. Since the data is seasonal, a seaonsal difference is looked at first.

retail_log |> autoplot(difference(Turnover, 12)) +
  labs(title = "Time Plot for Turnover of Household Goods Retailing in\nNew South Wales with Log Transformation and Seasonal Difference")

retail_log |> ACF(difference(Turnover, 12)) |> autoplot() +
  labs(title = "ACF Plot for Turnover of Household Goods Retailing in New South Wales\nwith Log Transformation and Seasonal Difference")

Looking at the time plot and ACF plot with the transformed data and a seasonal difference, the data appears to be stationary. This is verified with the KPSS tests below that shows no additional differences are needed after making the seasonal difference.

retail_log |> features(Turnover, unitroot_nsdiffs)
retail_log |> mutate(diff_turnover = difference(Turnover, 12)) |>
  features(diff_turnover, unitroot_ndiffs)

9.6 Simulate and plot some data from simple ARIMA models.

  1. Use the following R code to generate data from an AR(1) model with \(\phi_1=0.6\) and \(\sigma^2=1\). The process starts with \(y_1=0\).
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), phi_0.6 = y, index = idx)
  1. Produce a time plot for the series. How does the plot change as you change \(\phi_1\)?
y1 = numeric(100)
for(i in 2:100)
  y1[i] <- 0.8*y1[i-1] + e[i]
sim1 <- tsibble(idx = seq_len(100), phi_0.8 = y1, index = idx)

y2 = numeric(100)
for(i in 2:100)
  y2[i] <- 1*y2[i-1] + e[i]
sim2 <- tsibble(idx = seq_len(100), phi_1.0 = y2, index = idx)

y3 = numeric(100)
for(i in 2:100)
  y3[i] <- 0.4*y3[i-1] + e[i]
sim3 <- tsibble(idx = seq_len(100), phi_0.4 = y3, index = idx)

y4 = numeric(100)
for(i in 2:100)
  y4[i] <- 0.2*y4[i-1] + e[i]
sim4 <- tsibble(idx = seq_len(100), phi_0.2 = y4, index = idx)

y5 = numeric(100)
for(i in 2:100)
  y5[i] <- 0.0*y5[i-1] + e[i]
sim5 <- tsibble(idx = seq_len(100), phi_0.0 = y5, index = idx)

sim_full = sim |>
  full_join(sim1) |>
  full_join(sim2) |>
  full_join(sim3) |>
  full_join(sim4) |>
  full_join(sim5) |>
  pivot_longer(cols = starts_with("phi"),
               names_to = "phi",
               values_to = "value")

sim_full |> ggplot(aes(x = idx, y = value, color = phi, group = phi)) +
  geom_line() +
  labs(title = "Time Plot of AR(1) Model with Different Values of Phi",
       y = "y")

The plot above shows the time plots with different values of \(\phi_1\). When the value of \(\phi_1\) approaches 1, the values of the plot approaches a random walk, whereas when the value of \(\phi_1\) approaches 0, the plot approaches white noise.

  1. Write your own code to generate data from an MA(1) model with \(\theta_1=0.6\) and \(\sigma^2=1\).
y_ma <- numeric(100)
for(i in 2:100)
  y_ma[i] <- 0.6*e[i-1] + e[i]
sim_ma <- tsibble(idx = seq_len(100), theta_0.6 = y_ma, index = idx)
  1. Produce a time plot for the series. How does the plot change as you change \(\theta_1\)?
y_ma1 = numeric(100)
for(i in 2:100)
  y_ma1[i] <- 0.8*e[i-1] + e[i]
sim_ma1 <- tsibble(idx = seq_len(100), theta_0.8 = y_ma1, index = idx)

y_ma2 = numeric(100)
for(i in 2:100)
  y_ma2[i] <- 1.0*e[i-1] + e[i]
sim_ma2 <- tsibble(idx = seq_len(100), theta_1.0 = y_ma2, index = idx)

y_ma3 = numeric(100)
for(i in 2:100)
  y_ma3[i] <- 0.4*e[i-1] + e[i]
sim_ma3 <- tsibble(idx = seq_len(100), theta_0.4 = y_ma3, index = idx)

y_ma4 = numeric(100)
for(i in 2:100)
  y_ma4[i] <- 0.2*e[i-1] + e[i]
sim_ma4 <- tsibble(idx = seq_len(100), theta_0.2 = y_ma4, index = idx)

y_ma5 = numeric(100)
for(i in 2:100)
  y_ma5[i] <- 0.0*e[i-1] + e[i]
sim_ma5 <- tsibble(idx = seq_len(100), theta_0.0 = y_ma5, index = idx)

sim_ma_full = sim_ma |>
  full_join(sim_ma1) |>
  full_join(sim_ma2) |>
  full_join(sim_ma3) |>
  full_join(sim_ma4) |>
  full_join(sim_ma5) |>
  pivot_longer(cols = starts_with("theta"),
               names_to = "theta",
               values_to = "value")

sim_ma_full |> ggplot(aes(x = idx, y = value, color = theta, group = theta)) +
  geom_line() +
  labs(title = "Time Plot of MA(1) Model with Different Values of Theta",
       y = "y")

The plot above shows the time plots with different values of \(\theta_1\). When the value of \(\theta_1\) approaches 1, the peaks and valley within the time plot become more extreme, whereas when the value of \(\theta_1\) approaches 0, these peaks become less extreme.

  1. Generate data from an ARMA(1,1) model with \(\phi_1=0.6\), \(\theta_1=0.6\), and \(\sigma^2=1\).
y_arma <- numeric(100)
for(i in 2:100)
  y_arma[i] <- 0.6*y_arma[i-1] + 0.6*e[i-1] + e[i]
sim_arma <- tsibble(idx = seq_len(100), arma_1_1 = y_arma, index = idx)
  1. Generate data from an AR(2) model with \(\phi_1=-0.8\), \(\phi_2=0.3\) and \(\sigma^2=1\). (Note that these parameters will give a non-stationary series.)
y_ar2 = numeric(100)
for(i in 3:100)
  y_ar2[i] <- -0.8*y_ar2[i-1] + 0.3*y_ar2[i-2] + e[i]
sim_ar2 <- tsibble(idx = seq_len(100), ar_2 = y_ar2, index = idx)
  1. Graph the latter two series and compare them.
sim_arma_ar2 = sim_arma |>
  full_join(sim_ar2) |>
  pivot_longer(cols = starts_with("ar"),
               names_to = "model",
               values_to = "value")

sim_arma_ar2 |> ggplot(aes(x = idx, y = value, color = model, group = model)) +
  geom_line() +
  labs(title = "Time Plot of ARMA(1,1) and AR(2) Models",
       y = "y")

sim_arma |> autoplot() +
  labs(title = "Time Plot of ARMA(1,1) Model",
       y = "y")

Looking at the plot above, the ARMA(1,1) model appears to be relatively flat when plotted on the same plot of the AR(2) model, which appears to oscillate with increasing magnitude as time increases. Looking at the ARMA(1,1) on its own plot, the data looks like white noise around the mean of 0.

9.7 Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.

  1. Use ARIMA() to find an appropriate ARIMA model. What model was selected. Check that the residuals look like white noise. Plot forecasts for the next 10 periods.
aus_airpassengers |> autoplot() +
  labs(title = "Time Plot of Number of Passengers from Australian Air Carriers")

Looking at the time plot above, there does not appear to be any change in variance, so a Box-Cox transformation is not required. However, the data is non-stationary since there is an increasing trend in the data. Therefore, differencing is looked at below:

aus_airpassengers |> features(Passengers, unitroot_ndiffs)
aus_airpassengers |> gg_tsdisplay(difference(difference(Passengers)), plot_type = "partial") +
  labs(title = "Plots with Difference of Two")

Using the KPSS test above, the number of recommended differences needed to get a stationary dataset is 2. This was verified with the time plot, ACF plot, PACF plot above, where the data looks like white noise.

Looking at the ACF plot above, the last significant peak occurs at a lag of 1. This would suggest using an ARIMA(0,2,1) model. Looking at the PACF plot above, the last significant peak occurs at a lag of 14. However, this peak barely surpasses the significance level and would result in several additional parameters in the model. As a result, the next earlier peak is used, which occurs at a lag of 4. This would suggest using an ARIMA(4,2,0) model. These are both modeled below, along with the model that the ARIMA() function automatically picks.

airpass_models = aus_airpassengers |> 
  model(arima021 = ARIMA(Passengers ~ pdq(0,2,1)),
        arima420 = ARIMA(Passengers ~ pdq(4,2,0)),
        stepwise = ARIMA(Passengers),
        search = ARIMA(Passengers, stepwise = FALSE))

airpass_models |> pivot_longer(arima021:search, names_to = "Model", values_to = "Order")
glance(airpass_models) |> arrange(AICc) |> select(.model:BIC)

Looking at the results of the models above, ARIMA() automatically selected an ARIMA(0,2,1) model, using both the stepwise and search methods. This is the same as the model selected by hand. Using the AICc values for comparison, the ARIMA(0,2,1) model performs better than the ARIMA(4,2,0) model. Therefore, the ARIMA(0,2,1) model is selected.

airpass_models |>
  select(arima021) |>
  gg_tsresiduals()

augment(airpass_models) |>
  filter(.model=='arima021') |>
  features(.innov, ljung_box, lag = 10, dof = 1)

The residual plots above, as well as the Portmanteau test, shows that the residuals of the ARIMA(0,2,1) model behave like white noise, and thus is a good model. Below is the forecasted data using the ARIMA(0,2,1) model.

airpass_models |>
  select(arima021) |>
  forecast(h=10) |>
  autoplot(aus_airpassengers) +
  labs(title = "Time Plot of Number of Passengers from Australian Air Carriers with\n10 Year Forecast Using ARIMA(0,2,1) Model")

  1. Write the model in terms of the backshift operator.
report(airpass_models |> select(arima021))
## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8963
## s.e.   0.0594
## 
## sigma^2 estimated as 4.308:  log likelihood=-97.02
## AIC=198.04   AICc=198.32   BIC=201.65

\[ (1-B)^2y_t = \theta(B)\epsilon_t \]

\[ where: \theta(B) = 1 + \theta_1B \]

\[ where: \theta_1 = -0.90 \]

  1. Plot forecasts from an ARIMA(0,1,0) model with drift and compare these to part a.
arima_010 = aus_airpassengers |> 
  model(ARIMA(Passengers ~ 1 + pdq(0,1,0)))

report(arima_010)
## Series: Passengers 
## Model: ARIMA(0,1,0) w/ drift 
## 
## Coefficients:
##       constant
##         1.4191
## s.e.    0.3014
## 
## sigma^2 estimated as 4.271:  log likelihood=-98.16
## AIC=200.31   AICc=200.59   BIC=203.97
glance(arima_010) |> select(.model:BIC)
arima_010 |>
  forecast(h=10) |>
  autoplot(aus_airpassengers) +
  labs(title = "Time Plot of Number of Passengers from Australian Air Carriers with\n10 Year Forecast Using ARIMA(0,1,0) with Drift Model")

Looking at the forecast above using an ARIMA(0,1,0) with drift model, the forecast appears to have a slightly lower slope with narrower intervals. These intervals are also not linearly, as they curving towards the point forecast, and thus not increasing linearly. This also appears to have a slightly worse AICc value of 200.59 compared to the 198.32 from the ARIMA(0,2,1) model. However, these cannot be directly compared since the ARIMA(0,1,0) model having a difference of 1, compared to the ARIMA(0,2,1) model having a differncing of 2. Therefore, we cannot confidently say that the ARIMA(0,1,0) with drift model is worse than the ARIMA(0,2,1) model.

  1. Plot forecasts from an ARIMA(2,1,2) model with drift and compare these to parts a and c. Remove the constant and see what happens.
arima_212_c = aus_airpassengers |> 
  model(ARIMA(Passengers ~ 1 + pdq(2,1,2)))

report(arima_212_c)
## Series: Passengers 
## Model: ARIMA(2,1,2) w/ drift 
## 
## Coefficients:
##           ar1      ar2     ma1     ma2  constant
##       -0.5518  -0.7327  0.5895  1.0000    3.2216
## s.e.   0.1684   0.1224  0.0916  0.1008    0.7224
## 
## sigma^2 estimated as 4.031:  log likelihood=-96.23
## AIC=204.46   AICc=206.61   BIC=215.43
glance(arima_212_c) |> select(.model:BIC)
arima_212_c |>
  forecast(h=10) |>
  autoplot(aus_airpassengers) +
  labs(title = "Time Plot of Number of Passengers from Australian Air Carriers with\n10 Year Forecast Using ARIMA(2,1,2) with Drift Model")

Looking at the forecast above using an ARIMA(2,1,2) with drift model, the forecast appears to have a slope similar to the ARIMA(0,1,0) with drift model, which is less than the ARIMA(0,2,1) model. The prediction interval appears to be similar to the ARIMA(0,1,0) with drift model, which is less than the ARIMA(2,1,2) model. However, unlike either of the previous 2 models, the point forecast and prediction intervals are not straight, but are somewhat wavy. Finally, the AICc value for the ARIMA(2,1,2) with drift model is worse than both of the previous models. We can confidently say this is worse than the ARIMA(0,1,0) model since the level of differencing is the same, but cannot say the same when comparing to the ARIMA(0,2,1) model since the level of differencing is different.

arima_212_nc = aus_airpassengers |> 
  model(ARIMA(Passengers ~ 0 + pdq(2,1,2)))
report(arima_212_nc)
## Series: Passengers 
## Model: NULL model 
## NULL model

When removing the constant from the ARIMA(2,1,2) with drift model, the model fails. This is likely because the data is no longer stationary.

  1. Plot forecasts from an ARIMA(0,2,1) model with a constant. What happens?
arima_021_c = aus_airpassengers |> 
  model(ARIMA(Passengers ~ 1 + pdq(0,2,1)))

report(arima_021_c)
## Series: Passengers 
## Model: ARIMA(0,2,1) w/ poly 
## 
## Coefficients:
##           ma1  constant
##       -1.0000    0.0571
## s.e.   0.0585    0.0213
## 
## sigma^2 estimated as 3.855:  log likelihood=-95.1
## AIC=196.21   AICc=196.79   BIC=201.63
glance(arima_021_c) |> select(.model:BIC)
arima_021_c |>
  forecast(h=10) |>
  autoplot(aus_airpassengers) +
  labs(title = "Time Plot of Number of Passengers from Australian Air Carriers with\n10 Year Forecast Using ARIMA(0,2,1) with Drift Model")

Looking at the forecast above using an ARIMA(0,2,1) with drift model, the forecast appears to have a slightly higher slope with narrower intervals. These intervals are also not linearly, as they curving towards the point forecast, and thus not increasing linearly. This also appears to have a slightly better AICc value of 196.79 compared to the 198.32 from the ARIMA(0,2,1) model. This suggests the model with drift is slightly better.

9.8 For the United States GDP series (from global_economy):

  1. if necessary, find a suitable Box-Cox transformation for the data;
us_gdp = global_economy |> filter(Country == "United States") |> select(GDP)

us_gdp |> autoplot() + labs(title = "Time Plot of US GDP")

BoxCoxTrans(us_gdp$GDP)
## Box-Cox Transformation
## 
## 58 data points used to estimate Lambda
## 
## Input data summary:
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 5.433e+11 1.584e+12 5.455e+12 6.992e+12 1.138e+13 1.939e+13 
## 
## Largest/Smallest: 35.7 
## Sample Skewness: 0.586 
## 
## Estimated Lambda: 0.2

Based on the above calculation for the Box-Cox transformation, the lambda value of 0.2 (when rounded to 0) suggests a log transformation is appropriate.

  1. fit a suitable ARIMA model to the transformed data using ARIMA();
us_arima = us_gdp |>
  model(ARIMA(log(GDP)))

report(us_arima)
## Series: GDP 
## Model: ARIMA(0,2,1) 
## Transformation: log(GDP) 
## 
## Coefficients:
##           ma1
##       -0.6353
## s.e.   0.1138
## 
## sigma^2 estimated as 0.0004278:  log likelihood=139.76
## AIC=-275.53   AICc=-275.3   BIC=-271.48

Based on the model above, ARIMA() picked an ARIMA(0,2,1) model. The associated AICc for the model is -275.3.

  1. try some other plausible models by experimenting with the orders chosen;
us_gdp |> gg_tsdisplay(log(GDP), plot_type = "partial")

The plots above show that the US GDP data is non-stationary. As a result, differencing is looked at below.

us_gdp |> gg_tsdisplay(difference(log(GDP)), plot_type = "partial")

us_gdp |> gg_tsdisplay(difference(difference(log(GDP))), plot_type = "partial")

us_gdp |> features(log(GDP), unitroot_ndiffs)

Based on the plots above, as well as the KPSS test, a difference of 2 makes the data stationary. Thus, this component will not be modified in the following experiments.

us_arima_models = us_gdp |>
  model(arima_022 = ARIMA(log(GDP) ~ pdq(0,2,2)),
        arima_021 = ARIMA(log(GDP) ~ pdq(0,2,1)),
        arima_023 = ARIMA(log(GDP) ~ pdq(0,2,3)),
        arima_122 = ARIMA(log(GDP) ~ pdq(1,2,2)),
        arima_222 = ARIMA(log(GDP) ~ pdq(2,2,2)),
        arima_121 = ARIMA(log(GDP) ~ pdq(1,2,1)),
        arima_221 = ARIMA(log(GDP) ~ pdq(2,2,1)),
        arima_123 = ARIMA(log(GDP) ~ pdq(1,2,3)),
        arima_223 = ARIMA(log(GDP) ~ pdq(2,2,3)))

glance(us_arima_models) |> arrange(AICc) |> select(.model:BIC)
  1. choose what you think is the best model and check the residual diagnostics;

Based on the above models, the model that produced the best AICc value was the ARIMA(0,2,1) model, which is what ARIMA() previously predicted. The residuals for the ARIMA(0,2,1) model is checked below.

us_arima_models |> select(arima_021) |> gg_tsresiduals()

Based on the plots above, the residuals appear to behave similarly to white noise.

  1. produce forecasts of your fitted model. Do the forecasts look reasonable?
us_arima_models |>
  select(arima_021) |>
  forecast(h = 10) |>
  autoplot(us_gdp) +
  labs(title = "Time Plot of US GDP with 10 Year Forecast using an ARIMA(0,2,1) Model")

A 10-year forecast using the ARIMA(0,2,1) model for the US GDP data is above. This appears to be a reasonable forecast, as the point forecast appears to follow an exponentially increasing trend, similar to the previous data.

  1. compare the results with what you would obtain using ETS() (with no transformation).
us_gdp_ets = us_gdp |>
  model(ETS(GDP))

us_gdp |>
  slice(-n()) |>
  stretch_tsibble(.init = 10) |>
  model(arima = ARIMA(log(GDP)),
        ets = ETS(GDP)) |>
  forecast(h = 1) |>
  accuracy(us_gdp) |>
  select(.model, RMSE:RMSSE)
us_gdp_ets |>
  forecast(h = 10) |>
  autoplot(us_gdp) +
  labs(title = "Time Plot of US GDP with 10 Year Forecast using an ETS Model")

The ETS model above appears to have performed better than the ARIMA(0,2,1) model, based on the RMSE value being lower for the ETS model. Looking at the 10-year forecast above, this does appear to be a better forecast, as the ETS model has better captured the later trend of the plot, rather than continuing an exponential growth trend.