# Load libraries
library(fpp3)
library(dplyr)
library(imputeTS)
library(fable)
library(tibble)
library(kableExtra)
library(ggplot2)
library(scales)
library(gridExtra)


Instructions

Do the exercises 9.1, 9.2, 9.3, 9.5, 9.6, 9.7, 9.8 in Hyndman. Please submit both the Rpubs link as well as your .rmd file.



Exercise 9.1

Figure 9.32 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers. Explain the differences among these figures. Do they all indicate that the data are white noise?

Yes. All three figures indicate white noise as they have no significant auto correlations in the lags (as shown by the lack of any lags exceeding the confidence interval). Also, the autocorrelation lags have a constant mean fluctuating around zero and constant variance hovering around zero. Smaller sample sizes (n = 36) are likely to show random spikes in autocorrelation whereas large samples (n = 1000) are better able to estimate the autocorrelation - which converges on zero for white noise.

Why are the critical values at different distances from the mean of zero? Why are the autocorrelations different in each figure when they each refer to white noise?

The difference is caused by the difference between n = 36, n = 360, and n = 1000 observations in the data. The 95% confidence interval is calculated by \(\pm \frac{1.96}{\sqrt{N}}\), As n increases, the critical values decrease - getting closer to zero. The more observations, the more the sample estimate converges to its true value - which, again, is zero for white noise.



Exercise 9.2

A classic example of a non-stationary series are stock prices. Plot the daily closing prices for Amazon stock (contained in gafa_stock), along with the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced.

In the original untransformed/not-differenced stock data, the ACF plot shows high autocorrelations across multiple lags that decay slowly; this indicates non-stationarity and the presence of a trend in the data. The trend is visible in the plot for the original stock data as well. For a stationary series, the ACF should decay quickly; the persistent correlations suggest the autocorrelation continue over time.

The PACF plot has a significant spike at lag 1, suggesting strong autocorrelation with the immediate past value but no substantial autocorrelations at higher lags. A slowly decaying ACF and a sharp drop in the PACF after lag 1 is indicative of a series that needs first-order differencing to become stationary. Differencing the series would likely remove the trend and stabilize the mean.

The differenced plot indicates that the trend has been removed and stabilized the mean. The ACF shows no significant correlations at any lag, with values mostly falling within the confidence intervals, The PACF does not exhibit any prominent spikes indicating no strong autocorrelation in the differenced series.

# Load the dataset
data("gafa_stock")

# Filter Amazon stock
amazon_stock <- gafa_stock %>%
  filter(Symbol == "AMZN") %>%
  select(Date, Close)

# Create a 'Trading_Day'
amazon_stock <- amazon_stock %>%
  mutate(Trading_Day = row_number())

# Convert to a tsibble using 'Trading_Day' as the index
amazon_stock_ts <- amazon_stock %>%
  as_tsibble(index = Trading_Day)

# Plot the time series original data
amazon_stock_ts %>%
  gg_tsdisplay(Close, plot_type = "partial")+
  ggtitle("Amazon Stock - Original Data: Close Price vs Trading Day")

# Plot the time series differenced data
amazon_stock_ts %>%
  gg_tsdisplay(difference(Close), plot_type = "partial") +
  ggtitle("Amazon Stock - Differenced Data: Differenced Close Price vs Trading Day")



Exercise 9.3

For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data.

- Turkish GDP from global_economy

Here, we perform a box-cox transformation to stabilize the variance. We then perform a first order difference to remove the trend and stabilize the mean. The KPSS pvalue = 0.1 indicates that we fail to reject the null hypothesis that the data is stationary (ie, the data is stationary). A box-cox transformed, first order difference of the Turkey GDP should be suitable for modeling.

# Filter the global_economy for Turkey
turkey_gdp <- global_economy %>%
  filter(Country == "Turkey") %>%
  select(Year, GDP) %>%
  mutate(GDP_billions = GDP / 1e9)

# Apply Box-Cox transformation (finding the optimal lambda)
lambda <- turkey_gdp %>%
  features(GDP_billions, features = guerrero) %>%
  pull(lambda_guerrero)

turkey_gdp <- turkey_gdp %>%
  mutate(BoxCox_GDP = box_cox(GDP_billions, lambda))

# Perform first-order differencing on the Box-Cox transformed GDP
turkey_gdp <- turkey_gdp %>%
  mutate(Diff_BoxCox_GDP = difference(BoxCox_GDP, lag = 1))

# Plot original GDP, Box-Cox transformed GDP, and first-order differenced Box-Cox GDP
p1 <- autoplot(turkey_gdp, GDP_billions) +
  labs(title = "Turkish GDP Over Time (in Billions)",
       y = "GDP (Billions)",
       x = "Year") +
  scale_y_continuous(labels = label_comma(scale = 1)) +  
  theme_minimal() +
  theme(axis.text.y = element_text(size = 6))

p2 <- autoplot(turkey_gdp, BoxCox_GDP) +
  labs(title = paste("Box-Cox Transformed Turkish GDP (lambda =", round(lambda, 3), ")"),
       y = "BxCX GDP",
       x = "Year") +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 6))

p3 <- autoplot(turkey_gdp, Diff_BoxCox_GDP, na.rm = TRUE) +
  labs(title = "Differenced Box-Cox Transformed Turkish GDP",
       y = "Diff BxCx GDP",
       x = "Year") +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 6))

# Print all plots
grid.arrange(p1, p2, p3, nrow = 3)

# Perform the KPSS test
turkey_gdp |> features(Diff_BoxCox_GDP, unitroot_kpss)
## # A tibble: 1 × 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1    0.0889         0.1


- Accommodation takings in the state of Tasmania from aus_accommodation.

Here, we perform a box-cox transformation to stabilize the variance. We then perform a first order difference to remove the trend and stabilize the mean. The KPSS pvalue = 0.1 indicates that we fail to reject the null hypothesis that the data is stationary (ie, the data is stationary). A box-cox transformed, first order difference of the Accommodation Takings should be suitable for modeling.

# Filter the aus_accommodation dataset
tas_accommodation <- aus_accommodation %>%
  filter(State == "Tasmania") %>%
  select(Date, Takings)

# Apply Box-Cox transformation
lambda <- tas_accommodation %>%
  features(Takings, features = guerrero) %>%
  pull(lambda_guerrero)

tas_accommodation <- tas_accommodation %>%
  mutate(BoxCox_Takings = box_cox(Takings, lambda))

# Perform first-order differencing on the Box-Cox transformed Takings
tas_accommodation <- tas_accommodation %>%
  mutate(Diff_BoxCox_Takings = difference(BoxCox_Takings, lag = 1))

# Plot original Takings
p1 <- autoplot(tas_accommodation, Takings) +
  labs(title = "Accommodation Takings in Tasmania (Millions)",
       y = "Takings (Millions)",
       x = "Quarter") +
  scale_y_continuous(labels = label_comma(scale = 1)) +  
  theme_minimal() +
  theme(axis.text.y = element_text(size = 6))

# Plot Box-Cox transformed Takings
p2 <- autoplot(tas_accommodation, BoxCox_Takings) +
  labs(title = paste("Box-Cox Transformed Accommodation Takings (lambda =", round(lambda, 3), ")"),
       y = "BxCx Takings",
       x = "Quarter") +
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

# Plot first-order differenced Box-Cox Takings
p3 <- autoplot(tas_accommodation, Diff_BoxCox_Takings, na.rm = TRUE) +
  labs(title = "Differenced Box-Cox Transformed Accommodation Takings",
       y = "Diff BxCx Takings",
       x = "Quarter") +
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

# Print all plots
grid.arrange(p1, p2, p3, nrow = 3)

# Perform kpss test
tas_accommodation |> features(Diff_BoxCox_Takings, unitroot_kpss)
## # A tibble: 1 × 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1     0.258         0.1


- Monthly sales from souvenirs.

Here, we perform a box-cox transformation to stabilize the variance. We then perform a first order difference to remove the trend and stabilize the mean. The KPSS pvalue = 0.1 indicates that we fail to reject the null hypothesis that the data is stationary (ie, the data is stationary). A box-cox transformed, first order difference of the monthly souvenir sales should be suitable for modeling.

# Load the souvenirs dataset
souvenirs_sales <- souvenirs %>%
  select(Month, Sales)

# Apply Box-Cox transformation
lambda <- souvenirs_sales %>%
  features(Sales, features = guerrero) %>%
  pull(lambda_guerrero)

souvenirs_sales <- souvenirs_sales %>%
  mutate(BoxCox_Sales = box_cox(Sales, lambda))

# Perform first-order differencing on the Box-Cox transformed Sales
souvenirs_sales <- souvenirs_sales %>%
  mutate(Diff_BoxCox_Sales = difference(BoxCox_Sales, lag = 1))

# Plot original Sales
p1 <- autoplot(souvenirs_sales, Sales) +
  labs(title = "Monthly Sales of Souvenirs",
       y = "Sales (Millions)",
       x = "Month") +
  scale_y_continuous(labels = label_comma(scale = 1)) +  
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

# Plot Box-Cox transformed Sales
p2 <- autoplot(souvenirs_sales, BoxCox_Sales) +
  labs(title = paste("Box-Cox Transformed Souvenir Sales (lambda =", round(lambda, 3), ")"),
       y = "BxCx Sales",
       x = "Month") +
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

# Plotfirst-order differenced Box-Cox Sales
p3 <- autoplot(souvenirs_sales, Diff_BoxCox_Sales, na.rm = TRUE) +
  labs(title = "Differenced Box-Cox Transformed Souvenir Sales",
       y = "Diff BxCx Sales",
       x = "Month") +
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

# Print all plots
grid.arrange(p1, p2, p3, nrow = 3)

# Perform the KPSS test
souvenirs_sales |> features(Diff_BoxCox_Sales, unitroot_kpss)
## # A tibble: 1 × 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1    0.0631         0.1



Exercise 9.5

For your retail data (from Exercise 7 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.

Here, we perform a box-cox transformation to stabilize the variance. We then perform a first order difference to remove the trend and stabilize the mean. The KPSS pvalue = 0.1 indicates that we fail to reject the null hypothesis that the data is stationary (ie, the data is stationary). A box-cox transformed, first order difference of myseries should be suitable for modeling.

# Set a seed and filter the aus_retail dataset for a random Series ID
set.seed(123456789)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

# Apply Box-Cox transformation
lambda <- myseries %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

myseries <- myseries %>%
  mutate(BoxCox_Turnover = box_cox(Turnover, lambda))

# Perform first-order differencing on the Box-Cox transformed Turnover
myseries <- myseries %>%
  mutate(Diff_BoxCox_Turnover = difference(BoxCox_Turnover, lag = 1))

# Plot original Turnover
p1 <- autoplot(myseries, Turnover) +
  labs(title = "Original Time Series of Turnover",
       y = "Turnover",
       x = "Time") +
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

# Plot Box-Cox transformed Turnover
p2 <- autoplot(myseries, BoxCox_Turnover) +
  labs(title = paste("Box-Cox Transformed Turnover (lambda =", round(lambda, 3), ")"),
       y = "BxCx Turnover",
       x = "Time") +
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

# Plot first-order differenced Box-Cox Turnover
p3 <- autoplot(myseries, Diff_BoxCox_Turnover, na.rm = TRUE) +
  labs(title = "Differenced Box-Cox Transformed Turnover",
       y = "Diff BxCx Turnover",
       x = "Time") +
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

# Print all plots
grid.arrange(p1, p2, p3, nrow = 3)

# Perform the KPSS test
myseries |> features(Diff_BoxCox_Turnover, unitroot_kpss)
## # A tibble: 1 × 4
##   State    Industry                                        kpss_stat kpss_pvalue
##   <chr>    <chr>                                               <dbl>       <dbl>
## 1 Tasmania Hardware, building and garden supplies retaili…    0.0159         0.1



Exercise 9.6

Simulate and plot some data from simple ARIMA models. Use the following R code to generate data from an AR(1) model with ϕ1=0.6 and σ2=1. The process starts with y1=0.

Here, we perform a box-cox transformation to stabilize the variance. We then perform a first order difference to remove the trend and stabilize the mean. The KPSS pvalue = 0.1 indicates that we fail to reject the null hypothesis that the data is stationary (ie, the data is stationary). A box-cox transformed, first order difference of the simulated data should be suitable for modeling.

We then check the ACF and PACF of the differenced and box-cox transformed data. We can identify a possible ARIMA(1, 1, 0) or ARIMA(0, 1, 1) model. After performing the stepwise and general search, fable identified an ARIMA(1,1,1) as the best fit to the data according to the AICc(lowest value)

# Simulate the time series
set.seed(123456789)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6 * y[i - 1] + e[i]

sim <- tsibble(idx = seq_len(100), y = y, index = idx)

# Plot the original series
autoplot(sim) +
  labs(title = "Original Simulated Time Series",
       y = "y",
       x = "Index") +
  theme_minimal()

# Apply Box-Cox transformation
lambda <- sim %>%
  features(y, features = guerrero) %>%
  pull(lambda_guerrero)

sim <- sim %>%
  mutate(BoxCox_y = box_cox(y, lambda))

# Perform first-order differencing on the Box-Cox transformed series
sim <- sim %>%
  mutate(Diff_BoxCox_y = difference(BoxCox_y, lag = 1))

# Plot original series, Box-Cox transformed series, and first-order differenced series
p1 <- autoplot(sim, y) +
  labs(title = "Original Simulated Time Series",
       y = "y",
       x = "Index") +
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

p2 <- autoplot(sim, BoxCox_y) +
  labs(title = paste("Box-Cox Transformed Time Series (lambda =", round(lambda, 3), ")"),
       y = "BxCx y",
       x = "Index") +
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

p3 <- autoplot(sim, Diff_BoxCox_y, na.rm = TRUE) +
  labs(title = "Differenced Box-Cox Transformed Time Series",
       y = "Diff BxCx y",
       x = "Index") +
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

# Print all plots
grid.arrange(p1, p2, p3, nrow = 3)

# Perform the KPSS test
sim|> features(Diff_BoxCox_y, unitroot_kpss)
## # A tibble: 1 × 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1    0.0197         0.1
sim |> select(BoxCox_y) |>
gg_tsdisplay(difference(BoxCox_y), plot_type = "partial")

# Fit ARIMA models to the Box-Cox transformed series
sim_fit <- sim |>
  model(
    arima110 = ARIMA(BoxCox_y ~ pdq(1, 1, 0)),
    arima011 = ARIMA(BoxCox_y ~ pdq(0, 1, 1)),
    stepwise = ARIMA(BoxCox_y),                       
    search = ARIMA(BoxCox_y, stepwise = FALSE))

# Display model summary
sim_fit %>%
  kable("html", caption = "ARIMA Model Candidates for Box-Cox Transformed Time Series") %>%
  kable_styling(full_width = TRUE, bootstrap_options = c("striped", "hover", "responsive"))
ARIMA Model Candidates for Box-Cox Transformed Time Series
arima110 arima011 stepwise search
<ARIMA(1,1,0)> <ARIMA(0,1,1)> <ARIMA(1,1,1)> <ARIMA(1,1,1)>
# Create a summary table of ARIMA model candidates
sim_fit_summary <- glance(sim_fit)

# Display model summary
sim_fit_summary %>%
  kable("html", caption = "ARIMA Model Candidates for Box-Cox Transformed Time Series") %>%
  kable_styling(full_width = TRUE, bootstrap_options = c("striped", "hover", "responsive"))
ARIMA Model Candidates for Box-Cox Transformed Time Series
.model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
arima110 1.597773 -163.2170 330.4340 330.5590 335.6243 -3.292111+0i
arima011 1.521474 -160.9194 325.8388 325.9638 331.0291 1.849484+0i
stepwise 1.313953 -153.6971 313.3943 313.6469 321.1796 2.47651+0i 1.059447+0i
search 1.313953 -153.6971 313.3943 313.6469 321.1796 2.47651+0i 1.059447+0i



Exercise 9.7

Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.

  • Use ARIMA() to find an appropriate ARIMA model. What model was selected. Check that the residuals look like white noise. Plot forecasts for the next 10 periods.
  • Write the model in terms of the backshift operator.
  • Plot forecasts from an ARIMA(0,1,0) model with drift and compare these to part a.
  • Plot forecasts from an ARIMA(2,1,2) model with drift and compare these to parts a and c. Remove the constant and see what happens.
  • Plot forecasts from an ARIMA(0,2,1) model with a constant. What happens?

Here we apply a box cox transformation and first order difference to achieve stationarity - as confirmed by the KPSS pvalue = 0.1. Of the 3 ARIMAS models, ARIMA(0,1,2) with Drift is the best fitting model. ARIMA(2,1,2) could not be fit and could not be forecast. The residuals for the ARIMA(0,1,2) and ARIMA(0,1,0) indicate that these are whites noise and suitable for modeling.

Another observation is as you moved away from the random walk drift of the (0,1,0), the prediction intervals narrowed.

# Load the aus_airpassengers dataset
passengers <- aus_airpassengers

# autoplot of aus_airpassengers
autoplot(passengers) +
  ggtitle("Australian Air Passengers") +
  xlab("Year") +
  ylab("Passengers (in thousands)") +
  theme_minimal()

# Box-Cox transformation lambda
lambda <- passengers %>%
  features(Passengers, features = guerrero) %>%
  pull(lambda_guerrero)

# Apply Box-Cox transformation using the lambda value
passengers_transformed <- passengers |> 
  mutate(Passengers_boxcox = box_cox(Passengers, lambda))

# Plot the Box-Cox transformed data
autoplot(passengers_transformed, Passengers_boxcox) +
  ggtitle("Australian Air Passengers - Box-Cox Transformed") +
  xlab("Year") +
  ylab("Transformed Passengers") +
  theme_minimal()

# Differencing the Box-Cox transformed data
passengers_differenced <- passengers_transformed |> 
  mutate(Passengers_diff = difference(Passengers_boxcox))

# Plot the differenced series
passengers_differenced |> 
  gg_tsdisplay(Passengers_diff, plot_type = "partial") +
  ggtitle("Differenced Box-Cox Transformed Australian Air Passengers")

# Perform KPSS test on the differenced series
passengers_differenced |> features(Passengers_diff, unitroot_kpss)
## # A tibble: 1 × 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1     0.112         0.1
# Fit ARIMA(0,1,0), ARIMA(2,1,2), ARIMA(0,1,2)
passengers_fit <- passengers_transformed |> 
  model(
    arima010 = ARIMA(Passengers_boxcox ~ pdq(0, 1, 0)),
    arima212 = ARIMA(Passengers_boxcox ~ pdq(2,1,2)),
    arima012 = ARIMA(Passengers_boxcox ~ pdq(0,1,2)))

# ARIMA models reports 
report(passengers_fit$arima010[[1]])
## Series: Passengers_boxcox 
## Model: ARIMA(0,1,0) w/ drift 
## 
## Coefficients:
##       constant
##         0.0289
## s.e.    0.0063
## 
## sigma^2 estimated as 0.001837:  log likelihood=80.12
## AIC=-156.25   AICc=-155.97   BIC=-152.59
# Add a blank line
cat("\n\n")
report(passengers_fit$arima212[[1]])
## Series: Passengers_boxcox 
## Model: NULL model 
## NULL model
# Add a blank line
cat("\n\n")
report(passengers_fit$arima021[[1]])
## NULL model
# Check model metrics
passengers_fit |> report()
## # A tibble: 2 × 8
##   .model    sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>      <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 arima010 0.00184    80.1 -156. -156. -153. <cpl [0]> <cpl [0]>
## 2 arima012 0.00180    81.6 -155. -154. -148. <cpl [0]> <cpl [2]>
# Check model residuals
passengers_fit |>
  select(arima010)|>
  gg_tsresiduals()+
  ggtitle("ARIMA(0,1,0) Residuals")

# Check model residuals
passengers_fit |>
  select(arima012)|>
  gg_tsresiduals()+
  ggtitle("ARIMA(0,1,2) Residuals")

# Generate 10-year forecasts
passengers_forecast <- passengers_fit |> 
  forecast(h = "10 years")

# Plot the forecast for ARIMA(0,1,0)
p1 <- autoplot(passengers_transformed, Passengers_boxcox) +
  autolayer(passengers_forecast %>% filter(.model == "arima010"), series = "ARIMA(0,1,0)", PI = FALSE) +
  ggtitle("ARIMA(0,1,0) Model Forecast") +
  xlab("Year") +
  ylab("BxCx Passengers") +
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

# Plot the forecast for ARIMA(2,1,2)
p2 <- autoplot(passengers_transformed, Passengers_boxcox) +
  autolayer(passengers_forecast %>% filter(.model == "arima212"), series = "ARIMA(2,1,2)", PI = FALSE) +
  ggtitle("ARIMA(2,1,2) Model Forecast") +
  xlab("Year") +
  ylab("BxCx Passengers") +
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

# Plot the forecast for ARIMA(0,1,2)
p3 <- autoplot(passengers_transformed, Passengers_boxcox) +
  autolayer(passengers_forecast %>% filter(.model == "arima012"), series = "ARIMA(0,1,2)", PI = FALSE) +
  ggtitle("ARIMA(0,1,2) Model Forecast") +
  xlab("Year") +
  ylab("BxCx Passengers") +
  theme_minimal()+
  theme(axis.text.y = element_text(size = 6))

grid.arrange(p1, p2, p3, nrow = 3)



Exercise 9.8

For the United States GDP series (from global_economy):

  • if necessary, find a suitable Box-Cox transformation for the data;
  • fit a suitable ARIMA model to the transformed data using ARIMA();
  • try some other plausible models by experimenting with the orders chosen;
  • choose what you think is the best model and check the residual diagnostics;
  • produce forecasts of your fitted model. Do the forecasts look reasonable?
  • compare the results with what you would obtain using ETS() (with no transformation).

We perform the standard box cox transformation and differencing to achieve stationarity. The ACF & PACF allow us to identify an ARIMA110 and an ARIMA011 as possible candidates. Using Fables stepwise and general search, we confirmed the ARIMA110 to be the best fitting model. We use this ARIMA model to forecast.

The residuals appear as white noise with no autocorreation and normally distributed; the ljung box test indicates that this is white noise with a high pvalue = 0.98.

The ETS model forecast performs similar to the ARIMA. The key distinction is that the confidence interval are much wider and grow wider over time.

# Filter the global_economy
us_gdp <- global_economy %>%
  filter(Country == "United States") %>%
  select(Year, GDP) %>%
  mutate(GDP_billions = GDP / 1e9)  # Convert GDP to billions

# Apply Box-Cox transformation (finding the optimal lambda)
lambda_us <- us_gdp %>%
  features(GDP_billions, features = guerrero) %>%
  pull(lambda_guerrero)

us_gdp <- us_gdp %>%
  mutate(BoxCox_GDP = box_cox(GDP_billions, lambda_us))

# Perform first-order differencing on the Box-Cox transformed GDP
us_gdp <- us_gdp %>%
  mutate(Diff_BoxCox_GDP = difference(BoxCox_GDP, lag = 1))

# Plot original GDP
p1 <- autoplot(us_gdp, GDP_billions) +
  labs(title = "US GDP Over Time (in Billions)",
       y = "GDP (Billions)",
       x = "Year") +
  scale_y_continuous(labels = label_comma(scale = 1)) +  
  theme_minimal()

# Plot Box-Cox transformed GDP
p2 <- autoplot(us_gdp, BoxCox_GDP) +
  labs(title = paste("Box-Cox Transformed US GDP (lambda =", round(lambda_us, 3), ")"),
       y = "Transformed GDP",
       x = "Year") +
  theme_minimal()

# Plot first-order differenced Box-Cox GDP
p3 <- autoplot(us_gdp, Diff_BoxCox_GDP, na.rm = TRUE) +
  labs(title = "Differenced Box-Cox Transformed US GDP",
       y = "First-Order Differenced Transformed GDP",
       x = "Year") +
  theme_minimal()

# Print all plots
p1

grid.arrange(p2, p3, nrow = 2)

# Perform KPSS test on the differenced Box-Cox transformed GDP
us_gdp %>% features(Diff_BoxCox_GDP, unitroot_kpss)
## # A tibble: 1 × 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1     0.208         0.1
# Display ACF and PACF of the Box-Cox transformed data
us_gdp %>%
  gg_tsdisplay(difference(BoxCox_GDP), plot_type = "partial") +
  labs(title = "ACF and PACF of Box-Cox Transformed US GDP")

us_gdp_fit <- us_gdp |> 
  model(
    arima110 = ARIMA(BoxCox_GDP ~ pdq(1, 1,0)),
    arima011 = ARIMA(BoxCox_GDP ~ pdq(0, 1,1)),
    stepwise = ARIMA(BoxCox_GDP),
    search = ARIMA(BoxCox_GDP, stepwise = FALSE))

# Create table
us_gdp_fit %>%
  kable("html", caption = "ARIMA Model Candidates") %>%
  kable_styling(full_width = TRUE, bootstrap_options = c("striped", "hover", "responsive"))
ARIMA Model Candidates
arima110 arima011 stepwise search
<ARIMA(1,1,0) w/ drift> <ARIMA(0,1,1) w/ drift> <ARIMA(1,1,0) w/ drift> <ARIMA(1,1,0) w/ drift>
# Convert the model summary to a table
us_gdp_fit_summary <- glance(us_gdp_fit) %>%
  select(-ar_roots, -ma_roots) %>%
  arrange(AICc)

# Create table
us_gdp_fit_summary %>%
  kable("html", caption = "ARIMA Model Fits for US GDP") %>%
  kable_styling(full_width = TRUE, bootstrap_options = c("striped", "hover", "responsive"))
ARIMA Model Fits for US GDP
.model sigma2 log_lik AIC AICc BIC
arima110 0.0461005 7.716151 -9.432303 -8.979473 -3.303149
stepwise 0.0461005 7.716151 -9.432303 -8.979473 -3.303149
search 0.0461005 7.716151 -9.432303 -8.979473 -3.303149
arima011 0.0478668 6.674118 -7.348235 -6.895405 -1.219081
# Check Residuals
us_gdp_fit |>
  select(arima110)|>
  gg_tsresiduals()+
  ggtitle("ARIMA(1,1,0) Residiual")

portm <- augment(us_gdp_fit) |>
  filter(.model == "arima110") |>
  features (.innov, ljung_box, lag = 10, dof = 1)

portm %>%
  kable("html", caption = "ARIMA Model Candidates") %>%
  kable_styling(full_width = TRUE, bootstrap_options = c("striped", "hover", "responsive"))
ARIMA Model Candidates
.model lb_stat lb_pvalue
arima110 3.812056 0.9233412
# Forecast using the ARIMA(1,1,0) model with lowest AICc
us_gdp_forecast <- us_gdp_fit %>%
  select(arima110) %>%
  forecast(h = "10 years")

# Plot the forecast with back-transformed values
us_gdp_forecast_plot <- us_gdp_forecast %>%
  autoplot(us_gdp) +
  labs(title = "10-Year Forecast of US GDP (in Billions)",
       y = "GDP (Billions)",
       x = "Year") +
  scale_y_continuous(labels = label_comma(scale = 1)) +
  theme_minimal()

# Print forecast plot
us_gdp_forecast_plot

# Fit an ETS model to the untransformed GDP data
us_gdp_ets_fit <- us_gdp %>%
  model(ETS(GDP_billions))

# Forecast the next 10 years using the ETS model
us_gdp_ets_forecast <- us_gdp_ets_fit %>%
  forecast(h = "10 years")

# Plot the historical GDP and forecasted GDP (with prediction intervals)
us_gdp %>%
  autoplot(GDP_billions) +
  autolayer(us_gdp_ets_forecast) +
  labs(title = "10-Year Forecast of US GDP (in Billions) using ETS Model",
       y = "GDP (Billions)",
       x = "Year") +
  theme_minimal()