Figure 9.32 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers.
! [Caption] (https://otexts.com/fpp3/fpp_files/figure-html/wnacfplus-1.png)
Figure 9.32: Left: ACF for a white noise series of 36 numbers. Middle: ACF for a white noise series of 360 numbers. Right: ACF for a white noise series of 1,000 numbers.
A classic example of a non-stationary series are stock prices. Plot the daily closing prices for Amazon stock (contained in gafa_stock), along with the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced.
amazon_stock <- gafa_stock %>%
filter(Symbol == "AMZN") %>%
select(Date, Close) %>%
as_tsibble(index = Date)
amazon_stock %>%
autoplot(Close) +
labs(title = "Daily Closing Prices of Amazon Stock",
y = "Closing Price (USD)", x = "Date")
amazon_stock %>%
ACF(Close) %>%
autoplot() +
labs(title = "ACF of Amazon Daily Closing Prices")
## Warning: Provided data has an irregular interval, results should be treated
## with caution. Computing ACF by observation.
amazon_stock %>%
PACF(Close) %>%
autoplot() +
labs(title = "PACF of Amazon Daily Closing Prices")
## Warning: Provided data has an irregular interval, results should be treated
## with caution. Computing ACF by observation.
For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data.
turkey_gdp <- global_economy %>%
filter(Country == "Turkey") %>%
select(Year, GDP) %>%
as_tsibble(index = Year)
lambda <- turkey_gdp %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
turkey_gdp <- turkey_gdp %>%
mutate(GDP_boxcox = box_cox(GDP, lambda))
autoplot(turkey_gdp, GDP_boxcox) +
labs(title = "Box-Cox Transformed Turkish GDP")
turkey_gdp %>%
ACF(GDP_boxcox) %>%
autoplot() +
labs(title = "ACF of Box-Cox Transformed GDP")
turkey_gdp %>%
PACF(GDP_boxcox) %>%
autoplot() +
labs(title = "PACF of Box-Cox Transformed GDP")
turkey_gdp %>%
features(GDP_boxcox, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 1.50 0.01
turkey_gdp <- turkey_gdp %>%
mutate(GDP_diff1 = difference(GDP_boxcox))
autoplot(turkey_gdp, GDP_diff1) +
labs(title = "First-Order Differenced GDP")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
turkey_gdp %>%
ACF(GDP_diff1) %>%
autoplot() +
labs(title = "ACF of First-Order Differenced GDP")
turkey_gdp %>%
PACF(GDP_diff1) %>%
autoplot() +
labs(title = "PACF of First-Order Differenced GDP")
tas_accommodation <- aus_accommodation %>%
filter(State == "Tasmania") %>%
select(Date, Takings) %>%
as_tsibble(index = Date)
lambda <- tas_accommodation %>%
features(Takings, features = guerrero) %>%
pull(lambda_guerrero)
tas_accommodation <- tas_accommodation %>%
mutate(Takings_boxcox = box_cox(Takings, lambda))
autoplot(tas_accommodation, Takings_boxcox) +
labs(title = "Box-Cox Transformed Accommodation Takings in Tasmania")
tas_accommodation %>%
ACF(Takings_boxcox) %>%
autoplot() +
labs(title = "ACF of Box-Cox Transformed Takings")
tas_accommodation %>%
PACF(Takings_boxcox) %>%
autoplot() +
labs(title = "PACF of Box-Cox Transformed Takings")
tas_accommodation %>%
features(Takings_boxcox, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 1.84 0.01
tas_accommodation <- tas_accommodation %>%
mutate(Takings_diff1 = difference(Takings_boxcox))
autoplot(tas_accommodation, Takings_diff1) +
labs(title = "First-Order Differenced Takings")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
tas_accommodation %>%
ACF(Takings_diff1) %>%
autoplot() +
labs(title = "ACF of First-Order Differenced Takings")
tas_accommodation %>%
PACF(Takings_diff1) %>%
autoplot() +
labs(title = "PACF of First-Order Differenced Takings")
souvenir_sales <- souvenirs %>%
select(Month, Sales) %>%
as_tsibble(index = Month) # Ensure it's a tsibble
lambda <- souvenir_sales %>%
features(Sales, features = guerrero) %>%
pull(lambda_guerrero)
souvenir_sales <- souvenir_sales %>%
mutate(Sales_boxcox = box_cox(Sales, lambda))
autoplot(souvenir_sales, Sales_boxcox) +
labs(title = "Box-Cox Transformed Monthly Souvenir Sales")
souvenir_sales %>%
ACF(Sales_boxcox) %>%
autoplot() +
labs(title = "ACF of Box-Cox Transformed Souvenir Sales")
souvenir_sales %>%
PACF(Sales_boxcox) %>%
autoplot() +
labs(title = "PACF of Box-Cox Transformed Souvenir Sales")
souvenir_sales %>%
features(Sales_boxcox, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 1.79 0.01
souvenir_sales <- souvenir_sales %>%
mutate(Sales_diff1 = difference(Sales_boxcox))
autoplot(souvenir_sales, Sales_diff1) +
labs(title = "First-Order Differenced Souvenir Sales")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
souvenir_sales %>%
ACF(Sales_diff1) %>%
autoplot() +
labs(title = "ACF of First-Order Differenced Souvenir Sales")
souvenir_sales %>%
PACF(Sales_diff1) %>%
autoplot() +
labs(title = "PACF of First-Order Differenced Souvenir Sales")
For your retail data (from Exercise 7 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.
set.seed(951)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries_tsibble <- myseries %>%
filter(!is.na(Turnover)) %>%
select(Month, Turnover) %>%
as_tsibble(index = Month)
autoplot(myseries_tsibble, Turnover) +
labs(title = "Selected Retail Time Series", y = "Turnover")
gg_season(myseries_tsibble, Turnover) +
labs(title = "Seasonal Plot of Retail Time Series")
gg_subseries(myseries_tsibble, Turnover) +
labs(title = "Subseries Plot of Retail Time Series")
gg_lag(myseries_tsibble, Turnover, lags = 1:9) +
labs(title = "Lag Plot of Retail Time Series")
myseries_tsibble %>%
ACF(Turnover) %>%
autoplot() +
labs(title = "ACF of Retail Time Series")
log_series <- myseries_tsibble %>%
mutate(log_turnover = ifelse(Turnover > 0, log(Turnover), NA))
autoplot(log_series, log_turnover) +
labs(title = "Log-Transformed Retail Time Series", y = "Log(Turnover)")
diff_series <- log_series %>%
mutate(diff_log_turnover = difference(log_turnover))
autoplot(diff_series, diff_log_turnover) +
labs(title = "First Difference of Log-Transformed Series", y = "First Difference")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
diff_series %>%
ACF(diff_log_turnover, na.action = na.pass) %>%
autoplot() +
labs(title = "ACF of First-Differenced Log Series")
acf_result <- log_series %>%
ACF(log_turnover, lag.max = 24)
## Warning: The `...` argument of `PACF()` is deprecated as of feasts 0.2.2.
## ℹ ACF variables should be passed to the `y` argument. If multiple variables are
## to be used, specify them using `vars(...)`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: ACF currently only supports one column, `log_turnover` will be used.
if (any(acf_result$acf[13:24] > 0.2)) {
seasonal_diff_series <- diff_series %>%
mutate(seasonal_diff_log_turnover = difference(diff_log_turnover, lag = 12))
autoplot(seasonal_diff_series, seasonal_diff_log_turnover) +
labs(title = "Seasonal Difference of First-Differenced Series", y = "Seasonal Difference")
seasonal_diff_series %>%
ACF(seasonal_diff_log_turnover, na.action = na.pass) %>%
autoplot() +
labs(title = "ACF of Seasonally Differenced Series")
}
ndiffs_result <- log_series %>%
pull(log_turnover) %>%
na.omit() %>%
ndiffs()
cat("Number of regular differences needed (ndiffs):", ndiffs_result, "\n")
## Number of regular differences needed (ndiffs): 1
Simulate and plot some data from simple ARIMA models.
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
# Plot
ggplot(sim, aes(x = idx, y = y)) +
geom_line(color = "blue") +
labs(title = "Simulated AR(1) Process (ϕ1 = 0.6, σ² = 1)",
x = "Time", y = "Value") +
theme_minimal()
set.seed(962)
simulate_ar1 <- function(phi, n = 100) {
y <- numeric(n)
e <- rnorm(n)
for(i in 2:n) {
y[i] <- phi * y[i-1] + e[i]
}
return(y)
}
phi_values <- c(-0.6, 0.2, 0.6, 0.9)
sim_data <- tibble(idx = rep(1:100, times = length(phi_values)),
phi = rep(phi_values, each = 100),
y = unlist(lapply(phi_values, simulate_ar1))) %>%
mutate(phi = factor(phi, levels = as.character(phi_values)))
ggplot(sim_data, aes(x = idx, y = y, color = phi, group = phi)) +
geom_line() +
facet_wrap(~ phi, scales = "free_y") +
labs(title = "Simulated AR(1) Process for Different phi Values",
x = "Time", y = "Value") +
theme_minimal()
theta1 <- 0.6
n <- 100
e <- rnorm(n)
y <- numeric(n)
y[1] <- e[1]
for (i in 2:n) {
y[i] <- e[i] + theta1 * e[i - 1]
}
sim_ma1 <- tsibble(idx = seq_len(n), y = y, index = idx)
ggplot(sim_ma1, aes(x = idx, y = y)) +
geom_line(color = "blue") +
labs(title = "Simulated MA(1) Process (θ1 = 0.6, σ² = 1)",
x = "Time", y = "Value") +
theme_minimal()
simulate_ma1 <- function(theta, n = 100) {
e <- rnorm(n)
y <- numeric(n)
y[1] <- e[1]
for (i in 2:n) {
y[i] <- e[i] + theta * e[i - 1]
}
return(y)
}
theta_values <- c(-0.6, 0.2, 0.6, 0.9)
sim_data <- tibble(idx = rep(1:100, times = length(theta_values)),
theta = rep(theta_values, each = 100),
y = unlist(lapply(theta_values, simulate_ma1))) %>%
mutate(theta = factor(theta, levels = as.character(theta_values)))
ggplot(sim_data, aes(x = idx, y = y, color = theta, group = theta)) +
geom_line() +
facet_wrap(~ theta, scales = "free_y") +
labs(title = "Simulated MA(1) Process for Different θ1 Values",
x = "Time", y = "Value") +
theme_minimal()
phi1 <- 0.6
theta1 <- 0.6
n <- 100
e <- rnorm(n)
y <- numeric(n)
y[1] <- e[1]
for (i in 2:n) {
y[i] <- phi1 * y[i - 1] + e[i] + theta1 * e[i - 1]
}
sim_arma11 <- tsibble(idx = seq_len(n), y = y, index = idx)
ggplot(sim_arma11, aes(x = idx, y = y)) +
geom_line(color = "blue") +
labs(title = "Simulated ARMA(1,1) Process (φ1 = 0.6, θ1 = 0.6, σ² = 1)",
x = "Time", y = "Value") +
theme_minimal()
phi1 <- -0.8
phi2 <- 0.3
n <- 100
e <- rnorm(n)
y <- numeric(n)
y[1] <- e[1]
y[2] <- e[2]
for (i in 3:n) {
y[i] <- phi1 * y[i - 1] + phi2 * y[i - 2] + e[i]
}
sim_ar2 <- tsibble(idx = seq_len(n), y = y, index = idx)
ggplot(sim_ar2, aes(x = idx, y = y)) +
geom_line(color = "blue") +
labs(title = "Simulated AR(2) Process (φ1 = -0.8, φ2 = 0.3, σ² = 1)",
x = "Time", y = "Value") +
theme_minimal()
simulate_arma11 <- function(phi, theta, n = 100) {
e <- rnorm(n)
y <- numeric(n)
y[1] <- e[1]
for (i in 2:n) {
y[i] <- phi * y[i - 1] + e[i] + theta * e[i - 1]
}
return(y)
}
simulate_ar2 <- function(phi1, phi2, n = 100) {
e <- rnorm(n)
y <- numeric(n)
y[1] <- e[1]
y[2] <- e[2]
for (i in 3:n) {
y[i] <- phi1 * y[i - 1] + phi2 * y[i - 2] + e[i]
}
return(y)
}
n <- 100
arma11_data <- tibble(idx = 1:n, model = "ARMA(1,1)", y = simulate_arma11(phi = 0.6, theta = 0.6, n))
ar2_data <- tibble(idx = 1:n, model = "AR(2)", y = simulate_ar2(phi1 = -0.8, phi2 = 0.3, n))
sim_data <- bind_rows(arma11_data, ar2_data)
ggplot(sim_data, aes(x = idx, y = y, color = model, group = model)) +
geom_line() +
facet_wrap(~ model, scales = "free_y") +
labs(title = "Comparison of ARMA(1,1) and AR(2) Processes",
x = "Time", y = "Value") +
theme_minimal()
Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.
data <- aus_airpassengers %>%
filter(Year >= 1970 & Year <= 2011)
fit <- data %>%
model(ARIMA(Passengers))
report(fit)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8756
## s.e. 0.0722
##
## sigma^2 estimated as 4.671: log likelihood=-87.8
## AIC=179.61 AICc=179.93 BIC=182.99
fit %>% gg_tsresiduals()
forecast_fit <- fit %>% forecast(h = 10)
autoplot(data, Passengers) +
autolayer(forecast_fit, level = 95) +
labs(title = "Forecast for Australian Air Passengers",
y = "Passengers (millions)",
x = "Year") +
theme_minimal()
Model: ARIMA(0,2,1)
fit_021 <- data %>%
model(ARIMA(Passengers ~ 0 + pdq(0,2,1)))
report(fit_021)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8756
## s.e. 0.0722
##
## sigma^2 estimated as 4.671: log likelihood=-87.8
## AIC=179.61 AICc=179.93 BIC=182.99
forecast_fit_021 <- fit_021 %>% forecast(h = 10)
autoplot(data, Passengers) +
autolayer(forecast_fit_021, level = 95) +
labs(title = "ARIMA(0,2,1)",
y = "Passengers (millions)",
x = "Year") +
theme_minimal()
fit_010_drift <- data %>%
model(ARIMA(Passengers ~ pdq(0,1,0)))
report(fit_010_drift)
## Series: Passengers
## Model: ARIMA(0,1,0) w/ drift
##
## Coefficients:
## constant
## 1.3669
## s.e. 0.3319
##
## sigma^2 estimated as 4.629: log likelihood=-89.08
## AIC=182.17 AICc=182.48 BIC=185.59
forecast_021 <- fit_021 %>% forecast(h = 10)
forecast_010_drift <- fit_010_drift %>% forecast(h = 10)
autoplot(data, Passengers) +
autolayer(forecast_010_drift, series = "ARIMA(0,1,0) with Drift", level = 95) +
labs(title = "ARIMA(0,1,0) with Drift",
y = "Passengers (millions)",
x = "Year") +
theme_minimal()
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`
fit_212_drift <- data %>%
model(ARIMA(Passengers ~ pdq(2,1,2)))
report(fit_212_drift)
## Series: Passengers
## Model: ARIMA(2,1,2) w/ drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 constant
## 1.4694 -0.5103 -1.5736 0.6780 0.0650
## s.e. 0.3780 0.3558 0.3081 0.2688 0.0294
##
## sigma^2 estimated as 4.748: log likelihood=-87.74
## AIC=187.47 AICc=189.94 BIC=197.75
forecast_212_drift <- fit_212_drift %>% forecast(h = 10)
autoplot(data, Passengers) +
autolayer(forecast_212_drift, series = "ARIMA(2,1,2) with Drift", level = 95) +
labs(title = "ARIMA(2,1,2) with Drift",
y = "Passengers (millions)",
x = "Year") +
theme_minimal()
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`
fit_021_constant <- data %>%
model(ARIMA(Passengers ~1 + pdq(0,2,1)))
## Warning: Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
report(fit_021_constant)
## Series: Passengers
## Model: ARIMA(0,2,1) w/ poly
##
## Coefficients:
## ma1 constant
## -1.0000 0.0721
## s.e. 0.0709 0.0260
##
## sigma^2 estimated as 4.086: log likelihood=-85.74
## AIC=177.48 AICc=178.15 BIC=182.55
forecast_021_constant <- fit_021_constant %>% forecast(h = 10)
autoplot(data, Passengers) +
autolayer(forecast_021_constant, series = "ARIMA(0,2,1) with Constant", level = 95) +
labs(title = "ARIMA(0,2,1) with Constant (Drift)",
y = "Passengers (millions)",
x = "Year") +
theme_minimal()
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`
For the United States GDP series (from global_economy):
us_gdp <- global_economy %>%
filter(Country == "United States") %>%
select(GDP)
lambda <- BoxCox.lambda(us_gdp$GDP)
print(lambda)
## [1] 0.2819714
us_gdp_transformed <- us_gdp %>%
mutate(GDP_transformed = BoxCox(GDP, lambda))
us_gdp_transformed %>%
autoplot(GDP_transformed) +
labs(title = "Transformed US GDP", y = "Transformed GDP")
us_gdp <- global_economy %>%
filter(Country == "United States") %>%
mutate(GDP_transformed = log(GDP))
fit <- us_gdp %>%
model(ARIMA(GDP_transformed))
report(fit)
## Series: GDP_transformed
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.6353
## s.e. 0.1138
##
## sigma^2 estimated as 0.0004278: log likelihood=139.76
## AIC=-275.53 AICc=-275.3 BIC=-271.48
fit %>%
gg_tsresiduals()
fit %>%
augment() %>%
features(.resid, ljung_box, lag = 10, dof = 1)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 United States ARIMA(GDP_transformed) 2.31 0.986
fit_arima_1_1_1 <- us_gdp %>%
model(ARIMA(GDP_transformed ~ pdq(1,1,1)))
fit_arima_2_1_0 <- us_gdp %>%
model(ARIMA(GDP_transformed ~ pdq(2,1,0)))
fit_arima_0_1_2 <- us_gdp %>%
model(ARIMA(GDP_transformed ~ pdq(0,1,2)))
fit_arima_1_1_2 <- us_gdp %>%
model(ARIMA(GDP_transformed ~ pdq(1,1,2)))
fit_arima_2_1_2 <- us_gdp %>%
model(ARIMA(GDP_transformed ~ pdq(2,1,2)))
fit_arima_0_0_0 <- us_gdp %>%
model(ARIMA(GDP_transformed ~ pdq(0,0,0)))
report(fit_arima_1_1_1)
## Series: GDP_transformed
## Model: ARIMA(1,1,1) w/ drift
##
## Coefficients:
## ar1 ma1 constant
## 0.9270 -0.5751 0.0042
## s.e. 0.0636 0.1552 0.0010
##
## sigma^2 estimated as 0.0004144: log likelihood=143.16
## AIC=-278.32 AICc=-277.55 BIC=-270.14
report(fit_arima_2_1_0)
## Series: GDP_transformed
## Model: ARIMA(2,1,0) w/ drift
##
## Coefficients:
## ar1 ar2 constant
## 0.5069 0.2176 0.0166
## s.e. 0.1301 0.1317 0.0025
##
## sigma^2 estimated as 0.000438: log likelihood=141.63
## AIC=-275.25 AICc=-274.48 BIC=-267.08
report(fit_arima_0_1_2)
## Series: GDP_transformed
## Model: ARIMA(0,1,2) w/ drift
##
## Coefficients:
## ma1 ma2 constant
## 0.5316 0.2806 0.0622
## s.e. 0.1305 0.1184 0.0052
##
## sigma^2 estimated as 0.0005129: log likelihood=137.14
## AIC=-266.29 AICc=-265.52 BIC=-258.11
report(fit_arima_1_1_2)
## Series: GDP_transformed
## Model: ARIMA(1,1,2) w/ drift
##
## Coefficients:
## ar1 ma1 ma2 constant
## 0.9373 -0.5149 -0.1004 0.0036
## s.e. 0.0561 0.1533 0.1336 0.0008
##
## sigma^2 estimated as 0.0004182: log likelihood=143.44
## AIC=-276.88 AICc=-275.71 BIC=-266.67
report(fit_arima_2_1_2)
## Series: GDP_transformed
## Model: ARIMA(2,1,2) w/ drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 constant
## 0.5858 0.3276 -0.1667 -0.3041 0.0050
## s.e. 1.0571 0.9791 1.0274 0.5776 0.0012
##
## sigma^2 estimated as 0.0004255: log likelihood=143.49
## AIC=-274.99 AICc=-273.31 BIC=-262.73
report(fit_arima_0_0_0)
## Series: GDP_transformed
## Model: ARIMA(0,0,0) w/ mean
##
## Coefficients:
## constant
## 29.0791
## s.e. 0.1471
##
## sigma^2 estimated as 1.276: log likelihood=-88.87
## AIC=181.73 AICc=181.95 BIC=185.85
fit_arima_1_1_1 %>%
gg_tsresiduals()
fit_arima_2_1_0 %>%
gg_tsresiduals()
fit_arima_0_1_2 %>%
gg_tsresiduals()
fit_arima_1_1_2 %>%
gg_tsresiduals()
fit_arima_2_1_2 %>%
gg_tsresiduals()
fit_arima_0_0_0 %>%
gg_tsresiduals()
fit_arima_1_1_1 %>%
augment() %>%
features(.resid, ljung_box, lag = 10, dof = 2)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 United States ARIMA(GDP_transformed ~ pdq(1, 1, 1)) 4.11 0.847
fit_arima_2_1_0 %>%
augment() %>%
features(.resid, ljung_box, lag = 10, dof = 2)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 United States ARIMA(GDP_transformed ~ pdq(2, 1, 0)) 5.62 0.689
fit_arima_0_1_2 %>%
augment() %>%
features(.resid, ljung_box, lag = 10, dof = 2)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 United States ARIMA(GDP_transformed ~ pdq(0, 1, 2)) 26.6 0.000831
fit_arima_1_1_2 %>%
augment() %>%
features(.resid, ljung_box, lag = 10, dof = 2)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 United States ARIMA(GDP_transformed ~ pdq(1, 1, 2)) 3.83 0.872
fit_arima_2_1_2 %>%
augment() %>%
features(.resid, ljung_box, lag = 10, dof = 2)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 United States ARIMA(GDP_transformed ~ pdq(2, 1, 2)) 3.86 0.869
fit_arima_0_0_0 %>%
augment() %>%
features(.resid, ljung_box, lag = 10, dof = 2)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 United States ARIMA(GDP_transformed ~ pdq(0, 0, 0)) 360. 0
| Model | AIC | \(phi_{1}\) | \(theta_1\) | \(sigma^2\) | P-value | Acf plot | His-resid plot. |
|---|---|---|---|---|---|---|---|
| ARIMA(1, 1, 1) | -278.32 | 0.9270 | -0.5751 | 0.0042 | 0.847 | No significant autocorrelation | Right-skewed residuals |
| ARIMA(2, 1, 0) | -275.25 | 0.5069 | 0.0166 | 0.689 | No significant autocorrelation | Right-skewed residuals | |
| ARIMA(0, 1, 2) | -266.29 | 0.5316 | 0.0622 | 0.000831 | No significant autocorrelation | Symmetric | |
| ARIMA(1, 1, 2) | -276.88 | 0.9373 | -0.5149 | 0.0036 | 0.872 | No significant autocorrelation | Right-skewed residuals |
| ARIMA(2, 1, 2) | -274.99 | 0.5858 | -0.1667 | 0.0050 | 0.869 | No significant autocorrelation | Right-skewed residuals |
| ARIMA(0, 0, 0) | 181.73 | 29.0791 | 0 | Significant autocorrelation | Right-skewed residuals |
I think ARIMA(1, 1, 1) is the best model based on lowest AIC, high p-value, and no significant autocorrelation.
forecast_arima_1_1_1 <- fit_arima_1_1_1 %>%
forecast(h = 12)
forecast_arima_1_1_1 %>%
autoplot(us_gdp) +
labs(title = "Forecast for US GDP using ARIMA(1, 1, 1)", y = "Transformed GDP")
fit_ets <- us_gdp %>%
model(ETS(GDP_transformed))
forecast_ets <- fit_ets %>%
forecast(h = 12)
forecast_ets %>%
autoplot(us_gdp) +
labs(title = "Forecast for US GDP using ETS", y = "Transformed GDP")