library(tidyverse)
library(feasts)
library(tsibble)
library(fpp3)
set.seed(123)
# Generate series
acf_data <- function(n, label) {
acf_obj <- acf(rnorm(n), plot = FALSE, lag.max = 20)
tibble(
lag = as.integer(acf_obj$lag[-1]),
acf = as.numeric(acf_obj$acf[-1]),
crit_val = qnorm(0.975) / sqrt(n),
label = label
)
}
bind_rows(
acf_data(36, "n = 36"),
acf_data(360, "n = 360"),
acf_data(1000, "n = 1,000")
) %>%
mutate(label = factor(label, levels = c("n = 36", "n = 360", "n = 1,000"))) %>%
ggplot(aes(x = lag, y = acf)) +
geom_hline(yintercept = 0) +
geom_segment(aes(xend = lag, yend = 0)) +
geom_hline(aes(yintercept = crit_val), colour = "steelblue", linetype = "dashed") +
geom_hline(aes(yintercept = -crit_val), colour = "steelblue", linetype = "dashed") +
facet_wrap(~ label) +
scale_y_continuous(limits = c(-1, 1)) +
labs(x = "lag", y = "acf") +
theme_bw()
a. All three are white noise. Larger n indicates spikes shrink toward 0.
Critical values = ±1.96/√n — larger n → narrower bands. ACFs also differ because each is a sample estimate with error ~1/√n
amazon <- gafa_stock %>% filter(Symbol == "AMZN")
# Closing price plot
p1 <- amazon %>%
autoplot(Close) +
labs(title = "Amazon Daily Closing Price", y = "Price (USD)", x = "") +
theme_bw()
# ACF + PACF
p2 <- amazon %>%
ACF(Close, lag_max = 40) %>%
autoplot() + labs(title = "ACF") + theme_bw()
p3 <- amazon %>%
PACF(Close, lag_max = 40) %>%
autoplot() + labs(title = "PACF") + theme_bw()
library(patchwork)
p1 / (p2 | p3)
Price plot: clear upward trend with no tendency to revert to a mean indicates non-stationary in level.
ACF: Autocorrelations decay very slowly from ~1.0, remaining significant across many lags.
PACF: Huge spike at lag 1 (~1.0), then cuts off abruptly . The series is almost entirely explained by its previous value
# a. Turkish GDP
turkey <- global_economy %>% filter(Country == "Turkey")
lambda_tr <- turkey %>% features(GDP, features = guerrero) %>% pull(lambda_guerrero)
turkey %>%
mutate(GDP = difference(box_cox(GDP, lambda_tr))) %>%
ACF(GDP) %>% autoplot() + labs(title = paste("Turkish GDP | λ=", round(lambda_tr,2), "d=1"))
# b. Tasmania Accommodation
tasmania <- aus_accommodation %>% filter(State == "Tasmania")
lambda_tas <- tasmania %>% features(Takings, features = guerrero) %>% pull(lambda_guerrero)
tasmania %>%
mutate(Takings = difference(difference(box_cox(Takings, lambda_tas)), lag = 4)) %>%
ACF(Takings) %>% autoplot() + labs(title = paste("Tasmania | λ=", round(lambda_tas,2), "d=1 D=1"))
# c. Souvenirs
lambda_sv <- souvenirs %>% features(Sales, features = guerrero) %>% pull(lambda_guerrero)
souvenirs %>%
mutate(Sales = difference(difference(box_cox(Sales, lambda_sv)), lag = 12)) %>%
ACF(Sales) %>% autoplot() + labs(title = paste("Souvenirs | λ=", round(lambda_sv,2), "d=1 D=1"))
set.seed(12345678)
myseries <- aus_retail %>% filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
# Check lambda
lambda <- myseries %>% features(Turnover, features = guerrero) %>% pull(lambda_guerrero)
lambda
## [1] 0.08303631
# Plot original, then transformed + differenced ACFs
myseries %>% autoplot(box_cox(Turnover, lambda)) + labs(title = paste("Transformed | λ=", round(lambda,2))) + theme_bw()
myseries %>%
mutate(Turnover = difference(box_cox(Turnover, lambda), lag = 12)) %>%
ACF(Turnover) %>% autoplot() + labs(title = "After seasonal diff (D=1)")
myseries %>%
mutate(Turnover = difference(difference(box_cox(Turnover, lambda), lag = 12))) %>%
ACF(Turnover) %>% autoplot() + labs(title = "After seasonal + first diff (D=1, d=1)")
lambda = 0.08 means log transformation stabilises variance. D=1 (lag=12) removes monthly seasonality. d=1 removes remaining trend.
set.seed(123)
# a/b
y <- numeric(100); e <- rnorm(100)
for(i in 2:100) y[i] <- 0.6*y[i-1] + e[i]
sim_ar1 <- tsibble(idx = seq_len(100), y = y, index = idx)
sim_ar1 %>% autoplot(y) + labs(title = "AR(1) φ₁=0.6") + theme_bw()
# c/d
e <- rnorm(100)
y_ma <- numeric(100)
for(i in 2:100) y_ma[i] <- 0.6*e[i-1] + e[i]
sim_ma1 <- tsibble(idx = seq_len(100), y = y_ma, index = idx)
sim_ma1 %>% autoplot(y) + labs(title = "MA(1) θ₁=0.6") + theme_bw()
#e
e <- rnorm(100)
y_arma <- numeric(100)
for(i in 2:100) y_arma[i] <- 0.6*y_arma[i-1] + 0.6*e[i-1] + e[i]
sim_arma <- tsibble(idx = seq_len(100), y = y_arma, index = idx)
#f
e <- rnorm(100)
y_ar2 <- numeric(100)
for(i in 3:100) y_ar2[i] <- -0.8*y_ar2[i-1] + 0.3*y_ar2[i-2] + e[i]
sim_ar2 <- tsibble(idx = seq_len(100), y = y_ar2, index = idx)
#g
library(patchwork)
p1 <- sim_arma %>% autoplot(y) + labs(title = "ARMA(1,1) φ=0.6, θ=0.6") + theme_bw()
p2 <- sim_ar2 %>% autoplot(y) + labs(title = "AR(2) φ₁=-0.8, φ₂=0.3 (non-stationary)") + theme_bw()
p1 / p2
b. Smoother and more persistent series.
d. Only affects short-term correlations.
g. ARMA(1,1) fluctuates stably around 0; AR(2) oscillates with growing amplitude → non-stationary.
air <- as_tsibble(aus_airpassengers)
#a.
fit <- air %>% model(auto = ARIMA(Passengers))
report(fit)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8963
## s.e. 0.0594
##
## sigma^2 estimated as 4.308: log likelihood=-97.02
## AIC=198.04 AICc=198.32 BIC=201.65
fit %>% gg_tsresiduals()
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fit %>% forecast(h=10) %>% autoplot(air) + labs(title = "Auto ARIMA") + theme_bw()
#c
air %>% model(ARIMA(Passengers ~ 1 + pdq(0,1,0))) %>%
forecast(h=10) %>% autoplot(air) + labs(title = "ARIMA(0,1,0) w/ drift") + theme_bw()
#d
air %>% model(
with_drift = ARIMA(Passengers ~ 1 + pdq(2,1,2)),
without_drift = ARIMA(Passengers ~ 0 + pdq(2,1,2))
) %>% forecast(h=10) %>% autoplot(air) + labs(title = "ARIMA(2,1,2)") + theme_bw()
#e
air %>% model(ARIMA(Passengers ~ 1 + pdq(0,2,1))) %>%
forecast(h=10) %>% autoplot(air) + labs(title = "ARIMA(0,2,1) w/ constant") + theme_bw()
us_gdp <- global_economy %>% filter(Country == "United States")
#a
lambda <- us_gdp %>% features(GDP, features = guerrero) %>% pull(lambda_guerrero)
# lambda ≈ 0.3, apply transformation
us_gdp <- us_gdp %>% mutate(GDP_tr = box_cox(GDP, lambda))
head(us_gdp)
## # A tsibble: 6 x 10 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population GDP_tr
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United Sta… USA 1960 5.43e11 NA 13.6 4.20 4.97 180671000 7215.
## 2 United Sta… USA 1961 5.63e11 2.30 13.7 4.03 4.90 183691000 7289.
## 3 United Sta… USA 1962 6.05e11 6.10 13.9 4.13 4.81 186538000 7438.
## 4 United Sta… USA 1963 6.39e11 4.40 14.0 4.09 4.87 189242000 7552.
## 5 United Sta… USA 1964 6.86e11 5.80 14.2 4.10 5.10 191889000 7705.
## 6 United Sta… USA 1965 7.44e11 6.40 14.4 4.24 4.99 194303000 7883.
#b
fit <- us_gdp %>% model(auto = ARIMA(GDP_tr))
report(fit)
## Series: GDP_tr
## Model: ARIMA(1,1,0) w/ drift
##
## Coefficients:
## ar1 constant
## 0.4586 118.1822
## s.e. 0.1198 9.5047
##
## sigma^2 estimated as 5479: log likelihood=-325.32
## AIC=656.65 AICc=657.1 BIC=662.78
#c
fit_all <- us_gdp %>% model(
auto = ARIMA(GDP_tr),
arima011 = ARIMA(GDP_tr ~ pdq(0,1,1)),
arima012 = ARIMA(GDP_tr ~ pdq(0,1,2)),
arima111 = ARIMA(GDP_tr ~ pdq(1,1,1)),
arima210 = ARIMA(GDP_tr ~ pdq(2,1,0))
)
glance(fit_all) %>% arrange(AICc) %>% select(.model, AIC, AICc, BIC)
## # A tibble: 5 × 4
## .model AIC AICc BIC
## <chr> <dbl> <dbl> <dbl>
## 1 auto 657. 657. 663.
## 2 arima011 659. 659. 665.
## 3 arima111 659. 659. 667.
## 4 arima210 659. 659. 667.
## 5 arima012 659. 660. 667.
#d
fit_best <- us_gdp %>% model(ARIMA(GDP_tr ~ pdq(0,1,1)))
fit_best %>% gg_tsresiduals()
augment(fit_best) %>% features(.innov, ljung_box, lag=10)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 United States ARIMA(GDP_tr ~ pdq(0, 1, 1)) 6.22 0.796
#e
fit_best %>%
forecast(h=20) %>%
mutate(GDP_fc = inv_box_cox(.mean, lambda)) %>%
autoplot(us_gdp) + labs(title = "US GDP Forecast (Box-Cox ARIMA)") + theme_bw()
#f
fit_ets <- us_gdp %>% model(ETS(GDP))
fit_ets %>% forecast(h=20) %>%
autoplot(us_gdp) + labs(title = "US GDP Forecast (ETS)") + theme_bw()
glance(fit_best) %>% select(AICc)
## # A tibble: 1 × 1
## AICc
## <dbl>
## 1 659.
glance(fit_ets) %>% select(AICc)
## # A tibble: 1 × 1
## AICc
## <dbl>
## 1 3192.
ETS often selects ETS(M,A,N); both give similar forecasts but ARIMA on transformed data tends to have better-behaved residuals