All three show white noise. The figure with larger sample size has a narrower confidence bound, which is to be expected. Smaller sample size leads to greater variability but the values are still within the confidence bounds.
Autocorrelations differ based on sampling variability, and with smaller sample sets outlier variables can alter the values more significantly.
amazon_stock <- gafa_stock %>% filter(Symbol == "AMZN") %>%
select(Date, Close) %>%
as_tsibble(index = Date)
autoplot(amazon_stock, Close) +
ggtitle("Daily Closing Prices of Amazon Stock") +
xlab("Date") +
ylab("Closing Price (USD)")
# Compute ACF
amazon_acf <- amazon_stock %>%
ACF(Close)
# Plot ACF
acf_plot <- autoplot(amazon_acf) +
ggtitle("ACF of Amazon Closing Prices")
# Compute PACF
amazon_pacf <- amazon_stock %>%
PACF(Close)
# Plot PACF
pacf_plot <- autoplot(amazon_pacf) +
ggtitle("PACF of Amazon Closing Prices")
# Arrange the plots side by side
gridExtra::grid.arrange(acf_plot, pacf_plot, ncol = 2)
Plotting the closing numbers shows an upward trend, and the spread of the data increases with the level. Furthermore the ACF decreases slowly and the PCF has big lags. This indicates the series is non-stationary.
We see a clear upward trend in the data
turkey_gdp <- global_economy %>%
filter(Country == "Turkey") %>%
select(Year, GDP) %>%
drop_na()
autoplot(turkey_gdp, GDP) +
ggtitle("Turkish GDP Over Time") +
ylab("GDP (US Dollars)") +
xlab("Year")
The lambda 0.1572 is what we will use in the box-cox transformation
lambda_gdp <- turkey_gdp %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
print(paste("Optimal lambda for Turkish GDP:", round(lambda_gdp, 4)))
## [1] "Optimal lambda for Turkish GDP: 0.1572"
After applying the transformation we plot the ACF and PACF
turkey_gdp <- turkey_gdp %>%
mutate(
GDP_transformed = box_cox(GDP, lambda_gdp)
)
autoplot(turkey_gdp, GDP_transformed) +
ggtitle("Box-Cox Transformed Turkish GDP") +
ylab("Transformed GDP") +
xlab("Year")
turkey_gdp %>%
ACF(GDP_transformed) %>%
autoplot() +
ggtitle("ACF of Transformed Turkish GDP")
turkey_gdp %>%
PACF(GDP_transformed) %>%
autoplot() +
ggtitle("PACF of Transformed Turkish GDP")
The p-value 0.01 is less than 0.05, so the series is non-stationary.
turkey_gdp %>%
features(GDP_transformed, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 1.50 0.01
Now we apply first-order differencing:
turkey_gdp <- turkey_gdp %>%
mutate(
GDP_diff = difference(GDP_transformed)
)
autoplot(turkey_gdp, GDP_diff) +
ggtitle("Differenced Transformed Turkish GDP") +
ylab("Differenced Transformed GDP") +
xlab("Year")
turkey_gdp %>%
ACF(GDP_diff, na_rm = TRUE) %>%
autoplot() +
ggtitle("ACF of Differenced Transformed Turkish GDP")
turkey_gdp %>%
PACF(GDP_diff, na_rm = TRUE) %>%
autoplot() +
ggtitle("PACF of Differenced Transformed Turkish GDP")
The new p-value after differencing is 0.1.
turkey_gdp %>%
filter(!is.na(GDP_diff)) %>%
features(GDP_diff, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.0889 0.1
# Filter data for Tasmania
tas_accommodation <- aus_accommodation %>%
filter(State == "Tasmania") %>%
select(Date, Takings) %>%
drop_na()
tas_accommodation
## # A tsibble: 74 x 2 [1Q]
## Date Takings
## <qtr> <dbl>
## 1 1998 Q1 28.7
## 2 1998 Q2 19.0
## 3 1998 Q3 16.1
## 4 1998 Q4 25.9
## 5 1999 Q1 28.4
## 6 1999 Q2 20.1
## 7 1999 Q3 17.3
## 8 1999 Q4 24.3
## 9 2000 Q1 30.0
## 10 2000 Q2 21.7
## # ℹ 64 more rows
# Plot the takings data
autoplot(tas_accommodation, Takings) +
ggtitle("Accommodation Takings in Tasmania") +
ylab("Takings (Australian Dollars)") +
xlab("Quarter")
# Calculate the optimal lambda for Box-Cox transformation
lambda_takings <- tas_accommodation %>%
features(Takings, features = guerrero) %>%
pull(lambda_guerrero)
# Display the lambda
print(paste("Optimal lambda for Tasmania Accommodation Takings:", round(lambda_takings, 4)))
## [1] "Optimal lambda for Tasmania Accommodation Takings: 0.0018"
# Apply the Box-Cox transformation
tas_accommodation <- tas_accommodation %>%
mutate(
Takings_transformed = box_cox(Takings, lambda_takings)
)
# Plot the transformed data
autoplot(tas_accommodation, Takings_transformed) +
ggtitle("Box-Cox Transformed Accommodation Takings in Tasmania") +
ylab("Transformed Takings") +
xlab("Quarter")
# Apply seasonal differencing
tas_accommodation <- tas_accommodation %>%
mutate(
Takings_seas_diff = difference(Takings_transformed, lag = 4)
)
# Plot the seasonally differenced data
autoplot(tas_accommodation, Takings_seas_diff) +
ggtitle("Seasonally Differenced Transformed Takings") +
ylab("Seasonally Differenced Transformed Takings") +
xlab("Quarter")
# ACF and PACF of seasonally differenced series
tas_accommodation %>%
ACF(Takings_seas_diff, na_rm = TRUE) %>%
autoplot() +
ggtitle("ACF of Seasonally Differenced Transformed Takings")
tas_accommodation %>%
PACF(Takings_seas_diff, na_rm = TRUE) %>%
autoplot() +
ggtitle("PACF of Seasonally Differenced Transformed Takings")
# KPSS test on seasonally differenced series
tas_accommodation %>%
filter(!is.na(Takings_seas_diff)) %>%
features(Takings_seas_diff, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.198 0.1
# Apply first-order differencing
tas_accommodation <- tas_accommodation %>%
mutate(
Takings_diff = difference(Takings_seas_diff)
)
# Plot the differenced series
autoplot(tas_accommodation, Takings_diff) +
ggtitle("First-order Differenced Seasonally Differenced Takings") +
ylab("Differenced Values") +
xlab("Quarter")
# ACF and PACF of fully differenced series
tas_accommodation %>%
ACF(Takings_diff, na_rm = TRUE) %>%
autoplot() +
ggtitle("ACF of Fully Differenced Takings")
tas_accommodation %>%
PACF(Takings_diff, na_rm = TRUE) %>%
autoplot() +
ggtitle("PACF of Fully Differenced Takings")
# KPSS test on fully differenced series
tas_accommodation %>%
filter(!is.na(Takings_diff)) %>%
features(Takings_diff, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.0474 0.1
# Load souvenirs data
data("souvenirs")
# Convert to tsibble
souvenirs_tsibble <- as_tsibble(souvenirs, index = Month)
# Plot the sales data
autoplot(souvenirs_tsibble, Sales) +
ggtitle("Monthly Souvenir Sales") +
ylab("Sales") +
xlab("Month")
# Calculate the optimal lambda for Box-Cox transformation
lambda_sales <- souvenirs_tsibble %>%
features(Sales, features = guerrero) %>%
pull(lambda_guerrero)
# Display the lambda
print(paste("Optimal lambda for Souvenir Sales:", round(lambda_sales, 4)))
## [1] "Optimal lambda for Souvenir Sales: 0.0021"
# Apply the Box-Cox transformation
souvenirs_tsibble <- souvenirs_tsibble %>%
mutate(
Sales_transformed = box_cox(Sales, lambda_sales)
)
# Plot the transformed data
autoplot(souvenirs_tsibble, Sales_transformed) +
ggtitle("Box-Cox Transformed Souvenir Sales") +
ylab("Transformed Sales") +
xlab("Month")
# Apply seasonal differencing
souvenirs_tsibble <- souvenirs_tsibble %>%
mutate(
Sales_seas_diff = difference(Sales_transformed, lag = 12)
)
# Plot the seasonally differenced data
autoplot(souvenirs_tsibble, Sales_seas_diff) +
ggtitle("Seasonally Differenced Transformed Sales") +
ylab("Seasonally Differenced Transformed Sales") +
xlab("Month")
# ACF and PACF of seasonally differenced series
souvenirs_tsibble %>%
ACF(Sales_seas_diff, na_rm = TRUE) %>%
autoplot() +
ggtitle("ACF of Seasonally Differenced Transformed Sales")
souvenirs_tsibble %>%
PACF(Sales_seas_diff, na_rm = TRUE) %>%
autoplot() +
ggtitle("PACF of Seasonally Differenced Transformed Sales")
# KPSS test on seasonally differenced series
souvenirs_tsibble %>%
filter(!is.na(Sales_seas_diff)) %>%
features(Sales_seas_diff, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.240 0.1
# Apply first-order differencing
souvenirs_tsibble <- souvenirs_tsibble %>%
mutate(
Sales_diff = difference(Sales_seas_diff)
)
# Plot the differenced series
autoplot(souvenirs_tsibble, Sales_diff) +
ggtitle("First-order Differenced Seasonally Differenced Sales") +
ylab("Differenced Values") +
xlab("Month")
# ACF and PACF of fully differenced series
souvenirs_tsibble %>%
ACF(Sales_diff, na_rm = TRUE) %>%
autoplot() +
ggtitle("ACF of Fully Differenced Sales")
souvenirs_tsibble %>%
PACF(Sales_diff, na_rm = TRUE) %>%
autoplot() +
ggtitle("PACF of Fully Differenced Sales")
# KPSS test on fully differenced series
souvenirs_tsibble %>%
filter(!is.na(Sales_diff)) %>%
features(Sales_diff, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.0381 0.1
# Aggregate data to get total monthly retail turnover
total_retail <- aus_retail %>%
summarize(Turnover = sum(Turnover)) %>%
as_tsibble(index = Month)
# View the first few rows
head(total_retail)
## # A tsibble: 6 x 2 [1M]
## Month Turnover
## <mth> <dbl>
## 1 1982 Apr 6225.
## 2 1982 May 6382.
## 3 1982 Jun 6162.
## 4 1982 Jul 6399.
## 5 1982 Aug 6163.
## 6 1982 Sep 6331.
# Plot the total monthly retail turnover
autoplot(total_retail, Turnover) +
ggtitle("Total Monthly Australian Retail Turnover") +
ylab("Turnover (Million AUD)") +
xlab("Month")
# Calculate the optimal lambda for Box-Cox transformation
lambda_retail <- total_retail %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
# Display the optimal lambda
print(paste("Optimal lambda for Australian retail turnover:", round(lambda_retail, 4)))
## [1] "Optimal lambda for Australian retail turnover: 0.196"
# Apply the Box-Cox transformation
total_retail <- total_retail %>%
mutate(
Turnover_transformed = box_cox(Turnover, lambda_retail)
)
# Plot the transformed data
autoplot(total_retail, Turnover_transformed) +
ggtitle("Box-Cox Transformed Total Monthly Australian Retail Turnover") +
ylab("Transformed Turnover") +
xlab("Month")
# Plot the ACF of the transformed series
total_retail %>%
ACF(Turnover_transformed) %>%
autoplot() +
ggtitle("ACF of Transformed Australian Retail Turnover")
# Plot the PACF of the transformed series
total_retail %>%
PACF(Turnover_transformed) %>%
autoplot() +
ggtitle("PACF of Transformed Australian Retail Turnover")
# Perform KPSS test on the transformed series
total_retail %>%
features(Turnover_transformed, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 7.41 0.01
# Perform Augmented Dickey-Fuller (ADF) test
total_retail %>%
features(Turnover_transformed, unitroot_ndiffs)
## # A tibble: 1 × 1
## ndiffs
## <int>
## 1 1
# Apply first-order differencing
total_retail <- total_retail %>%
mutate(
Turnover_diff = difference(Turnover_transformed)
)
# Plot the differenced series
autoplot(total_retail, Turnover_diff) +
ggtitle("First-order Differenced Transformed Australian Retail Turnover") +
ylab("Differenced Transformed Turnover") +
xlab("Month")
# Plot the ACF and PACF of the differenced series
total_retail %>%
filter(!is.na(Turnover_diff)) %>%
ACF(Turnover_diff) %>%
autoplot() +
ggtitle("ACF of Differenced Transformed Australian Retail Turnover")
total_retail %>%
filter(!is.na(Turnover_diff)) %>%
PACF(Turnover_diff) %>%
autoplot() +
ggtitle("PACF of Differenced Transformed Australian Retail Turnover")
# KPSS test on non-seasonally differenced series
total_retail %>%
filter(!is.na(Turnover_diff)) %>%
features(Turnover_diff, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.0118 0.1
# Determine number of seasonal differences required
total_retail %>%
filter(!is.na(Turnover_diff)) %>%
features(Turnover_diff, unitroot_nsdiffs)
## # A tibble: 1 × 1
## nsdiffs
## <int>
## 1 1
# Apply seasonal differencing
total_retail <- total_retail %>%
mutate(
Turnover_seas_diff = difference(Turnover_diff, lag = 12)
)
# Plot the seasonally differenced series
autoplot(total_retail, Turnover_seas_diff) +
ggtitle("Seasonally Differenced (Lag 12) Differenced Transformed Australian Retail Turnover") +
ylab("Seasonally Differenced Values") +
xlab("Month")
# Plot the ACF and PACF of the fully differenced series
total_retail %>%
filter(!is.na(Turnover_seas_diff)) %>%
ACF(Turnover_seas_diff) %>%
autoplot() +
ggtitle("ACF of Fully Differenced Transformed Australian Retail Turnover")
total_retail %>%
filter(!is.na(Turnover_seas_diff)) %>%
PACF(Turnover_seas_diff) %>%
autoplot() +
ggtitle("PACF of Fully Differenced Transformed Australian Retail Turnover")
# KPSS test on fully differenced series
total_retail %>%
filter(!is.na(Turnover_seas_diff)) %>%
features(Turnover_seas_diff, unitroot_kpss)
## # A tibble: 1 × 2
## kpss_stat kpss_pvalue
## <dbl> <dbl>
## 1 0.0149 0.1
# Load the aus_airpassengers dataset
data("aus_airpassengers")
# View the structure of the dataset
glimpse(aus_airpassengers)
## Rows: 47
## Columns: 2
## $ Year <dbl> 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979,…
## $ Passengers <dbl> 7.3187, 7.3266, 7.7956, 9.3846, 10.6647, 11.0551, 10.8643, …
# Plot the time series
autoplot(aus_airpassengers, Passengers) +
ggtitle("Australian Air Passengers (1970–2011)") +
ylab("Passengers (millions)") +
xlab("Year")
# Fit an automatic ARIMA model
fit_auto <- aus_airpassengers %>%
model(auto = ARIMA(Passengers))
# Display the model report
report(fit_auto)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8963
## s.e. 0.0594
##
## sigma^2 estimated as 4.308: log likelihood=-97.02
## AIC=198.04 AICc=198.32 BIC=201.65
# Extract residuals
residuals_auto <- augment(fit_auto)
# Plot residuals
residuals_auto %>%
gg_tsdisplay(.innov, plot_type = "partial", lag = 20) +
ggtitle("Residual Diagnostics for ARIMA(0,2,1) with Drift")
# Ljung-Box test for lags 10 and 20
residuals_auto %>%
features(.innov, ljung_box, lag = 10, dof = 2)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 auto 6.70 0.570
residuals_auto %>%
features(.innov, ljung_box, lag = 20, dof = 2)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 auto 16.4 0.562
# Generate forecasts for the next 10 periods
fc_auto <- fit_auto %>%
forecast(h = 10)
# Plot the forecasts
fc_auto %>%
autoplot(aus_airpassengers) +
ggtitle("Forecasts from ARIMA(0,2,1) Model with Drift") +
ylab("Passengers (millions)") +
xlab("Year")
Write the model in terms of the backshift operator. \[ (1 - B)^2 Y_t = (1 - \theta_1 B) \epsilon_t \]
Plot forecasts from an ARIMA(0,1,0) model with drift and compare these to part a.
library(fable)
# Fit an ARIMA(0,1,0) model with drift (Random Walk with Drift)
fit_rwdrift <- aus_airpassengers %>%
model(rw_drift = ARIMA(Passengers ~ 1 + pdq(0,1,0)))
# Display the model report
report(fit_rwdrift)
## Series: Passengers
## Model: ARIMA(0,1,0) w/ drift
##
## Coefficients:
## constant
## 1.4191
## s.e. 0.3014
##
## sigma^2 estimated as 4.271: log likelihood=-98.16
## AIC=200.31 AICc=200.59 BIC=203.97
# Generate forecasts
fc_rwdrift <- fit_rwdrift %>%
forecast(h = 10)
# Plot the forecasts
fc_rwdrift %>%
autoplot(aus_airpassengers) +
ggtitle("Forecasts from ARIMA(0,1,0) Model with Drift") +
ylab("Passengers (millions)") +
xlab("Year")
# Combine forecasts
fc_combined <- bind_rows(
fc_auto %>% mutate(Model = "ARIMA(0,1,1) with Drift"),
fc_rwdrift %>% mutate(Model = "ARIMA(0,1,0) with Drift")
)
# Plot combined forecasts
fc_combined %>%
autoplot(aus_airpassengers, level = NULL) +
ggtitle("Comparison of Forecasts from Different Models") +
ylab("Passengers (millions)") +
xlab("Year") +
facet_wrap(~ Model, ncol = 1)
fit_arima212 <- aus_airpassengers %>%
model(arima_drift = ARIMA(Passengers ~ 1 + pdq(2,1,2)))
report(fit_arima212)
## Series: Passengers
## Model: ARIMA(2,1,2) w/ drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 constant
## -0.5518 -0.7327 0.5895 1.0000 3.2216
## s.e. 0.1684 0.1224 0.0916 0.1008 0.7224
##
## sigma^2 estimated as 4.031: log likelihood=-96.23
## AIC=204.46 AICc=206.61 BIC=215.43
# Generate forecasts
fc_arima212 <- fit_arima212 %>%
forecast(h = 10)
# Plot the forecasts
fc_arima212 %>%
autoplot(aus_airpassengers) +
ggtitle("Forecasts from ARIMA(2,1,2) Model with Drift") +
ylab("Passengers (millions)") +
xlab("Year")
# Combine forecasts
fc_combined_all <- bind_rows(
fc_auto %>% mutate(Model = "ARIMA(0,1,1) with Drift"),
fc_rwdrift %>% mutate(Model = "ARIMA(0,1,0) with Drift"),
fc_arima212 %>% mutate(Model = "ARIMA(2,1,2) with Drift")
)
# Plot combined forecasts
fc_combined_all %>%
autoplot(aus_airpassengers, level = NULL) +
ggtitle("Comparison of Forecasts from Different Models") +
ylab("Passengers (millions)") +
xlab("Year") +
facet_wrap(~ Model, ncol = 1)
An ARIMA(0,2,1) with a constant doesnt work because the difference d=2 implies a quadratic trend or higher. We could either remove the constant or use a different configuration.
# Fit an ARIMA(0,2,1) model with constant
#fit_arima212_no_drift <- aus_airpassengers %>%
# model(arima_no_drift = ARIMA(Passengers ~ 1 + pdq(0,2,1)))
# Display the model report
#report(fit_arima021)