library(fpp3)
amazon_stock <- gafa_stock |>
filter (Symbol == "AMZN")
amazon_stock |>
autoplot(Close) + labs(title = "Amazon Closing Prices Daily", y = "Price (USD",
x = "Date")
amazon_stock |>
ACF(Close) |>
autoplot() + labs(title = "ACF of Amazon Closing Prices ")
amazon_stock |>
PACF(Close) |>
autoplot() + labs(title = "PACF of Amazon Closing Prices ")
a.) Turkish GDP from global_economy.
global_economy |>
filter(Country == "Turkey") |>
autoplot(GDP) +
labs(title = "Turkey GDP", y = "GDP (Billions USD)")
Turkey_GDP <- global_economy |>
filter(Country == "Turkey")
lambda_value <- Turkey_GDP |>
features(GDP, features = guerrero) |>
pull(lambda_guerrero)
Turkey_GDP <- Turkey_GDP |>
mutate(trans_GDP = box_cox(GDP, lambda = lambda_value),
GDP_diff = difference(trans_GDP))
Turkey_GDP |>
autoplot(GDP_diff) +
labs(title = "Turkey Differenced Transformed GDP", y = "Differenced GDP")
b.) Accommodation takings in the state of Tasmania from aus_accommodation.
aus_accommodation |>
filter(State == "Tasmania") |>
autoplot(Takings) +
labs(title = "Accommodation Takings in Tasmania", y = "Takings (AUD)")
tas_accommod <- aus_accommodation |>
filter(State == "Tasmania")
lambda_value <- tas_accommod |>
features(Takings, features = guerrero) |>
pull(lambda_guerrero) # Correct column name
tas_accommod <- tas_accommod |>
mutate(Takings_trans = box_cox(Takings, lambda = lambda_value),
Takings_diff = difference(Takings_trans, lag = 12))
autoplot(tas_accommod, Takings_diff) +
labs(title = "Seasonally Differenced Accommodation Takings", y = "Transformed & Differenced Takings (AUD)")
c.) Monthly sales from souvenirs.
souvenirs |>
autoplot(Sales) +
labs(title = "Souvenir Sales", y = "Sales (Units)")
souvenirs |>
features(Sales, features = guerrero)
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 0.00212
lambda_value <- souvenirs |>
features(Sales, features = guerrero) |>
pull(lambda_guerrero)
souvenirs <- souvenirs |>
mutate(Sales_trans = box_cox(Sales, lambda = lambda_value),
Sales_diff = difference(Sales_trans, lag = 12))
autoplot(souvenirs, Sales_diff) +
labs(title = "Differenced Souvenir Sales")
retail_data <- aus_retail
retail_data |>
filter(Industry == "Department stores") |>
autoplot(Turnover) +
labs(title = "Retail Turnover (Department Stores)", y = "Turnover (Million AUD)")
retail_data |>
filter(Industry == "Department stores") |>
features(Turnover, features = guerrero)
## # A tibble: 6 × 3
## State Industry lambda_guerrero
## <chr> <chr> <dbl>
## 1 Australian Capital Territory Department stores 0.240
## 2 New South Wales Department stores 0.219
## 3 Queensland Department stores 0.121
## 4 South Australia Department stores 0.0354
## 5 Victoria Department stores 0.173
## 6 Western Australia Department stores 0.0816
retail_diff1 <- retail_data |>
filter(Industry == "Department stores") |>
mutate(Turnover_diff1 = difference(Turnover))
autoplot(retail_diff1, Turnover_diff1) +
labs(title = "First-order Retail Turnover Difference")
retail_diff_seasonal <- retail_data |>
filter(Industry == "Department stores") |>
mutate(Turnover_diff_seasonal = difference(Turnover, lag = 12))
autoplot(retail_diff_seasonal, Turnover_diff_seasonal) +
labs(title = "Seasonally Retail Turnover Difference (Lag = 12)")
retail_diff_combined <- retail_data |>
filter(Industry == "Department stores") |>
mutate(Turnover_diff_combined = difference(difference(Turnover, lag = 12)))
autoplot(retail_diff_combined, Turnover_diff_combined) +
labs(title = "First-order + Seasonal Retail Turnover Difference")
retail_diff_combined |>
ACF(Turnover_diff_combined, na.rm = TRUE) |>
autoplot() +
labs(title = "ACF Retail Turnover Difference")
retail_diff_combined |>
PACF(Turnover_diff_combined, na.rm = TRUE) |>
autoplot() +
labs(title = "PACF Retail Turnover Difference ")
a.) Use the following R code to generate data from an AR(1) model with \(\theta_1\)= 0.6 and \(\sigma^2\)= 1.The process starts with \(y_1\)= 0.
y <- numeric(100) e <- rnorm(100) for(i in 2:100) y[i] <- 0.6*y[i-1] + e[i] sim <- tsibble(idx = seq_len(100), y = y, index = idx)
set.seed(123456)
n <- 100
phi <- 0.6
sigma_square<- 1
e <- rnorm(n, mean = 0, sd = sqrt(sigma_square))
y <- numeric(n)
y[1] <- 0
for (i in 2:n) {
y[i] <- phi * y[i - 1] + e[i]
}
sim_ar <- tsibble(idx = seq_len(n), y = y, index = idx)
sim_ar|>
autoplot(y) +
labs(title = "Simulated AR(1) Process (ϕ_1 = 0.6)", y = "y", x = "Time")
b.)Produce a time plot for the series. How does the plot change as you change \(\theta_1\)?
theta <- 0.6
e <- rnorm(n, mean = 0, sd = sqrt(sigma_square))
y_ma <- numeric(n)
y_ma[1] <- e[1]
for (i in 2:n) {
y_ma[i] <- e[i] + theta * e[i - 1]
}
sim_ma <- tsibble(idx = seq_len(n), y = y_ma, index = idx)
sim_ma |>
autoplot(y) +
labs(title = "Simulated MA(1) Process (θ1 = 0.6)", y = "y", x = "Time")
c.) Write your own code to generate data from an MA(1) model with \(\theta_1\)= 0.6 and \(\sigma^2\)= 1
n <- 100
theta_tes <- 0.6
sigma <- 1
set.seed(123467)
e <- rnorm(n, mean = 0, sd = sigma)
y <- numeric(n)
y[1] <- e[1]
for (i in 2:n) {
y[i] <- e[i] + theta_tes * e[i - 1]
}
ma1_sim <- tsibble(
time = 1:n,
y = y,
index = time
)
ma1_sim |>
autoplot(y) +
labs(title = "Simulated MA(1) Process", y = "Value", x = "Time")
d.) Produce a time plot for the series. How does the plot change as
you change
\(\theta_1\)? It apears the when \(\theta_1\) is 0 there is mostly white
noise, when its positive there is more smoothness and when its negative
there is more oscoscillating.
library(purrr)
## Warning: package 'purrr' was built under R version 4.3.3
simulate_ma1 <- function(theta1, n = 100, sigma = 1, seed = 123) {
set.seed(seed)
e <- rnorm(n, mean = 0, sd = sigma)
y <- numeric(n)
y[1] <- e[1]
for (i in 2:n) {
y[i] <- e[i] + theta1 * e[i - 1]
}
# Return a valid tsibble with theta1 as a key
tsibble(
time = 1:n,
y = y,
theta1 = factor(theta1),
key = theta1,
index = time
)
}
# Try multiple theta values
theta_vals <- c(-0.9, -0.5, 0, 0.5, 0.9)
# Simulate for each and combine into one dataset
ma1_all <- map_dfr(theta_vals, simulate_ma1)
# Plot all series
ma1_all |>
ggplot(aes(x = time, y = y)) +
geom_line() +
facet_wrap(~theta1, ncol = 1) +
labs(
title = "MA(1) Simulations with Different θ₁ Values",
y = "Simulated Value",
x = "Time"
)
e.) Generate data from an ARMA(1,1) model with with $ _1=0.6$ \(\theta_1\)= 0.6 and \(\sigma^2\)= 1
phi <- 0.6
theta <- 0.6
e <- rnorm(n, mean = 0, sd = sqrt(sigma))
y_arma <- numeric(n)
y_arma[1] <- 0
for (i in 2:n) {
y_arma[i] <- phi * y_arma[i - 1] + e[i] + theta * e[i - 1]
}
sim_ARMA_1 <- tsibble(idx = seq_len(n), y = y_arma, index = idx)
sim_ARMA_1
## # A tsibble: 100 x 2 [1]
## idx y
## <int> <dbl>
## 1 1 0
## 2 2 -0.169
## 3 3 -0.194
## 4 4 -0.612
## 5 5 -1.53
## 6 6 -1.53
## 7 7 -1.73
## 8 8 -3.18
## 9 9 -3.29
## 10 10 -1.28
## # ℹ 90 more rows
sim_ARMA_1 |>
autoplot(y) +
labs(title = "Simulated ARMA(1,1) Process (ϕ1 = 0.6, θ1 = 0.6)", y = "y", x = "Time")
f.) Generate data from an AR(2) model with $ _1=0.8$ \(\phi_2\)= 0.3 and \(\sigma^2\)= 1 (Note that these parameters will give a non-stationary series.)
phi_1 <- -0.8
phi_2 <- 0.3
e <- rnorm(n, mean = 0, sd = sqrt(sigma))
y_ar2 <- numeric(n)
y_ar2[1] <- 0
y_ar2[2] <- e[2]
for (i in 3:n) {
y_ar2[i] <- phi_1 * y_ar2[i - 1] + phi_2 * y_ar2[i - 2] + e[i]
}
sim_AR2 <- tsibble(idx = seq_len(n), y = y_ar2, index = idx)
sim_AR2
## # A tsibble: 100 x 2 [1]
## idx y
## <int> <dbl>
## 1 1 0
## 2 2 1.31
## 3 3 -1.32
## 4 4 1.99
## 5 5 -2.40
## 6 6 2.04
## 7 7 -3.14
## 8 8 2.53
## 9 9 -1.32
## 10 10 1.76
## # ℹ 90 more rows
sim_AR2 |>
autoplot(y) +
labs(title = "Simulated AR(2) Process (ϕ1 = -0.8, ϕ2 = 0.3)", y = "y", x = "Time")
g.) Graph the latter two series and compare them.
plot1 <- sim_ARMA_1 |>
autoplot(y) +
labs(title = "Simulated ARMA(1,1)", y = "y", x = "Time")
plot2 <- sim_AR2 |>
autoplot(y) +
labs(title = "Simulated AR(2)", y = "y", x = "Time")
plot1
plot2
a.) Use ARIMA() to find an appropriate ARIMA model. What model was selected. Check that the residuals look like white noise. Plot forecasts for the next 10 periods.
aus_data <- aus_airpassengers
fit_auto <- aus_data |>
model(ARIMA(Passengers))
report(fit_auto)
## Series: Passengers
## Model: ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.8963
## s.e. 0.0594
##
## sigma^2 estimated as 4.308: log likelihood=-97.02
## AIC=198.04 AICc=198.32 BIC=201.65
gg_tsresiduals(fit_auto)
fc_auto <- fit_auto |>
forecast(h = 10)
fc_auto |>
autoplot(aus_data) +
labs(title = "Forecasts ARIMA Model", y = "Passengers (millions)")
b.) Write the model in terms of the backshift operator. \((1−\phi_1B)(1−B)Y_t=(1+\theta_1B)e_t\)
c.) Plot forecasts from an ARIMA(0,1,0) model with drift and compare these to part a.
fit_drift <- aus_airpassengers |>
model(ARIMA(Passengers ~ pdq(0, 1, 0)))
fc_drift <- fit_drift |>
forecast(h = 10)
fit_air <- aus_airpassengers |>
model(ARIMA(Passengers))
fc_air <- fit_air |>
forecast(h = 10)
fc_air <- fc_air|>
mutate(Model = "Auto ARIMA")
fc_drift <- fc_drift |>
mutate(Model = "ARIMA(0,1,0) with Drift")
bind_rows(fc_air, fc_drift) |>
autoplot(aus_airpassengers) +
facet_wrap(~Model) +
labs(title = "Forecast Comparison: Auto ARIMA vs ARIMA(0,1,0) with Drift",
y = "Passengers (millions)",
x = "Year"
) + theme(
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
)
d.) Plot forecasts from an ARIMA(2,1,2) model with drift and compare these to parts a and c. Remove the constant and see what happens.
fit_drift_2 <- aus_airpassengers |>
model(ARIMA(Passengers ~ pdq(2, 1, 2)))
fc_drift_2 <- fit_drift_2 |>
forecast(h=10)
fc_drift_2 <- fc_drift_2 |>
mutate(Model = "ARIMA(2,1,2)")
bind_rows(fc_air, fc_drift, fc_drift_2 ) |>
autoplot(aus_airpassengers) +
facet_wrap(~Model) +
labs(title = "Forecast Comparison: Auto ARIMA vs ARIMA(0,1,0) & ARIMA(2,1,2)",
y = "Passengers (millions)",
x = "Year"
) + theme(
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
)
e.) Plot forecasts from an ARIMA(0,2,1) model with a constant. What happens?
library(fable)
fit_const <- aus_airpassengers |>
model(ARIMA(Passengers ~ pdq(0,2,1)))
fc_const <- fit_const |>
forecast(h=10)
fc_const |>
autoplot(aus_airpassengers) +
labs(title = "Australian Air with Constant", "Passengers (millions)")
a.) if necessary, find a suitable Box-Cox transformation for the data;
us_gdp <- global_economy |>
filter(Country == "United States")
us_gdp |>
autoplot(GDP) +
labs(title = "United States GDP", y = "GDP (Billions USD)")
us_gdp |>
features(GDP, features = guerrero)
## # A tibble: 1 × 2
## Country lambda_guerrero
## <fct> <dbl>
## 1 United States 0.282
lambda_us <- us_gdp |>
features(GDP, features = guerrero) |>
pull(lambda_guerrero)
us_gdp <- us_gdp |>
mutate(GDP_trans = box_cox(GDP, lambda_us))
b.) fit a suitable ARIMA model to the transformed data using ARIMA();
lambda <- us_gdp |>
features(GDP, features = guerrero) |>
pull(lambda_guerrero)
fit_arima <- us_gdp |>
model(ARIMA(box_cox(GDP, lambda)))
report(fit_arima)
## Series: GDP
## Model: ARIMA(1,1,0) w/ drift
## Transformation: box_cox(GDP, lambda)
##
## Coefficients:
## ar1 constant
## 0.4586 118.1822
## s.e. 0.1198 9.5047
##
## sigma^2 estimated as 5479: log likelihood=-325.32
## AIC=656.65 AICc=657.1 BIC=662.78
c.) try some other plausible models by experimenting with the orders chosen;
fit_ARIMA_test <- us_gdp|>
model(ARIMA(box_cox(GDP, lambda) ~ pdq(2,1,2)))
fit_ARIMA_1 <- us_gdp |>
model(ARIMA(box_cox(GDP, lambda) ~ pdq(1,1,1)))
glance(fit_arima, fit_ARIMA_test, fit_ARIMA_1)
## # A tibble: 1 × 9
## Country .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 United States ARIMA(box_co… 5479. -325. 657. 657. 663. <cpl> <cpl>
d.) choose what you think is the best model and check the residual diagnostics;
fit_arima |>
gg_tsresiduals()
fit_arima |>
augment() |>
features(.innov, ljung_box, lag = 10)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 United States ARIMA(box_cox(GDP, lambda)) 3.81 0.955
e.) produce forecasts of your fitted model. Do the forecasts look reasonable? Yes it does look reasonable because theres an upwards trend that is smooth and accelerating.
fc_arima <- fit_arima |>
forecast(h = 10)
fc_arima |>
autoplot(us_gdp) +
labs(title = "United States GDP Forecast (ARIMA Model)", y = "GDP (Billions of USD)")
f.) compare the results with what you would obtain using ETS() (with no transformation).
fit_ets <- us_gdp |>
model(ETS(GDP))
fc_ets <- fit_ets |>
forecast(h = 10)
fc_ets %>%
autoplot(us_gdp) +
labs(title = "United States GDP Forecast (ETS Model)", y = "GDP (Billions USD)")
glance(fit_arima, fit_ets)
## # A tibble: 1 × 9
## Country .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 United States ARIMA(box_co… 5479. -325. 657. 657. 663. <cpl> <cpl>