9.1- A) The difference the figures can be based on the number of observation in th e white noise series. in Left(36) The ACF has large fluctionations at some lags, and some bounds are exceeded. Middle(360) ACF has much smaller spikes mostly staying in the confidence bounds, there seem to be a more stable structure which may be caused by increased sample size, there is also minimal fluctuation. Right(1000) the ACF is mostly flat all values are within the confidence bound, the larger sample minimize the chance of incorrect patterns, this aligns with theoretical property of white nose. Do all indicate that the data are white noise? Yes, all three plots indicate that the data are white noise. its harder to see in the plot left(36) because of the smaller sample size.

Why are the critical values at different distances from the mean of zero? Why are the autocorrelations different in each figure when they each refer to white noise? The critical values at different distances from the mean of zero because in the difference in sample sizes. The autocorrelations different in each figure when they each refer to white noise also because the size of the sample critical values get smaller as the samples get larger.

9.2- A classic example of a non-stationary series are stock prices. Plot the daily closing prices for Amazon stock (contained in gafa_stock), along with the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced.

library(fpp3)

amazon_stock <- gafa_stock |>
  filter (Symbol == "AMZN")

amazon_stock |>
  autoplot(Close) + labs(title = "Amazon Closing Prices Daily", y = "Price (USD",
                         x = "Date")

amazon_stock |>
  ACF(Close) |>
  autoplot() + labs(title = "ACF of Amazon Closing Prices ")

amazon_stock |>
  PACF(Close) |>
  autoplot() + labs(title = "PACF of Amazon Closing Prices ")

9.3 For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data.

a.) Turkish GDP from global_economy.

global_economy |>
  filter(Country == "Turkey") |>
  autoplot(GDP) +
  labs(title = "Turkey GDP", y = "GDP (Billions USD)")

Turkey_GDP <- global_economy |>
  filter(Country == "Turkey")

lambda_value <- Turkey_GDP |>
  features(GDP, features = guerrero) |>
  pull(lambda_guerrero)

Turkey_GDP <- Turkey_GDP |>
  mutate(trans_GDP = box_cox(GDP, lambda = lambda_value),
    GDP_diff = difference(trans_GDP))

Turkey_GDP |>
  autoplot(GDP_diff) +
  labs(title = "Turkey Differenced Transformed GDP", y = "Differenced GDP")

b.) Accommodation takings in the state of Tasmania from aus_accommodation.

aus_accommodation |>
  filter(State == "Tasmania") |>
  autoplot(Takings) +
  labs(title = "Accommodation Takings in Tasmania", y = "Takings (AUD)")

tas_accommod <- aus_accommodation |>
  filter(State == "Tasmania")

lambda_value <- tas_accommod |>
  features(Takings, features = guerrero) |>
  pull(lambda_guerrero)  # Correct column name

tas_accommod <- tas_accommod |>
  mutate(Takings_trans = box_cox(Takings, lambda = lambda_value),
    Takings_diff = difference(Takings_trans, lag = 12))

autoplot(tas_accommod, Takings_diff) +
  labs(title = "Seasonally Differenced Accommodation Takings", y = "Transformed & Differenced Takings (AUD)")

c.) Monthly sales from souvenirs.

souvenirs |>
  autoplot(Sales) +
  labs(title = "Souvenir Sales", y = "Sales (Units)")

souvenirs |>
  features(Sales, features = guerrero)

## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1         0.00212

lambda_value <- souvenirs |>
  features(Sales, features = guerrero) |>
  pull(lambda_guerrero)

souvenirs <- souvenirs |>
  mutate(Sales_trans = box_cox(Sales, lambda = lambda_value),
    Sales_diff = difference(Sales_trans, lag = 12))

autoplot(souvenirs, Sales_diff) +
  labs(title = "Differenced Souvenir Sales")

9.5 For your retail data (from Exercise 7 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.

retail_data <- aus_retail

retail_data |>
  filter(Industry == "Department stores") |>
  autoplot(Turnover) +
  labs(title = "Retail Turnover (Department Stores)", y = "Turnover (Million AUD)")

retail_data |>
  filter(Industry == "Department stores") |>
  features(Turnover, features = guerrero)

## # A tibble: 6 × 3
##   State                        Industry          lambda_guerrero
##   <chr>                        <chr>                       <dbl>
## 1 Australian Capital Territory Department stores          0.240 
## 2 New South Wales              Department stores          0.219 
## 3 Queensland                   Department stores          0.121 
## 4 South Australia              Department stores          0.0354
## 5 Victoria                     Department stores          0.173 
## 6 Western Australia            Department stores          0.0816

retail_diff1 <- retail_data |>
  filter(Industry == "Department stores") |>
  mutate(Turnover_diff1 = difference(Turnover))

autoplot(retail_diff1, Turnover_diff1) +
  labs(title = "First-order Retail Turnover Difference")

retail_diff_seasonal <- retail_data |>
  filter(Industry == "Department stores") |>
  mutate(Turnover_diff_seasonal = difference(Turnover, lag = 12))

autoplot(retail_diff_seasonal, Turnover_diff_seasonal) +
  labs(title = "Seasonally Retail Turnover Difference (Lag = 12)")

retail_diff_combined <- retail_data |>
  filter(Industry == "Department stores") |>
  mutate(Turnover_diff_combined = difference(difference(Turnover, lag = 12)))

autoplot(retail_diff_combined, Turnover_diff_combined) +
  labs(title = "First-order + Seasonal Retail Turnover Difference")

retail_diff_combined |>
  ACF(Turnover_diff_combined, na.rm = TRUE) |>
  autoplot() +
  labs(title = "ACF Retail Turnover Difference")

retail_diff_combined |>
  PACF(Turnover_diff_combined, na.rm = TRUE) |>
  autoplot() +
  labs(title = "PACF  Retail Turnover Difference ")

9.6 Simulate and plot some data from simple ARIMA models.

a.) Use the following R code to generate data from an AR(1) model with $\theta_1$= 0.6 and $\sigma^2$= 1.The process starts with $y_1$= 0.

y <- numeric(100) e <- rnorm(100) for(i in 2:100) y[i] <- 0.6*y[i-1] + e[i] sim <- tsibble(idx = seq_len(100), y = y, index = idx)

set.seed(123456) 
n <- 100
phi <- 0.6
sigma_square<- 1

e <- rnorm(n, mean = 0, sd = sqrt(sigma_square))

y <- numeric(n)
y[1] <- 0  

for (i in 2:n) {
  y[i] <- phi * y[i - 1] + e[i]
}
sim_ar <- tsibble(idx = seq_len(n), y = y, index = idx)

sim_ar|>
  autoplot(y) +
  labs(title = "Simulated AR(1) Process (ϕ_1 = 0.6)", y = "y", x = "Time")

b.)Produce a time plot for the series. How does the plot change as you change $\theta_1$?

theta <- 0.6

e <- rnorm(n, mean = 0, sd = sqrt(sigma_square))

y_ma <- numeric(n)
y_ma[1] <- e[1]

for (i in 2:n) {
  y_ma[i] <- e[i] + theta * e[i - 1]
}

sim_ma <- tsibble(idx = seq_len(n), y = y_ma, index = idx)

sim_ma |>
  autoplot(y) +
  labs(title = "Simulated MA(1) Process (θ1 = 0.6)", y = "y", x = "Time")

c.) Write your own code to generate data from an MA(1) model with $\theta_1$= 0.6 and $\sigma^2$= 1

n <- 100            
theta_tes <- 0.6        
sigma <- 1    

set.seed(123467)
e <- rnorm(n, mean = 0, sd = sigma)

y <- numeric(n)
y[1] <- e[1]  

for (i in 2:n) {
  y[i] <- e[i] + theta_tes * e[i - 1]
}

ma1_sim <- tsibble(
  time = 1:n,
  y = y,
  index = time
)

ma1_sim |>
  autoplot(y) +
  labs(title = "Simulated MA(1) Process", y = "Value", x = "Time")

d.) Produce a time plot for the series. How does the plot change as you change
$\theta_1$? It apears the when $\theta_1$ is 0 there is mostly white noise, when its positive there is more smoothness and when its negative there is more oscoscillating.

library(purrr)

## Warning: package 'purrr' was built under R version 4.3.3

simulate_ma1 <- function(theta1, n = 100, sigma = 1, seed = 123) {
  set.seed(seed)
  e <- rnorm(n, mean = 0, sd = sigma)
  y <- numeric(n)
  y[1] <- e[1]
  
  for (i in 2:n) {
    y[i] <- e[i] + theta1 * e[i - 1]
  }
  
  # Return a valid tsibble with theta1 as a key
  tsibble(
    time = 1:n,
    y = y,
    theta1 = factor(theta1),
    key = theta1,
    index = time
  )
}
# Try multiple theta values
theta_vals <- c(-0.9, -0.5, 0, 0.5, 0.9)

# Simulate for each and combine into one dataset
ma1_all <- map_dfr(theta_vals, simulate_ma1)

# Plot all series
ma1_all |>
  ggplot(aes(x = time, y = y)) +
  geom_line() +
  facet_wrap(~theta1, ncol = 1) +
  labs(
    title = "MA(1) Simulations with Different θ₁ Values",
    y = "Simulated Value",
    x = "Time"
  )

e.) Generate data from an ARMA(1,1) model with with $ _1=0.6$ $\theta_1$= 0.6 and $\sigma^2$= 1

phi <- 0.6
theta <- 0.6

e <- rnorm(n, mean = 0, sd = sqrt(sigma))

y_arma <- numeric(n)
y_arma[1] <- 0

for (i in 2:n) {
  y_arma[i] <- phi * y_arma[i - 1] + e[i] + theta * e[i - 1]
}

sim_ARMA_1 <- tsibble(idx = seq_len(n), y = y_arma, index = idx)

sim_ARMA_1

## # A tsibble: 100 x 2 [1]
##      idx      y
##    <int>  <dbl>
##  1     1  0    
##  2     2 -0.169
##  3     3 -0.194
##  4     4 -0.612
##  5     5 -1.53 
##  6     6 -1.53 
##  7     7 -1.73 
##  8     8 -3.18 
##  9     9 -3.29 
## 10    10 -1.28 
## # ℹ 90 more rows

sim_ARMA_1 |>
  autoplot(y) +
  labs(title = "Simulated ARMA(1,1) Process (ϕ1 = 0.6, θ1 = 0.6)", y = "y", x = "Time")

f.) Generate data from an AR(2) model with $ _1=0.8$ $\phi_2$= 0.3 and $\sigma^2$= 1 (Note that these parameters will give a non-stationary series.)

phi_1 <- -0.8
phi_2 <- 0.3

e <- rnorm(n, mean = 0, sd = sqrt(sigma))

y_ar2 <- numeric(n)
y_ar2[1] <- 0
y_ar2[2] <- e[2] 

for (i in 3:n) {
  y_ar2[i] <- phi_1 * y_ar2[i - 1] + phi_2 * y_ar2[i - 2] + e[i]
}

sim_AR2 <- tsibble(idx = seq_len(n), y = y_ar2, index = idx)

sim_AR2

## # A tsibble: 100 x 2 [1]
##      idx     y
##    <int> <dbl>
##  1     1  0   
##  2     2  1.31
##  3     3 -1.32
##  4     4  1.99
##  5     5 -2.40
##  6     6  2.04
##  7     7 -3.14
##  8     8  2.53
##  9     9 -1.32
## 10    10  1.76
## # ℹ 90 more rows

sim_AR2 |>
  autoplot(y) +
  labs(title = "Simulated AR(2) Process (ϕ1 = -0.8, ϕ2 = 0.3)", y = "y", x = "Time")

g.) Graph the latter two series and compare them.

plot1  <- sim_ARMA_1 |>
  autoplot(y) +
  labs(title = "Simulated ARMA(1,1)", y = "y", x = "Time")
plot2 <- sim_AR2 |>
  autoplot(y) +
  labs(title = "Simulated AR(2)", y = "y", x = "Time")
plot1

plot2

9.7 Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.

a.) Use ARIMA() to find an appropriate ARIMA model. What model was selected. Check that the residuals look like white noise. Plot forecasts for the next 10 periods.

aus_data <- aus_airpassengers

fit_auto <- aus_data |>
  model(ARIMA(Passengers))

report(fit_auto)

## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8963
## s.e.   0.0594
## 
## sigma^2 estimated as 4.308:  log likelihood=-97.02
## AIC=198.04   AICc=198.32   BIC=201.65

gg_tsresiduals(fit_auto)

fc_auto <- fit_auto |>
  forecast(h = 10)

fc_auto |>
  autoplot(aus_data) +
  labs(title = "Forecasts ARIMA Model", y = "Passengers (millions)")

b.) Write the model in terms of the backshift operator. $(1−\phi_1B)(1−B)Y_t=(1+\theta_1B)e_t$

c.) Plot forecasts from an ARIMA(0,1,0) model with drift and compare these to part a.

fit_drift <- aus_airpassengers |>
  model(ARIMA(Passengers ~ pdq(0, 1, 0)))

fc_drift <- fit_drift |>
  forecast(h = 10)

fit_air <- aus_airpassengers |>
  model(ARIMA(Passengers))  

fc_air <- fit_air |>
  forecast(h = 10)

fc_air <- fc_air|>
  mutate(Model = "Auto ARIMA")

fc_drift <- fc_drift |>
  mutate(Model = "ARIMA(0,1,0) with Drift")

bind_rows(fc_air, fc_drift) |>
  autoplot(aus_airpassengers) +
  facet_wrap(~Model) +
  labs(title = "Forecast Comparison: Auto ARIMA vs ARIMA(0,1,0) with Drift",
    y = "Passengers (millions)",
    x = "Year"
  ) + theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )

d.) Plot forecasts from an ARIMA(2,1,2) model with drift and compare these to parts a and c. Remove the constant and see what happens.

fit_drift_2 <- aus_airpassengers |>
  model(ARIMA(Passengers ~ pdq(2, 1, 2)))

fc_drift_2 <- fit_drift_2 |>
  forecast(h=10)

fc_drift_2 <- fc_drift_2 |>
  mutate(Model = "ARIMA(2,1,2)")

bind_rows(fc_air, fc_drift, fc_drift_2 ) |>
  autoplot(aus_airpassengers) +
  facet_wrap(~Model) +
  labs(title = "Forecast Comparison: Auto ARIMA vs ARIMA(0,1,0) & ARIMA(2,1,2)",
    y = "Passengers (millions)",
    x = "Year"
  ) + theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )

e.) Plot forecasts from an ARIMA(0,2,1) model with a constant. What happens?

library(fable)

fit_const <- aus_airpassengers |> 
 model(ARIMA(Passengers ~ pdq(0,2,1)))

fc_const <- fit_const |>
    forecast(h=10)

fc_const |> 
  autoplot(aus_airpassengers) +
  labs(title = "Australian Air  with Constant", "Passengers (millions)")

9.8 For the United States GDP series (from global_economy):

a.) if necessary, find a suitable Box-Cox transformation for the data;

us_gdp <- global_economy |>
  filter(Country == "United States")

us_gdp |>
  autoplot(GDP) +
  labs(title = "United States GDP", y = "GDP (Billions USD)")

us_gdp |>
  features(GDP, features = guerrero)

## # A tibble: 1 × 2
##   Country       lambda_guerrero
##   <fct>                   <dbl>
## 1 United States           0.282

lambda_us <- us_gdp |>
  features(GDP, features = guerrero) |>
  pull(lambda_guerrero)

us_gdp <- us_gdp |>
  mutate(GDP_trans = box_cox(GDP, lambda_us))

b.) fit a suitable ARIMA model to the transformed data using ARIMA();

lambda <- us_gdp |>
  features(GDP, features = guerrero) |>
  pull(lambda_guerrero)

fit_arima <- us_gdp |>
  model(ARIMA(box_cox(GDP, lambda)))

report(fit_arima)

## Series: GDP 
## Model: ARIMA(1,1,0) w/ drift 
## Transformation: box_cox(GDP, lambda) 
## 
## Coefficients:
##          ar1  constant
##       0.4586  118.1822
## s.e.  0.1198    9.5047
## 
## sigma^2 estimated as 5479:  log likelihood=-325.32
## AIC=656.65   AICc=657.1   BIC=662.78

c.) try some other plausible models by experimenting with the orders chosen;

fit_ARIMA_test <- us_gdp|>
  model(ARIMA(box_cox(GDP, lambda) ~ pdq(2,1,2)))

fit_ARIMA_1 <- us_gdp |>
  model(ARIMA(box_cox(GDP, lambda) ~ pdq(1,1,1)))

glance(fit_arima, fit_ARIMA_test, fit_ARIMA_1)

## # A tibble: 1 × 9
##   Country       .model        sigma2 log_lik   AIC  AICc   BIC ar_roots ma_roots
##   <fct>         <chr>          <dbl>   <dbl> <dbl> <dbl> <dbl> <list>   <list>  
## 1 United States ARIMA(box_co…  5479.   -325.  657.  657.  663. <cpl>    <cpl>

d.) choose what you think is the best model and check the residual diagnostics;

fit_arima |>
  gg_tsresiduals()

fit_arima |>
  augment() |>
  features(.innov, ljung_box, lag = 10)

## # A tibble: 1 × 4
##   Country       .model                      lb_stat lb_pvalue
##   <fct>         <chr>                         <dbl>     <dbl>
## 1 United States ARIMA(box_cox(GDP, lambda))    3.81     0.955

e.) produce forecasts of your fitted model. Do the forecasts look reasonable? Yes it does look reasonable because theres an upwards trend that is smooth and accelerating.

fc_arima <- fit_arima |>
  forecast(h = 10)

fc_arima |>
  autoplot(us_gdp) +
  labs(title = "United States GDP Forecast (ARIMA Model)", y = "GDP (Billions of USD)")

f.) compare the results with what you would obtain using ETS() (with no transformation).

fit_ets <- us_gdp |>
  model(ETS(GDP))

fc_ets <- fit_ets |>
  forecast(h = 10)

fc_ets %>%
  autoplot(us_gdp) +
  labs(title = "United States GDP Forecast (ETS Model)", y = "GDP (Billions USD)")

glance(fit_arima, fit_ets)

## # A tibble: 1 × 9
##   Country       .model        sigma2 log_lik   AIC  AICc   BIC ar_roots ma_roots
##   <fct>         <chr>          <dbl>   <dbl> <dbl> <dbl> <dbl> <list>   <list>  
## 1 United States ARIMA(box_co…  5479.   -325.  657.  657.  663. <cpl>    <cpl>

Homework #6 -Data624

Michael Robinson

2025-03-23