Load necessary libraries

library(rmarkdown)
library(knitr)
library(readxl)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(ggplot2)
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ fma       2.5     ✔ expsmooth 2.3
## 

— Question 1: Box-Cox Transformation —

Find optimal Box-Cox lambda values for variance stabilization

Lambda values close to 0 suggest log transformation, while values near 1 indicate no transformation needed.

lambda_usnetelec <- BoxCox.lambda(usnetelec)
lambda_usgdp <- BoxCox.lambda(usgdp)
lambda_mcopper <- BoxCox.lambda(mcopper)
lambda_enplanements <- BoxCox.lambda(enplanements)

print(lambda_usnetelec)
## [1] 0.5167714
print(lambda_usgdp)
## [1] 0.366352
print(lambda_mcopper)
## [1] 0.1919047
print(lambda_enplanements)
## [1] -0.2269461

— Question 2: Why Box-Cox is unhelpful for cangas? —

Answer: Box-Cox is unhelpful if variance is already stable. Since cangas likely has constant variance,

applying a transformation does not improve the model.

#— Question 3. What Box-Cox transformation would you select for your retail data (from Exercise 3 in # Section 2.10)?

Applying Box-Cox to retail data to determine if transformation is needed for variance stabilization.

Load the data from 2.10

retaildata <- read_excel("retail.xlsx", skip=1)
head(retaildata)
## # A tibble: 6 × 190
##   `Series ID`         A3349335T A3349627V A3349338X A3349398A A3349468W
##   <dttm>                  <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
## 1 1982-04-01 00:00:00      303.      41.7      63.9      409.      65.8
## 2 1982-05-01 00:00:00      298.      43.1      64        405.      65.8
## 3 1982-06-01 00:00:00      298       40.3      62.7      401       62.3
## 4 1982-07-01 00:00:00      308.      40.9      65.6      414.      68.2
## 5 1982-08-01 00:00:00      299.      42.1      62.6      404.      66  
## 6 1982-09-01 00:00:00      305.      42        64.4      412.      62.3
## # ℹ 184 more variables: A3349336V <dbl>, A3349337W <dbl>, A3349397X <dbl>,
## #   A3349399C <dbl>, A3349874C <dbl>, A3349871W <dbl>, A3349790V <dbl>,
## #   A3349556W <dbl>, A3349791W <dbl>, A3349401C <dbl>, A3349873A <dbl>,
## #   A3349872X <dbl>, A3349709X <dbl>, A3349792X <dbl>, A3349789K <dbl>,
## #   A3349555V <dbl>, A3349565X <dbl>, A3349414R <dbl>, A3349799R <dbl>,
## #   A3349642T <dbl>, A3349413L <dbl>, A3349564W <dbl>, A3349416V <dbl>,
## #   A3349643V <dbl>, A3349483V <dbl>, A3349722T <dbl>, A3349727C <dbl>, …
myts <- ts(retaildata[,"A3349873A"], frequency=12, start=c(1982,4))

# Plot the time series
autoplot(myts) + ggtitle("Retail Time Series Plot")

# Seasonal Plot
ggseasonplot(myts) + ggtitle("Seasonal Plot of Retail Data")

# Seasonal Subseries Plot
ggsubseriesplot(myts) + ggtitle("Seasonal Subseries Plot")

# Lag Plot to check autocorrelation
gglagplot(myts)

# Autocorrelation Function (ACF) Plot
ggAcf(myts)

lambda_retail <- BoxCox.lambda(myts)
print(lambda_retail)
## [1] 0.1276369

Intepretation

lambda_retail <- BoxCox.lambda(myts)
print(lambda_retail)
## [1] 0.1276369
# Apply transformation
myts_transformed <- BoxCox(myts, lambda_retail)

# Plot transformed data
autoplot(myts_transformed) + ggtitle("Transformed Retail Time Series")

— Question 4: Plot and Transformation for dole, usdeaths, bricksq —

Visualizing raw data and applying Box-Cox transformation if needed.

autoplot(dole) + ggtitle("Dole Data Plot")

autoplot(usdeaths) + ggtitle("US Deaths Data Plot")

autoplot(bricksq) + ggtitle("Brick Data Plot")

# Apply Box-Cox transformation if necessary
lambda_dole <- BoxCox.lambda(dole)
lambda_usdeaths <- BoxCox.lambda(usdeaths)
lambda_bricksq <- BoxCox.lambda(bricksq)

print(lambda_dole)
## [1] 0.3290922
print(lambda_usdeaths)
## [1] -0.03363775
print(lambda_bricksq)
## [1] 0.2548929
# Transform and plot if required
autoplot(BoxCox(dole, lambda_dole)) + ggtitle("Transformed Dole Data")

autoplot(BoxCox(usdeaths, lambda_usdeaths)) + ggtitle("Transformed US Deaths Data")

autoplot(BoxCox(bricksq, lambda_bricksq)) + ggtitle("Transformed Brick Data")

# ACF and Lag plots
ggAcf(dole) + ggtitle("Dole Data ACF Plot")

gglagplot(dole) + ggtitle("Dole Data Lag Plot")

ggAcf(usdeaths) + ggtitle("US Deaths ACF Plot")

gglagplot(usdeaths) + ggtitle("US Deaths Lag Plot")

ggAcf(bricksq) + ggtitle("Bricksq ACF Plot")

gglagplot(bricksq) + ggtitle("Bricksq Lag Plot")

— Question 5: Seasonal Naïve Forecast for Australian Beer Production —

The seasonal naïve method repeats the last observed seasonal pattern.

beer <- window(ausbeer, start=1992)
fc_beer <- snaive(beer)

Plot forecast

autoplot(fc_beer)

# Get residuals

res_beer <- residuals(fc_beer)

Check residuals and plot ACF

checkresiduals(fc_beer)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 32.269, df = 8, p-value = 8.336e-05
## 
## Model df: 0.   Total lags used: 8
ggAcf(res_beer) + ggtitle("Beer Production Residuals ACF Plot")

gglagplot(res_beer) + ggtitle("Beer Production Residuals Lag Plot")
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_path()`).

— Question 7: True or False Answers —

a) TRUE - Normally distributed residuals indicate a good model.

b) FALSE - Small residuals do not guarantee better forecasts.

c) FALSE - MAPE is not always the best accuracy measure.

d) FALSE - More complex models do not always improve forecasting.

e) FALSE - Other factors matter beyond test set accuracy.

— Question 8: Retail Time Series Forecasting —

Splitting data into training and test sets for model evaluation

myts.train <- window(myts, end=c(2010,12))
myts.test <- window(myts, start=2011)

Plot training vs. test set

autoplot(myts) +
  autolayer(myts.train, series="Training") +
  autolayer(myts.test, series="Test") +
  ggtitle("Training vs Test Data")

Forecast using seasonal naïve

fc_retail <- snaive(myts.train)
accuracy(fc_retail, myts.test)
##                     ME     RMSE      MAE       MPE      MAPE     MASE      ACF1
## Training set  7.772973 20.24576 15.95676  4.702754  8.109777 1.000000 0.7385090
## Test set     55.300000 71.44309 55.78333 14.900996 15.082019 3.495907 0.5315239
##              Theil's U
## Training set        NA
## Test set      1.297866
checkresiduals(fc_retail)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 624.45, df = 24, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 24
ggAcf(residuals(fc_retail)) + ggtitle("Retail Forecast Residuals ACF Plot")

gglagplot(residuals(fc_retail)) + ggtitle("Retail Forecast Residuals Lag Plot")
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_path()`).

— Question 9: Visitor Nights Forecasting —

Evaluating different training periods to measure forecast accuracy

train1 <- window(visnights[, "QLDMetro"], end=c(2015,4))
train2 <- window(visnights[, "QLDMetro"], end=c(2014,4))
train3 <- window(visnights[, "QLDMetro"], end=c(2013,4))

fc1 <- snaive(train1)
fc2 <- snaive(train2)
fc3 <- snaive(train3)

accuracy(fc1, visnights[, "QLDMetro"])
##                      ME      RMSE       MAE        MPE     MAPE      MASE
## Training set 0.02006107 1.0462821 0.8475553 -0.2237701 7.976760 1.0000000
## Test set     0.56983879 0.9358727 0.7094002  4.6191866 6.159821 0.8369957
##                    ACF1 Theil's U
## Training set 0.06014484        NA
## Test set     0.09003153 0.4842061
accuracy(fc2, visnights[, "QLDMetro"])
##                     ME      RMSE       MAE        MPE     MAPE      MASE
## Training set 0.0161876 1.0735582 0.8809432 -0.2744747 8.284216 1.0000000
## Test set     0.3669560 0.8516202 0.6644561  2.8431247 6.085501 0.7542553
##                      ACF1 Theil's U
## Training set  0.066108793        NA
## Test set     -0.002021023 0.5102527
accuracy(fc3, visnights[, "QLDMetro"])
##                        ME     RMSE       MAE        MPE     MAPE      MASE
## Training set -0.007455407 1.074544 0.8821694 -0.5625865 8.271365 1.0000000
## Test set      0.411850940 1.035878 0.8800104  4.4185560 8.635235 0.9975526
##                     ACF1 Theil's U
## Training set  0.07317746        NA
## Test set     -0.21876890 0.8365723

#Conclusion #This project took a deep dive into different forecasting techniques, from Box-Cox transformations to seasonal naïve forecasts and residual analysis. Along the way, we used ACF and lag plots to uncover patterns in the data, ensuring our models weren’t missing any hidden trends. By evaluating forecast accuracy using error metrics, we got a clearer picture of which models worked best for different datasets. Overall, this was a great exercise in understanding the importance of model diagnostics and making data-driven decisions in time series forecasting!