library(rmarkdown)
library(knitr)
library(readxl)
library(forecast)
library(ggplot2)
library(fpp2)
# Question 1: Find an appropriate Box-Cox transformation to stabilise the variance
# of the usnetelec, usgdp, mcopper, and enplanements series
lambda_usnetelec <- BoxCox.lambda(usnetelec)
lambda_usgdp <- BoxCox.lambda(usgdp)
lambda_mcopper <- BoxCox.lambda(mcopper)
lambda_enplanements <- BoxCox.lambda(enplanements)
print(lambda_usnetelec)
## [1] 0.5167714
print(lambda_usgdp)
## [1] 0.366352
print(lambda_mcopper)
## [1] 0.1919047
print(lambda_enplanements)
## [1] -0.2269461
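As a quick sanity check (an added sketch, not part of the original output), each fitted lambda can be passed back to BoxCox() and the transformed series plotted; usnetelec is used here as one example, and the same pattern applies to the other three series.

# Sketch: apply the fitted lambda to one series and compare the plots
autoplot(usnetelec) + ggtitle("usnetelec (original)")
autoplot(BoxCox(usnetelec, lambda_usnetelec)) +
  ggtitle(paste0("usnetelec, Box-Cox with lambda = ", round(lambda_usnetelec, 3)))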
# Question 2: Why is a Box-Cox transformation unhelpful for the cangas data?
autoplot(cangas) + ggtitle("Canadian Gas Production (cangas)")
# The cangas series likely has roughly constant variance, so a Box-Cox
# transformation does little to change it and is unhelpful here.
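As a visual check (an added sketch, not part of the original analysis), the series can be plotted after a Box-Cox transformation; the transformed plot looks much the same, which supports the conclusion above.

# Sketch: transform cangas and plot it for comparison
lambda_cangas <- BoxCox.lambda(cangas)
autoplot(BoxCox(cangas, lambda_cangas)) +
  ggtitle(paste0("cangas, Box-Cox with lambda = ", round(lambda_cangas, 3)))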
# Question 3: What Box-Cox transformation would you select for your retail data
# (from Exercise 3 in Section 2.10)?
retaildata <- read_excel("retail.xlsx", skip=1)
head(retaildata)
## # A tibble: 6 × 190
## `Series ID` A3349335T A3349627V A3349338X A3349398A A3349468W
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1982-04-01 00:00:00 303. 41.7 63.9 409. 65.8
## 2 1982-05-01 00:00:00 298. 43.1 64 405. 65.8
## 3 1982-06-01 00:00:00 298 40.3 62.7 401 62.3
## 4 1982-07-01 00:00:00 308. 40.9 65.6 414. 68.2
## 5 1982-08-01 00:00:00 299. 42.1 62.6 404. 66
## 6 1982-09-01 00:00:00 305. 42 64.4 412. 62.3
## # ℹ 184 more variables: A3349336V <dbl>, A3349337W <dbl>, A3349397X <dbl>,
## # A3349399C <dbl>, A3349874C <dbl>, A3349871W <dbl>, A3349790V <dbl>,
## # A3349556W <dbl>, A3349791W <dbl>, A3349401C <dbl>, A3349873A <dbl>,
## # A3349872X <dbl>, A3349709X <dbl>, A3349792X <dbl>, A3349789K <dbl>,
## # A3349555V <dbl>, A3349565X <dbl>, A3349414R <dbl>, A3349799R <dbl>,
## # A3349642T <dbl>, A3349413L <dbl>, A3349564W <dbl>, A3349416V <dbl>,
## # A3349643V <dbl>, A3349483V <dbl>, A3349722T <dbl>, A3349727C <dbl>, …
myts <- ts(retaildata[,"A3349873A"], frequency=12, start=c(1982,4))
# Plot the time series
autoplot(myts) + ggtitle("Retail Time Series Plot")
# Seasonal Plot
ggseasonplot(myts) + ggtitle("Seasonal Plot of Retail Data")
# Seasonal Subseries Plot
ggsubseriesplot(myts) + ggtitle("Seasonal Subseries Plot")
# Lag Plot to check autocorrelation
gglagplot(myts)
# Autocorrelation Function (ACF) Plot
ggAcf(myts)
lambda_retail <- BoxCox.lambda(myts)
print(lambda_retail)
## [1] 0.1276369
# Apply transformation
myts_transformed <- BoxCox(myts, lambda_retail)
# Plot transformed data
autoplot(myts_transformed) + ggtitle("Transformed Retail Time Series")
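If forecasts were later produced on the transformed scale, InvBoxCox() would map them back to the original units; a quick round-trip check (an added sanity check, not part of the original output) confirms the two functions invert each other.

# Round-trip check: back-transforming should recover the original series
myts_back <- InvBoxCox(myts_transformed, lambda_retail)
max(abs(myts_back - myts))  # should be numerically zero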
# Plot the dole, usdeaths and bricksq series
autoplot(dole) + ggtitle("Dole Data Plot")
autoplot(usdeaths) + ggtitle("US Deaths Data Plot")
autoplot(bricksq) + ggtitle("Brick Data Plot")
# Apply Box-Cox transformation if necessary
lambda_dole <- BoxCox.lambda(dole)
lambda_usdeaths <- BoxCox.lambda(usdeaths)
lambda_bricksq <- BoxCox.lambda(bricksq)
print(lambda_dole)
## [1] 0.3290922
print(lambda_usdeaths)
## [1] -0.03363775
print(lambda_bricksq)
## [1] 0.2548929
# Transform and plot if required
autoplot(BoxCox(dole, lambda_dole)) + ggtitle("Transformed Dole Data")
autoplot(BoxCox(usdeaths, lambda_usdeaths)) + ggtitle("Transformed US Deaths Data")
autoplot(BoxCox(bricksq, lambda_bricksq)) + ggtitle("Transformed Brick Data")
# ACF and Lag plots
ggAcf(dole) + ggtitle("Dole Data ACF Plot")
gglagplot(dole) + ggtitle("Dole Data Lag Plot")
ggAcf(usdeaths) + ggtitle("US Deaths ACF Plot")
gglagplot(usdeaths) + ggtitle("US Deaths Lag Plot")
ggAcf(bricksq) + ggtitle("Bricksq ACF Plot")
gglagplot(bricksq) + ggtitle("Bricksq Lag Plot")
# Seasonal naive forecast of quarterly Australian beer production from 1992
beer <- window(ausbeer, start=1992)
fc_beer <- snaive(beer)
autoplot(fc_beer)
# Get residuals
res_beer <- residuals(fc_beer)
checkresiduals(fc_beer)
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 32.269, df = 8, p-value = 8.336e-05
##
## Model df: 0. Total lags used: 8
ggAcf(res_beer) + ggtitle("Beer Production Residuals ACF Plot")
gglagplot(res_beer) + ggtitle("Beer Production Residuals Lag Plot")
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_path()`).
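checkresiduals() already reports the Ljung-Box statistic above; as a cross-check (an added step, not in the original output), the same test can be run directly on the residuals after dropping the leading NAs, with lag = 8 matching the "Total lags used: 8" shown for this quarterly series.

# Manual Ljung-Box test on the seasonal naive residuals
Box.test(na.omit(res_beer), lag = 8, type = "Ljung-Box")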
# Split the retail series into training (to the end of 2010) and test (from 2011) sets
myts.train <- window(myts, end=c(2010,12))
myts.test <- window(myts, start=2011)
autoplot(myts) +
  autolayer(myts.train, series="Training") +
  autolayer(myts.test, series="Test") +
  ggtitle("Training vs Test Data")
fc_retail <- snaive(myts.train)
accuracy(fc_retail, myts.test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 7.772973 20.24576 15.95676 4.702754 8.109777 1.000000 0.7385090
## Test set 55.300000 71.44309 55.78333 14.900996 15.082019 3.495907 0.5315239
## Theil's U
## Training set NA
## Test set 1.297866
checkresiduals(fc_retail)
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 624.45, df = 24, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 24
ggAcf(residuals(fc_retail)) + ggtitle("Retail Forecast Residuals ACF Plot")
gglagplot(residuals(fc_retail)) + ggtitle("Retail Forecast Residuals Lag Plot")
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_path()`).
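For context (an added comparison, not part of the original analysis), other simple benchmarks can be scored on the same training/test split; on monthly retail data the seasonal naive method is usually the strongest of these.

# Compare two other simple benchmarks on the same split
h <- length(myts.test)
accuracy(naive(myts.train, h = h), myts.test)
accuracy(rwf(myts.train, drift = TRUE, h = h), myts.test)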
# Seasonal naive forecasts of QLDMetro visitor nights from three training windows of different lengths
train1 <- window(visnights[, "QLDMetro"], end=c(2015,4))
train2 <- window(visnights[, "QLDMetro"], end=c(2014,4))
train3 <- window(visnights[, "QLDMetro"], end=c(2013,4))
fc1 <- snaive(train1)
fc2 <- snaive(train2)
fc3 <- snaive(train3)
accuracy(fc1, visnights[, "QLDMetro"])
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02006107 1.0462821 0.8475553 -0.2237701 7.976760 1.0000000
## Test set 0.56983879 0.9358727 0.7094002 4.6191866 6.159821 0.8369957
## ACF1 Theil's U
## Training set 0.06014484 NA
## Test set 0.09003153 0.4842061
accuracy(fc2, visnights[, "QLDMetro"])
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0161876 1.0735582 0.8809432 -0.2744747 8.284216 1.0000000
## Test set 0.3669560 0.8516202 0.6644561 2.8431247 6.085501 0.7542553
## ACF1 Theil's U
## Training set 0.066108793 NA
## Test set -0.002021023 0.5102527
accuracy(fc3, visnights[, "QLDMetro"])
## ME RMSE MAE MPE MAPE MASE
## Training set -0.007455407 1.074544 0.8821694 -0.5625865 8.271365 1.0000000
## Test set 0.411850940 1.035878 0.8800104 4.4185560 8.635235 0.9975526
## ACF1 Theil's U
## Training set 0.07317746 NA
## Test set -0.21876890 0.8365723
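To make the three training windows easier to compare, the test-set errors above can be gathered into one small table (a convenience summary of the output already shown, not additional analysis).

# Collect the test-set accuracy of the three seasonal naive forecasts
qld <- visnights[, "QLDMetro"]
rbind(
  "end 2015" = accuracy(fc1, qld)["Test set", c("RMSE", "MAE", "MAPE")],
  "end 2014" = accuracy(fc2, qld)["Test set", c("RMSE", "MAE", "MAPE")],
  "end 2013" = accuracy(fc3, qld)["Test set", c("RMSE", "MAE", "MAPE")]
)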
# Conclusion

This project took a deep dive into several forecasting techniques, from Box-Cox transformations to seasonal naïve forecasts and residual analysis. Along the way, we used ACF and lag plots to uncover patterns in the data and to check that our models were not missing any remaining structure. Evaluating forecast accuracy on held-out test sets with standard error metrics gave a clearer picture of which models worked best for each dataset. Overall, this was a valuable exercise in model diagnostics and in making data-driven decisions in time series forecasting.