# QED ==================
# Homework: Download the historical series of the Cass Shipments Index
# https://www.cassinfo.com/freight-audit-payment/cass-transportation-indexes/august-2024?utm_campaign=FRT%20-%20Cass%20Transportation%20Indexes%20May%202022&utm_medium=email&_hsenc=p2ANqtz-8JYe5_jfjHADymv2XZf6H2quNPw_RhSzGg09hTGXGmc5wEjxnziw1X5M4RlZ2Z7oyCj-l_Nqg853oAaZ5seFN5HZbajQ&_hsmi=324823760&utm_content=324823760&utm_source=hs_email
# Fit an ARIMA model. forecast 18 months. plot. upload the plot to bucket CASS.
# Due Friday 27
# what does it portend for the near future?
# Load required libraries
suppressWarnings({
suppressPackageStartupMessages({
library(tidyverse)
library(lubridate)
library(ggthemes)
library(cowplot)
library(tsbox) #convert xts to tsibble
library(TSstudio)
library(tsutils)
library(rio)
library(readxl)
library(forecast)
library(forecastHybrid)
library(randomForest)
library(e1071) #for SVMs
library(pacman)
library(quantmod)
library(TSstudio)
library(lubridate)
library(dygraphs)
library(tseries)
})
})
# Loading the Cass Shipments Index data (assuming the file is in your working directory)
mydata <- read_excel("Cass Indexes Historical Data.xlsx",
sheet = "Freight Index-Shipments",
range = "B7:C423",
col_names = TRUE)
# Inspect the first few rows
head(mydata)
## # A tibble: 6 × 2
## Month `Index Value`
## <dttm> <dbl>
## 1 1990-01-01 00:00:00 1
## 2 1990-02-01 00:00:00 0.999
## 3 1990-03-01 00:00:00 1
## 4 1990-04-01 00:00:00 0.979
## 5 1990-05-01 00:00:00 0.973
## 6 1990-06-01 00:00:00 0.966
# Converting Month column to Date format and create time series
mydata <- mydata %>%
mutate(Month = as.Date(Month))
# Converting the index data to a time series object
cass_ts <- ts(mydata$`Index Value`, start = c(1990, 1), frequency = 12)
# Plotting the original data
autoplot(cass_ts) + ggtitle("Satya's Cass Shipments Index") + xlab("Year") + ylab("Index Value")
# Spliting the data into training and testing sets (18 months for forecasting)
h <- 18
split_ts_cass <- ts_split(cass_ts, sample.out = h)
train_cass <- split_ts_cass$train
test_cass <- split_ts_cass$test
Modeling and forecasting using the manual and auto arima model
# Fitting ARIMA model manually
fit_arima <- Arima(train_cass, order = c(0,1,1), seasonal = c(2,0,0))
# adding Summary of the ARIMA model
summary(fit_arima)
## Series: train_cass
## ARIMA(0,1,1)(2,0,0)[12]
##
## Coefficients:
## ma1 sar1 sar2
## -0.2091 0.2869 0.1886
## s.e. 0.0504 0.0489 0.0500
##
## sigma^2 = 0.001547: log likelihood = 721.49
## AIC=-1434.98 AICc=-1434.87 BIC=-1419.04
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0003391452 0.03913587 0.02969092 -0.03303902 2.746156 0.4327076
## ACF1
## Training set 0.001857902
# Forecasting for the next 18 months
forecast_arima <- forecast(fit_arima, h = h)
# Plotting the forecast against the actual data
autoplot(forecast_arima) +
autolayer(train_cass, series = "Training Data") +
autolayer(test_cass, series = "Test Data") +
ggtitle("Satya's ARIMA Forecast vs Actual Data") +
xlab("Year") +
ylab("Index Value") +
theme_minimal()
# Check residuals for model adequacy
checkresiduals(fit_arima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)(2,0,0)[12]
## Q* = 21.188, df = 21, p-value = 0.4475
##
## Model df: 3. Total lags used: 24
# Forecast accuracy on the test set
accuracy(forecast_arima, test_cass)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0003391452 0.03913587 0.02969092 -0.03303902 2.746156 0.4327076
## Test set -0.0705995191 0.07483918 0.07059952 -6.38742450 6.387424 1.0288988
## ACF1 Theil's U
## Training set 0.001857902 NA
## Test set 0.527354711 2.546548
# Compare different models: Auto ARIMA and Holt-Winters
# Auto ARIMA model
fit_auto_arima <- auto.arima(train_cass)
summary(fit_auto_arima)
## Series: train_cass
## ARIMA(0,1,1)(2,0,0)[12]
##
## Coefficients:
## ma1 sar1 sar2
## -0.2091 0.2869 0.1886
## s.e. 0.0504 0.0489 0.0500
##
## sigma^2 = 0.001547: log likelihood = 721.49
## AIC=-1434.98 AICc=-1434.87 BIC=-1419.04
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0003391452 0.03913587 0.02969092 -0.03303902 2.746156 0.4327076
## ACF1
## Training set 0.001857902
forecast_auto_arima <- forecast(fit_auto_arima, h = h)
# Holt-Winters model
fit_hw <- HoltWinters(train_cass)
## Warning in HoltWinters(train_cass): optimization difficulties: ERROR:
## ABNORMAL_TERMINATION_IN_LNSRCH
summary(fit_hw)
## Length Class Mode
## fitted 1544 mts numeric
## x 398 ts numeric
## alpha 1 -none- numeric
## beta 1 -none- numeric
## gamma 1 -none- numeric
## coefficients 14 -none- numeric
## seasonal 1 -none- character
## SSE 1 -none- numeric
## call 2 -none- call
forecast_hw <- forecast(fit_hw, h = h)
# Plot comparison of ARIMA, Auto ARIMA, and Holt-Winters models
g1 <- autoplot(forecast_arima) + autolayer(train_cass) + autolayer(test_cass)
g2 <- autoplot(forecast_auto_arima) + autolayer(train_cass) + autolayer(test_cass)
g3 <- autoplot(forecast_hw) + autolayer(train_cass) + autolayer(test_cass)
cowplot::plot_grid(g1 + ggtitle("Satya's Manual ARIMA"),
g2 + ggtitle("Satya's Auto ARIMA"),
g3 + ggtitle("Satya's Holt-Winters"),
ncol = 1)
# Accuracy comparison and plotting the forecast vs actual
autoplot(forecast_hw) +
autolayer(train_cass, series = "Training Data") +
autolayer(test_cass, series = "Test Data") +
ggtitle("Satya's Holt-Winters Forecast vs Actual Data") +
xlab("Year") +
ylab("Index Value") +
theme_minimal()
# Calculate accuracy using the forecast object, comparing with test data
accuracy(forecast_hw, test_cass)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.00286887 0.04066238 0.03111358 0.2112648 2.877347 0.4534412
## Test set -0.04716643 0.05260233 0.04716643 -4.2779168 4.277917 0.6873912
## ACF1 Theil's U
## Training set 0.04881095 NA
## Test set 0.64179809 1.784777
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
autoplot(forecast_hw) +
autolayer(train_cass, series = "Training Data") +
autolayer(test_cass, series = "Test Data") +
ggtitle("Holt-Winters Forecast vs Actual Data") +
xlab("Year") +
ylab("Index Value") +
theme_minimal()
summary(forecast_hw)
##
## Forecast method: HoltWinters
##
## Model Information:
## Holt-Winters exponential smoothing with trend and additive seasonal component.
##
## Call:
## HoltWinters(x = train_cass)
##
## Smoothing parameters:
## alpha: 0.7806181
## beta : 0.005748409
## gamma: 0.3446114
##
## Coefficients:
## [,1]
## a 1.1550541181
## b -0.0002334739
## s1 0.0337202845
## s2 0.0106405370
## s3 0.0339269398
## s4 0.0165404573
## s5 -0.0035369649
## s6 0.0139345045
## s7 0.0174943974
## s8 0.0069579928
## s9 0.0037015421
## s10 -0.0195232914
## s11 -0.0450511148
## s12 0.0149146306
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.00286887 0.04066238 0.03111358 0.2112648 2.877347 0.4534412
## ACF1
## Training set 0.04881095
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Mar 2023 1.188541 1.1364924 1.240589 1.1089396 1.268142
## Apr 2023 1.165228 1.0990546 1.231401 1.0640246 1.266431
## May 2023 1.188281 1.1103842 1.266177 1.0691483 1.307413
## Jun 2023 1.170661 1.0824786 1.258843 1.0357978 1.305524
## Jul 2023 1.150350 1.0528630 1.247837 1.0012566 1.299443
## Aug 2023 1.167588 1.0615183 1.273657 1.0053685 1.329807
## Sep 2023 1.170914 1.0568205 1.285008 0.9964230 1.345405
## Oct 2023 1.160144 1.0384739 1.281815 0.9740655 1.346223
## Nov 2023 1.156654 1.0277753 1.285533 0.9595509 1.353758
## Dec 2023 1.133196 0.9974174 1.268975 0.9255406 1.340852
## Jan 2024 1.107435 0.9650203 1.249849 0.8896306 1.325239
## Feb 2024 1.167167 1.0183448 1.315989 0.9395631 1.394771
## Mar 2024 1.185739 1.0295607 1.341918 0.9468847 1.424594
## Apr 2024 1.162426 1.0002579 1.324594 0.9144113 1.410441
## May 2024 1.185479 1.0174739 1.353484 0.9285374 1.442420
## Jun 2024 1.167859 0.9941539 1.341564 0.9022000 1.433518
## Jul 2024 1.147548 0.9682665 1.326830 0.8733606 1.421736
## Aug 2024 1.164786 0.9800401 1.349532 0.8822415 1.447331
checkresiduals(forecast_hw)
##
## Ljung-Box test
##
## data: Residuals from HoltWinters
## Q* = 89.252, df = 24, p-value = 1.917e-09
##
## Model df: 0. Total lags used: 24
accuracy(forecast_hw, test_cass)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.00286887 0.04066238 0.03111358 0.2112648 2.877347 0.4534412
## Test set -0.04716643 0.05260233 0.04716643 -4.2779168 4.277917 0.6873912
## ACF1 Theil's U
## Training set 0.04881095 NA
## Test set 0.64179809 1.784777
print("For the near future, the Holt-Winters model predicts that the Cass Shipments Index will likely stay consistent without major increases or decreases. However, it will continue showing seasonal ups and downs similar to what we've seen in the past. The forecast fits the data reasonably well, but it may not be entirely precise, especially further out in time. You can expect the index to fluctuate seasonally, but with some room for deviation from the forecasted values.")
## [1] "For the near future, the Holt-Winters model predicts that the Cass Shipments Index will likely stay consistent without major increases or decreases. However, it will continue showing seasonal ups and downs similar to what we've seen in the past. The forecast fits the data reasonably well, but it may not be entirely precise, especially further out in time. You can expect the index to fluctuate seasonally, but with some room for deviation from the forecasted values."