The below code covers the Loading libraries and preparing the data for the modeling

# Load required libraries
suppressWarnings({
  suppressPackageStartupMessages({
    library(tidyverse)
    library(lubridate)
    library(ggthemes)
    library(cowplot)
    library(tsbox) #convert xts to tsibble
    library(TSstudio)
    library(tsutils)
    library(rio)
    library(readxl)
    library(forecast)
    library(forecastHybrid)
    library(randomForest)
    library(e1071)  #for SVMs
    library(pacman)
    library(quantmod)
    library(TSstudio)
    library(lubridate)
    library(dygraphs)
    library(tseries)
    
  })
})

# Loading the Cass Shipments Index data (assuming the file is in your working directory)
mydata <- read_excel("Cass Indexes Historical Data.xlsx", 
                     sheet = "Freight Index-Shipments", 
                     range = "B7:C423",
                     col_names = TRUE)

# Inspect the first few rows
head(mydata)

## # A tibble: 6 × 2
##   Month               `Index Value`
##   <dttm>                      <dbl>
## 1 1990-01-01 00:00:00         1    
## 2 1990-02-01 00:00:00         0.999
## 3 1990-03-01 00:00:00         1    
## 4 1990-04-01 00:00:00         0.979
## 5 1990-05-01 00:00:00         0.973
## 6 1990-06-01 00:00:00         0.966

# Converting Month column to Date format and create time series
mydata <- mydata %>%
  mutate(Month = as.Date(Month))

# Converting the index data to a time series object
cass_ts <- ts(mydata$`Index Value`, start = c(1990, 1), frequency = 12)

# Plotting the original data
autoplot(cass_ts) + ggtitle("Satya's Cass Shipments Index") + xlab("Year") + ylab("Index Value")

# Spliting the data into training and testing sets (18 months for forecasting)
h <- 18
split_ts_cass <- ts_split(cass_ts, sample.out = h)
train_cass <- split_ts_cass$train
test_cass <- split_ts_cass$test

Including Plots

Modeling and forecasting using the manual and auto arima model

# Fitting ARIMA model manually
fit_arima <- Arima(train_cass, order = c(0,1,1), seasonal = c(2,0,0))

# adding Summary of the ARIMA model
summary(fit_arima)

## Series: train_cass 
## ARIMA(0,1,1)(2,0,0)[12] 
## 
## Coefficients:
##           ma1    sar1    sar2
##       -0.2091  0.2869  0.1886
## s.e.   0.0504  0.0489  0.0500
## 
## sigma^2 = 0.001547:  log likelihood = 721.49
## AIC=-1434.98   AICc=-1434.87   BIC=-1419.04
## 
## Training set error measures:
##                        ME       RMSE        MAE         MPE     MAPE      MASE
## Training set 0.0003391452 0.03913587 0.02969092 -0.03303902 2.746156 0.4327076
##                     ACF1
## Training set 0.001857902

# Forecasting for the next 18 months
forecast_arima <- forecast(fit_arima, h = h)

# Plotting the forecast against the actual data
autoplot(forecast_arima) + 
  autolayer(train_cass, series = "Training Data") + 
  autolayer(test_cass, series = "Test Data") +
  ggtitle("Satya's ARIMA Forecast vs Actual Data") + 
  xlab("Year") + 
  ylab("Index Value") +
  theme_minimal()

# Check residuals for model adequacy
checkresiduals(fit_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1)(2,0,0)[12]
## Q* = 21.188, df = 21, p-value = 0.4475
## 
## Model df: 3.   Total lags used: 24

# Forecast accuracy on the test set
accuracy(forecast_arima, test_cass)

##                         ME       RMSE        MAE         MPE     MAPE      MASE
## Training set  0.0003391452 0.03913587 0.02969092 -0.03303902 2.746156 0.4327076
## Test set     -0.0705995191 0.07483918 0.07059952 -6.38742450 6.387424 1.0288988
##                     ACF1 Theil's U
## Training set 0.001857902        NA
## Test set     0.527354711  2.546548

# Compare different models: Auto ARIMA and Holt-Winters

# Auto ARIMA model
fit_auto_arima <- auto.arima(train_cass)
summary(fit_auto_arima)

## Series: train_cass 
## ARIMA(0,1,1)(2,0,0)[12] 
## 
## Coefficients:
##           ma1    sar1    sar2
##       -0.2091  0.2869  0.1886
## s.e.   0.0504  0.0489  0.0500
## 
## sigma^2 = 0.001547:  log likelihood = 721.49
## AIC=-1434.98   AICc=-1434.87   BIC=-1419.04
## 
## Training set error measures:
##                        ME       RMSE        MAE         MPE     MAPE      MASE
## Training set 0.0003391452 0.03913587 0.02969092 -0.03303902 2.746156 0.4327076
##                     ACF1
## Training set 0.001857902

forecast_auto_arima <- forecast(fit_auto_arima, h = h)

# Holt-Winters model
fit_hw <- HoltWinters(train_cass)

## Warning in HoltWinters(train_cass): optimization difficulties: ERROR:
## ABNORMAL_TERMINATION_IN_LNSRCH

summary(fit_hw)

##              Length Class  Mode     
## fitted       1544   mts    numeric  
## x             398   ts     numeric  
## alpha           1   -none- numeric  
## beta            1   -none- numeric  
## gamma           1   -none- numeric  
## coefficients   14   -none- numeric  
## seasonal        1   -none- character
## SSE             1   -none- numeric  
## call            2   -none- call

forecast_hw <- forecast(fit_hw, h = h)

# Plot comparison of ARIMA, Auto ARIMA, and Holt-Winters models
g1 <- autoplot(forecast_arima) + autolayer(train_cass) + autolayer(test_cass)
g2 <- autoplot(forecast_auto_arima) + autolayer(train_cass) + autolayer(test_cass)
g3 <- autoplot(forecast_hw) + autolayer(train_cass) + autolayer(test_cass)

cowplot::plot_grid(g1 + ggtitle("Satya's Manual ARIMA"), 
                   g2 + ggtitle("Satya's Auto ARIMA"), 
                   g3 + ggtitle("Satya's Holt-Winters"), 
                   ncol = 1)

# Accuracy comparison and plotting the forecast vs actual
autoplot(forecast_hw) + 
  autolayer(train_cass, series = "Training Data") + 
  autolayer(test_cass, series = "Test Data") +
  ggtitle("Satya's Holt-Winters Forecast vs Actual Data") + 
  xlab("Year") + 
  ylab("Index Value") +
  theme_minimal()

# Calculate accuracy using the forecast object, comparing with test data
accuracy(forecast_hw, test_cass)

##                       ME       RMSE        MAE        MPE     MAPE      MASE
## Training set  0.00286887 0.04066238 0.03111358  0.2112648 2.877347 0.4534412
## Test set     -0.04716643 0.05260233 0.04716643 -4.2779168 4.277917 0.6873912
##                    ACF1 Theil's U
## Training set 0.04881095        NA
## Test set     0.64179809  1.784777

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

autoplot(forecast_hw) + 
  autolayer(train_cass, series = "Training Data") + 
  autolayer(test_cass, series = "Test Data") +
  ggtitle("Holt-Winters Forecast vs Actual Data") + 
  xlab("Year") + 
  ylab("Index Value") +
  theme_minimal()

summary(forecast_hw)

## 
## Forecast method: HoltWinters
## 
## Model Information:
## Holt-Winters exponential smoothing with trend and additive seasonal component.
## 
## Call:
## HoltWinters(x = train_cass)
## 
## Smoothing parameters:
##  alpha: 0.7806181
##  beta : 0.005748409
##  gamma: 0.3446114
## 
## Coefficients:
##              [,1]
## a    1.1550541181
## b   -0.0002334739
## s1   0.0337202845
## s2   0.0106405370
## s3   0.0339269398
## s4   0.0165404573
## s5  -0.0035369649
## s6   0.0139345045
## s7   0.0174943974
## s8   0.0069579928
## s9   0.0037015421
## s10 -0.0195232914
## s11 -0.0450511148
## s12  0.0149146306
## 
## Error measures:
##                      ME       RMSE        MAE       MPE     MAPE      MASE
## Training set 0.00286887 0.04066238 0.03111358 0.2112648 2.877347 0.4534412
##                    ACF1
## Training set 0.04881095
## 
## Forecasts:
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Mar 2023       1.188541 1.1364924 1.240589 1.1089396 1.268142
## Apr 2023       1.165228 1.0990546 1.231401 1.0640246 1.266431
## May 2023       1.188281 1.1103842 1.266177 1.0691483 1.307413
## Jun 2023       1.170661 1.0824786 1.258843 1.0357978 1.305524
## Jul 2023       1.150350 1.0528630 1.247837 1.0012566 1.299443
## Aug 2023       1.167588 1.0615183 1.273657 1.0053685 1.329807
## Sep 2023       1.170914 1.0568205 1.285008 0.9964230 1.345405
## Oct 2023       1.160144 1.0384739 1.281815 0.9740655 1.346223
## Nov 2023       1.156654 1.0277753 1.285533 0.9595509 1.353758
## Dec 2023       1.133196 0.9974174 1.268975 0.9255406 1.340852
## Jan 2024       1.107435 0.9650203 1.249849 0.8896306 1.325239
## Feb 2024       1.167167 1.0183448 1.315989 0.9395631 1.394771
## Mar 2024       1.185739 1.0295607 1.341918 0.9468847 1.424594
## Apr 2024       1.162426 1.0002579 1.324594 0.9144113 1.410441
## May 2024       1.185479 1.0174739 1.353484 0.9285374 1.442420
## Jun 2024       1.167859 0.9941539 1.341564 0.9022000 1.433518
## Jul 2024       1.147548 0.9682665 1.326830 0.8733606 1.421736
## Aug 2024       1.164786 0.9800401 1.349532 0.8822415 1.447331

checkresiduals(forecast_hw)

## 
##  Ljung-Box test
## 
## data:  Residuals from HoltWinters
## Q* = 89.252, df = 24, p-value = 1.917e-09
## 
## Model df: 0.   Total lags used: 24

accuracy(forecast_hw, test_cass)

##                       ME       RMSE        MAE        MPE     MAPE      MASE
## Training set  0.00286887 0.04066238 0.03111358  0.2112648 2.877347 0.4534412
## Test set     -0.04716643 0.05260233 0.04716643 -4.2779168 4.277917 0.6873912
##                    ACF1 Theil's U
## Training set 0.04881095        NA
## Test set     0.64179809  1.784777

print("For the near future, the Holt-Winters model predicts that the Cass Shipments Index will likely stay consistent without major increases or decreases. However, it will continue showing seasonal ups and downs similar to what we've seen in the past. The forecast fits the data reasonably well, but it may not be entirely precise, especially further out in time. You can expect the index to fluctuate seasonally, but with some room for deviation from the forecasted values.")

## [1] "For the near future, the Holt-Winters model predicts that the Cass Shipments Index will likely stay consistent without major increases or decreases. However, it will continue showing seasonal ups and downs similar to what we've seen in the past. The forecast fits the data reasonably well, but it may not be entirely precise, especially further out in time. You can expect the index to fluctuate seasonally, but with some room for deviation from the forecasted values."

ARIMA_CASS_Model

Satya Panda

2024-09-24

R Markdown

The below code covers the Loading libraries and preparing the data for the modeling

Including Plots