Comparing Models

Consider the following three models:
Model 1 is an AR(1);
Model 2 has AR terms at lags 1 and 8 only;
Model 3 has AR terms at lags 1 and 8, and an MA term at lag 8.
For each model, perform regression and the ACF, PACF and Q-Statistics for the residuals

Model 1: AR(1)

library(forecast)
library(tseries)
library(TSA)

data <- read.csv("M3_Time_series data/module3_data_PE_Ratios.csv")
data$date <- as.Date(data$date)
data_ts <- ts(na.omit(data$pe_ind), frequency = 12)  # Monthly data

model1 <- arima(data_ts, order = c(1, 0, 0), method = "ML")
summary(model1)
## 
## Call:
## arima(x = data_ts, order = c(1, 0, 0), method = "ML")
## 
## Coefficients:
##          ar1  intercept
##       0.9079    17.3202
## s.e.  0.0300     1.0187
## 
## sigma^2 estimated as 1.771:  log likelihood = -311.14,  aic = 626.28
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN


Residuals diagnostics for Model 1

acf(residuals(model1), main = "Model 1 Residual ACF")

pacf(residuals(model1), main = "Model 1 Residual PACF")


Model 2: AR(1) + AR(8)

model2 <- arima(data_ts,
                order = c(8, 0, 0),
                fixed = c(NA, 0, 0, 0, 0, 0, 0, NA,  # AR lags 1–8
                          NA))                      # Constant (intercept)
## Warning in stats::arima(x = x, order = order, seasonal = seasonal, xreg = xreg,
## : some AR parameters were fixed: setting transform.pars = FALSE
summary(model2)
## 
## Call:
## arima(x = data_ts, order = c(8, 0, 0), fixed = c(NA, 0, 0, 0, 0, 0, 0, NA, NA))
## 
## Coefficients:
##          ar1  ar2  ar3  ar4  ar5  ar6  ar7      ar8  intercept
##       0.9342    0    0    0    0    0    0  -0.0744    17.2535
## s.e.  0.0319    0    0    0    0    0    0   0.0324     0.6850
## 
## sigma^2 estimated as 1.72:  log likelihood = -308.57,  aic = 623.14
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN


Residuals diagnostics for Model 2

acf(residuals(model2), main = "Model 2 Residual ACF")

pacf(residuals(model2), main = "Model 2 Residual PACF")


Model 3: AR(1) + AR(8) + MA(8)

model3 <- arima(data_ts,
                order = c(8, 0, 8),
                fixed = c(
                  NA, 0, 0, 0, 0, 0, 0, NA,     # AR(1 and 8)
                  0, 0, 0, 0, 0, 0, 0, NA,     # MA(8)
                  NA                           # Constant term
                ))
## Warning in stats::arima(x = x, order = order, seasonal = seasonal, xreg = xreg,
## : some AR parameters were fixed: setting transform.pars = FALSE
summary(model3)
## 
## Call:
## arima(x = data_ts, order = c(8, 0, 8), fixed = c(NA, 0, 0, 0, 0, 0, 0, NA, 0, 
##     0, 0, 0, 0, 0, 0, NA, NA))
## 
## Coefficients:
##          ar1  ar2  ar3  ar4  ar5  ar6  ar7      ar8  ma1  ma2  ma3  ma4  ma5
##       0.9505    0    0    0    0    0    0  -0.0426    0    0    0    0    0
## s.e.  0.0314    0    0    0    0    0    0   0.0331    0    0    0    0    0
##       ma6  ma7      ma8  intercept
##         0    0  -0.2441    17.2931
## s.e.    0    0   0.0772     0.7631
## 
## sigma^2 estimated as 1.636:  log likelihood = -304.25,  aic = 616.5
## 
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN


Residuals diagnostics for Model 3

acf(residuals(model3), main = "Model 3 Residual ACF")

pacf(residuals(model3), main = "Model 3 Residual PACF")

Best Model?

  1. AIC/ BIC -> Lower is better.
  2. SSR -> Lower is better.
  3. Q-test (Ljung-Box Test) -> p > 0.05 -> Fail to reject. Residuals are not autocorrelated (white noise)

# Calculate SSRs
ssr_model1 <- sum(residuals(model1)^2, na.rm = TRUE)
ssr_model2 <- sum(residuals(model2)^2, na.rm = TRUE)
ssr_model3 <- sum(residuals(model3)^2, na.rm = TRUE)

# Create comparison table
Compare <- data.frame(
  Model = c("Model 1: AR(1)", "Model 2: AR(1)+AR(8)", "Model 3: AR(1,8)+MA(8)"),
  AIC = c(model1$aic, model2$aic, model3$aic),
  SSR = c(ssr_model1, ssr_model2, ssr_model3),
  LjungBox_pval = c(
    Box.test(residuals(model1), lag = 10, type = "Ljung-Box")$p.value,
    Box.test(residuals(model2), lag = 10, type = "Ljung-Box")$p.value,
    Box.test(residuals(model3), lag = 10, type = "Ljung-Box")$p.value
  )
)

print(Compare)
##                    Model      AIC      SSR LjungBox_pval
## 1         Model 1: AR(1) 626.2762 322.3615     0.1096526
## 2   Model 2: AR(1)+AR(8) 623.1420 313.1144     0.2839260
## 3 Model 3: AR(1,8)+MA(8) 616.4953 297.8219     0.9374738