Consider the following three models:
Model 1 is an AR(1);
Model 2 has AR terms at lags 1 and 8 only;
Model 3 has AR terms at
lags 1 and 8, and an MA term at lag 8.
For each model, perform
regression and the ACF, PACF and Q-Statistics for the residuals
library(forecast)
library(tseries)
library(TSA)
data <- read.csv("M3_Time_series data/module3_data_PE_Ratios.csv")
data$date <- as.Date(data$date)
data_ts <- ts(na.omit(data$pe_ind), frequency = 12) # Monthly data
model1 <- arima(data_ts, order = c(1, 0, 0), method = "ML")
summary(model1)
##
## Call:
## arima(x = data_ts, order = c(1, 0, 0), method = "ML")
##
## Coefficients:
## ar1 intercept
## 0.9079 17.3202
## s.e. 0.0300 1.0187
##
## sigma^2 estimated as 1.771: log likelihood = -311.14, aic = 626.28
##
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
## ME RMSE MAE MPE MAPE
## Training set NaN NaN NaN NaN NaN
acf(residuals(model1), main = "Model 1 Residual ACF")
pacf(residuals(model1), main = "Model 1 Residual PACF")
model2 <- arima(data_ts,
order = c(8, 0, 0),
fixed = c(NA, 0, 0, 0, 0, 0, 0, NA, # AR lags 1–8
NA)) # Constant (intercept)
## Warning in stats::arima(x = x, order = order, seasonal = seasonal, xreg = xreg,
## : some AR parameters were fixed: setting transform.pars = FALSE
summary(model2)
##
## Call:
## arima(x = data_ts, order = c(8, 0, 0), fixed = c(NA, 0, 0, 0, 0, 0, 0, NA, NA))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8 intercept
## 0.9342 0 0 0 0 0 0 -0.0744 17.2535
## s.e. 0.0319 0 0 0 0 0 0 0.0324 0.6850
##
## sigma^2 estimated as 1.72: log likelihood = -308.57, aic = 623.14
##
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
## ME RMSE MAE MPE MAPE
## Training set NaN NaN NaN NaN NaN
acf(residuals(model2), main = "Model 2 Residual ACF")
pacf(residuals(model2), main = "Model 2 Residual PACF")
model3 <- arima(data_ts,
order = c(8, 0, 8),
fixed = c(
NA, 0, 0, 0, 0, 0, 0, NA, # AR(1 and 8)
0, 0, 0, 0, 0, 0, 0, NA, # MA(8)
NA # Constant term
))
## Warning in stats::arima(x = x, order = order, seasonal = seasonal, xreg = xreg,
## : some AR parameters were fixed: setting transform.pars = FALSE
summary(model3)
##
## Call:
## arima(x = data_ts, order = c(8, 0, 8), fixed = c(NA, 0, 0, 0, 0, 0, 0, NA, 0,
## 0, 0, 0, 0, 0, 0, NA, NA))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8 ma1 ma2 ma3 ma4 ma5
## 0.9505 0 0 0 0 0 0 -0.0426 0 0 0 0 0
## s.e. 0.0314 0 0 0 0 0 0 0.0331 0 0 0 0 0
## ma6 ma7 ma8 intercept
## 0 0 -0.2441 17.2931
## s.e. 0 0 0.0772 0.7631
##
## sigma^2 estimated as 1.636: log likelihood = -304.25, aic = 616.5
##
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
## ME RMSE MAE MPE MAPE
## Training set NaN NaN NaN NaN NaN
acf(residuals(model3), main = "Model 3 Residual ACF")
pacf(residuals(model3), main = "Model 3 Residual PACF")
# Calculate SSRs
ssr_model1 <- sum(residuals(model1)^2, na.rm = TRUE)
ssr_model2 <- sum(residuals(model2)^2, na.rm = TRUE)
ssr_model3 <- sum(residuals(model3)^2, na.rm = TRUE)
# Create comparison table
Compare <- data.frame(
Model = c("Model 1: AR(1)", "Model 2: AR(1)+AR(8)", "Model 3: AR(1,8)+MA(8)"),
AIC = c(model1$aic, model2$aic, model3$aic),
SSR = c(ssr_model1, ssr_model2, ssr_model3),
LjungBox_pval = c(
Box.test(residuals(model1), lag = 10, type = "Ljung-Box")$p.value,
Box.test(residuals(model2), lag = 10, type = "Ljung-Box")$p.value,
Box.test(residuals(model3), lag = 10, type = "Ljung-Box")$p.value
)
)
print(Compare)
## Model AIC SSR LjungBox_pval
## 1 Model 1: AR(1) 626.2762 322.3615 0.1096526
## 2 Model 2: AR(1)+AR(8) 623.1420 313.1144 0.2839260
## 3 Model 3: AR(1,8)+MA(8) 616.4953 297.8219 0.9374738