load("/Users/fahadmehfooz/Downloads/tsa3.rda")
# 1st ARMA(1,1) simulation
Arma_sim1<-arima.sim(list(order = c(1,0,1), ar = 0.9,ma=-0.9), n = 500)
plot(Arma_sim1, main = "First Simulation")
acf(Arma_sim1, main = "Sample ACF Simulation 1")
pacf(Arma_sim1, main = "Sample PACF Simulation 1")
(arima(Arma_sim1, order = c(1, 0, 1)))
## Warning in arima(Arma_sim1, order = c(1, 0, 1)): possible convergence problem:
## optim gave code = 1
##
## Call:
## arima(x = Arma_sim1, order = c(1, 0, 1))
##
## Coefficients:
## ar1 ma1 intercept
## -0.7276 0.7658 -0.0278
## s.e. 0.2714 0.2536 0.0480
##
## sigma^2 estimated as 1.103: log likelihood = -733.89, aic = 1475.78
Arma_sim2<-arima.sim(list(order = c(1,0,1), ar = 0.9,ma=-0.9), n = 500)
plot(Arma_sim2, main = "Second Simulation")
acf(Arma_sim2, main = "Sample ACF Simulation 2")
pacf(Arma_sim2, main = "Sample PACF Simulation 2")
(arima(Arma_sim2, order = c(1, 0, 1)))
## Warning in arima(Arma_sim2, order = c(1, 0, 1)): possible convergence problem:
## optim gave code = 1
##
## Call:
## arima(x = Arma_sim2, order = c(1, 0, 1))
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ma1 intercept
## 0.2501 -0.1996 0.0347
## s.e. NaN NaN 0.0479
##
## sigma^2 estimated as 1.009: log likelihood = -711.64, aic = 1431.28
# 3rd ARMA(1,1) simulation
Arma_sim3<-arima.sim(list(order = c(1,0,1), ar = 0.9,ma=-0.9), n = 500)
plot(Arma_sim3, main = "Third Simulation")
acf(Arma_sim3, main = "Sample ACF Simulation 3")
pacf(Arma_sim3, main = "Sample PACF Simulation 3")
# Fitting an ARMA(1,1) model, as for the other two simulations
(arima(Arma_sim3, order = c(1, 0, 1)))
## Warning in arima(Arma_sim3, order = c(1, 0, 1)): possible convergence problem:
## optim gave code = 1
##
## Call:
## arima(x = Arma_sim3, order = c(1, 0, 1))
##
## Coefficients:
## ar1 ma1 intercept
## 0.8846 -0.8592 -0.0501
## s.e. 0.1518 0.1656 0.0538
##
## sigma^2 estimated as 0.976: log likelihood = -703.39, aic = 1414.79
The ACF in the simulations exhibits a sharp drop after the first lag, driven by the immediate effect of the MA(1) component, followed by an exponential decay that reflects the combined influence of the AR and MA components.
The PACF shows a significant spike at lag 1, highlighting the direct impact of the AR(1) component, and then gradually tapers off as the influence of more distant observations diminishes.
For all three simulations the AIC is around 1400. The standard errors of the AR and MA estimates are large (and even NaN for simulation 2), suggesting that the ARMA(1,1) fit is questionable here: with φ = 0.9 and θ = -0.9 the AR and MA polynomials cancel, so the simulated process is effectively white noise and the two parameters are redundant, which also explains the convergence warnings.
Among the three fits, the one for simulation 3 has the lowest AIC (1414.79) and coefficient estimates closest to the true values, making it the best of the three.
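Since the AR and MA polynomials cancel for φ = 0.9, θ = -0.9, the theoretical ACF and PACF of this process can be checked directly with stats::ARMAacf; a minimal sketch, independent of the three simulated series above:
# theoretical autocorrelations of the ARMA(1,1) process with ar = 0.9, ma = -0.9;
# because the polynomials cancel, every value beyond lag 0 is (numerically) zero
round(ARMAacf(ar = 0.9, ma = -0.9, lag.max = 5), 4)
round(ARMAacf(ar = 0.9, ma = -0.9, lag.max = 5, pacf = TRUE), 4)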
time <- 1:length(so2)
# Plotting initial SO2 levels
plot(time, so2, main = "SO2 Levels", xlab = "Time", ylab = "SO2 Concentration")
loess_model <- loess(so2 ~ time)
lines(time, predict(loess_model), col="blue", lwd=2)
Over time, the SO2 levels decline; a clear downward trend is visible in the loess fit.
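As a rough, supplementary check of the trend direction (a sketch only; it ignores autocorrelation in the errors, so the reported standard error should not be taken at face value), a straight-line fit to the same series gives the sign of the slope:
# slope of a simple linear fit; a negative estimate is consistent with the downward trend
trend_fit <- lm(so2 ~ time)
coef(summary(trend_fit))["time", ]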
so2_difference <- diff(so2, differences = 1)
plot(so2_difference, main="First Differenced Series SO2 Levels")
acf(so2_difference, main = "ACF SO2 difference")
pacf(so2_difference, main = "PACF SO2 difference")
The ACF cuts off after lag 1: a pronounced spike at lag 1 is followed by values that stay within the confidence bounds, which is typical of an MA(1) process. The PACF shows several significant spikes at early lags and lacks the clean cutoff expected of an AR(1) process. Based on these observations, the differenced data are consistent with an ARIMA(0,1,1) specification for the original series.
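The visual read can be backed up with a small check (a sketch using the same approximate ±1.96/√n bounds that the ACF plot draws):
# lags of the differenced series whose sample ACF lies outside the approximate 95% bounds
n <- length(so2_difference)
r <- acf(so2_difference, plot = FALSE)$acf[-1]   # drop the lag-0 value of 1
which(abs(r) > 1.96 / sqrt(n))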
## II
## Null Hypothesis (H0): the constant term in the ARIMA(0,1,1) / ARIMA(1,1,1) model is equal to zero.
## Alternative Hypothesis (H1): the constant term is not equal to zero.
so2_tr <- so2[1:500]
par(mar=c(2, 2,2,2))
arima011_with_constant <- sarima(so2_tr, 0, 1, 1, no.constant=FALSE)  # constant included
## initial value 0.167622
## iter 2 value -0.055382
## iter 3 value -0.091343
## iter 4 value -0.108251
## iter 5 value -0.111616
## iter 6 value -0.112686
## iter 7 value -0.113453
## iter 8 value -0.113972
## iter 9 value -0.113988
## iter 10 value -0.113989
## iter 11 value -0.113989
## iter 11 value -0.113989
## final value -0.113989
## converged
## initial value -0.113221
## iter 2 value -0.113221
## iter 3 value -0.113226
## iter 4 value -0.113229
## iter 5 value -0.113229
## iter 5 value -0.113229
## iter 5 value -0.113229
## final value -0.113229
## converged
t_val <- arima011_with_constant$fit$coef['constant'] /
  sqrt(diag(arima011_with_constant$fit$var.coef)['constant'])
p_value <- 2 * (1 - pt(abs(t_val), df=length(so2_tr)-2))
cat("p-value for ARMA(0,1,1) is", p_value)
## p-value for ARMA(0,1,1) is 0.7797063
cat("Constant significantly different than zero for ARMA(0,1,1) model?",
p_value < 0.05)
## Constant significantly different than zero for ARMA(0,1,1) model? FALSE
par(mar=c(2, 2,2,2))
arima111_with_constant <- sarima(so2_tr, 1, 1, 1, no.constant=FALSE)  # constant included
## initial value 0.168190
## iter 2 value -0.039066
## iter 3 value -0.081946
## iter 4 value -0.110569
## iter 5 value -0.113181
## iter 6 value -0.113793
## iter 7 value -0.113979
## iter 8 value -0.114021
## iter 9 value -0.114022
## iter 10 value -0.114022
## iter 11 value -0.114022
## iter 12 value -0.114022
## iter 12 value -0.114022
## iter 12 value -0.114022
## final value -0.114022
## converged
## initial value -0.114028
## iter 2 value -0.114031
## iter 3 value -0.114037
## iter 4 value -0.114037
## iter 5 value -0.114038
## iter 6 value -0.114038
## iter 6 value -0.114038
## iter 6 value -0.114038
## final value -0.114038
## converged
t_val <- arima111_with_constant$fit$coef['constant'] /
  sqrt(diag(arima111_with_constant$fit$var.coef)['constant'])
p_value <- 2 * (1 - pt(abs(t_val), df=length(so2_tr)-2))
cat("p-value for ARMA(1,1,1) is", p_value)
## p-value for ARMA(1,1,1) is 0.8039814
cat("Constant significantly different than zero for ARMA(1,1,1) model?",
p_value < 0.05)
## Constant significantly different than zero for ARMA(1,1,1) model? FALSE
The constants in both the ARIMA(0,1,1) and ARIMA(1,1,1) models were found to be statistically insignificant, indicating that a baseline constant does not add significant value to the models. As a result, both models were also fitted without constants, which seems more appropriate given the insignificance of the constants in the initial models that included them. These findings suggest that simpler models, without baseline adjustments, are sufficient for capturing the dynamics of the data effectively.
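The same conclusion can be cross-checked against the coefficient table that sarima prints; a sketch, assuming the installed astsa version also returns it as a $ttable component:
# p-values for the constant, read off sarima's coefficient table
# (assumes the installed astsa version returns $ttable)
round(arima011_with_constant$ttable, 4)
round(arima111_with_constant$ttable, 4)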
par(mar=c(2, 2,2,2))
# ARMA (0,1) model for the differenced series, fitted without a constant (no.constant=TRUE)
arima011_constant <- sarima(so2_tr, 0, 1, 1, no.constant=TRUE, details=F)
arima011_constant$fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## include.mean = !no.constant, optim.control = list(trace = trc, REPORT = 1,
## reltol = tol))
##
## Coefficients:
## ma1
## -0.8386
## s.e. 0.0275
##
## sigma^2 estimated as 0.7955: log likelihood = -651.59, aic = 1307.18
cat("For MA Coefficient of ARMA(0,1) Model:", abs( arima011_constant$fit$coef['ma1']/sqrt(diag(arima011_constant$fit$var.coef)['ma1'])))
## For MA Coefficient of ARMA(0,1) Model: 30.45523
arima011_constant$resid <- resid(arima011_constant$fit)  # store residuals for the diagnostics below
par(mfrow=c(2,2))
plot(arima011_constant$resid)
qqnorm(arima011_constant$resid, sub="qqplot", xlab="resid")
acf(arima011_constant$resid, 48)
pacf(arima011_constant$resid, 48)
#Shapiro-Wilk test for normality
shapiro.test(arima011_constant$resid)
##
## Shapiro-Wilk normality test
##
## data: arima011_constant$resid
## W = 0.98384, p-value = 2.348e-05
# Ljung-Box Portmanteau test for Model Adequacy
Box.test (arima011_constant$resid, lag = 48,type="Ljung")
##
## Box-Ljung test
##
## data: arima011_constant$resid
## X-squared = 72.137, df = 48, p-value = 0.01367
#Box-Pierce Statistic for Model Adequacy
Box.test (arima011_constant$resid, lag = 48)
##
## Box-Pierce test
##
## data: arima011_constant$resid
## X-squared = 68.327, df = 48, p-value = 0.02846
#McLeod-Li Portmanteau statistic for Model Adequacy
Box.test (arima011_constant$resid^2, lag = 48,type="Ljung")
##
## Box-Ljung test
##
## data: arima011_constant$resid^2
## X-squared = 68.177, df = 48, p-value = 0.02926
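As a variant of the portmanteau tests above (a sketch, not part of the original output), Box.test's fitdf argument subtracts the number of estimated ARMA coefficients from the 48 degrees of freedom, which is the usual convention when the test is applied to model residuals:
# Ljung-Box test with df reduced by the one estimated MA coefficient (fitdf = 1)
Box.test(arima011_constant$resid, lag = 48, type = "Ljung", fitdf = 1)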
# ARMA (1,1) model for the differenced series, fitted without a constant (no.constant=TRUE)
par(mar=c(2, 2,2,2))
arima111_constant <- sarima(so2_tr, 1, 1, 1, no.constant=TRUE, details=F)
arima111_constant$fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## include.mean = !no.constant, optim.control = list(trace = trc, REPORT = 1,
## reltol = tol))
##
## Coefficients:
## ar1 ma1
## -0.0518 -0.8182
## s.e. 0.0568 0.0369
##
## sigma^2 estimated as 0.7943: log likelihood = -651.18, aic = 1308.35
cat("For MA Coefficient of ARMA(1,1) Model:", abs( arima111_constant$fit$coef['ma1']/sqrt(diag(arima111_constant$fit$var.coef)['ma1'])))
## For MA Coefficient of ARMA(1,1) Model: 22.20325
cat("\nFor AR Coefficient of ARMA(1,1) Model:", abs(-0.0518 / 0.0568))
##
## For AR Coefficient of ARMA(1,1) Model: 0.9119718
cat("\nARMA(1,1) Model is stationary:", abs(polyroot(c(1, -0.0518))) > 1)
##
## ARMA(1,1) Model is stationary: TRUE
print("")
## [1] ""
cat("/nARMA(1,1) Model is invertible:", abs(polyroot(c(1, -0.8182))) > 1)
## /nARMA(1,1) Model is invertible: TRUE
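The same stationarity and invertibility checks can be computed from the stored estimates instead of typed-in values; a minimal sketch:
# roots computed from the fitted coefficients rather than hard-coded numbers
phi   <- arima111_constant$fit$coef["ar1"]
theta <- arima111_constant$fit$coef["ma1"]
abs(polyroot(c(1, -phi)))  > 1   # AR polynomial 1 - phi*B
abs(polyroot(c(1, theta))) > 1   # MA polynomial 1 + theta*B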
arima111_constant$resid <- resid(arima111_constant$fit)  # store residuals for the diagnostics below
par(mfrow=c(2,2))
plot(arima111_constant$resid)
acf(arima111_constant$resid, 48)
pacf(arima111_constant$resid, 48)
qqnorm(arima111_constant$resid, sub="qqplot", xlab="resid")
#Shapiro-Wilk test for normality
shapiro.test(arima111_constant$resid)
##
## Shapiro-Wilk normality test
##
## data: arima111_constant$resid
## W = 0.9848, p-value = 4.368e-05
# Ljung-Box Portmanteau test for Model Adequacy
Box.test (arima111_constant$resid, lag = 48,type="Ljung")
##
## Box-Ljung test
##
## data: arima111_constant$resid
## X-squared = 66.912, df = 48, p-value = 0.03683
#Box-Pierce Statistic for Model Adequacy
Box.test (arima111_constant$resid, lag = 48)
##
## Box-Pierce test
##
## data: arima111_constant$resid
## X-squared = 63.331, df = 48, p-value = 0.0681
#McLeod-Li Portmanteau statistic for Model Adequacy
Box.test (arima111_constant$resid^2, lag = 48,type="Ljung")
##
## Box-Ljung test
##
## data: arima111_constant$resid^2
## X-squared = 67.628, df = 48, p-value = 0.03236
Using ARIMA models without a constant term, distinct patterns emerged. The ARIMA(0,1,1) model has a strong and highly significant moving-average component: the MA(1) coefficient is -0.8386 with a t-statistic of about 30.5, and since its magnitude is below one the fitted model is invertible, making it well suited to forecasting.
In the ARIMA(1,1,1) model the AR component, with a coefficient of -0.0518 and a t-statistic of only 0.91, is statistically insignificant and contributes little to the model. The MA component, with a coefficient of -0.8182 and a t-statistic of about 22.2, remains highly significant.
The residual diagnostics for the ARIMA(0,1,1) and ARIMA(1,1,1) models without a constant reveal that neither model captures the full dynamics of the data. Both fail the Shapiro-Wilk test for normality, indicating non-normally distributed residuals. The Ljung-Box tests at lag 48 show significant residual autocorrelation (p = 0.014 and 0.037, respectively), and the tests on the squared residuals indicate significant heteroscedasticity (p = 0.029 and 0.032), suggesting volatility clustering that neither model accounts for.
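A quick visual counterpart to the McLeod-Li statistics (a sketch, not part of the original output) is the sample ACF of the squared residuals, where clusters of significant spikes point to the same conditional heteroscedasticity:
# ACF of the squared residuals for both no-constant models
par(mfrow = c(1, 2))
acf(arima011_constant$resid^2, 48, main = "Squared residuals, ARIMA(0,1,1)")
acf(arima111_constant$resid^2, 48, main = "Squared residuals, ARIMA(1,1,1)")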
# ARMA (0,1) Model
cat("\nAIC for the ARMA(0,1) Model:", arima011_constant$fit$aic / length(so2_tr))
##
## AIC for the ARMA(0,1) Model: 2.614351
aicc <- - 2 * arima011_constant$fit$loglik +
(2 * 2 * length(so2_tr)) / (length(so2_tr) - 3)
cat("\nAICCc for the ARMA(0,1) Model:", aicc)
##
## AICCc for the ARMA(0,1) Model: 1307.2
bic <- - 2 * arima011_constant$fit$loglik + log(length(so2_tr)) * 2
cat("\nBIC for ARMA(0,1) Model:", bic)
##
## BIC for ARMA(0,1) Model: 1315.605
# ARMA (1,1) Model
cat("\nAIC for the ARMA(1,1) Model:", arima111_constant$fit$aic / length(so2_tr))
##
## AIC for the ARMA(1,1) Model: 2.616704
aicc <- - 2 * arima111_constant$fit$loglik +
(2 * 3 * length(so2_tr)) / (length(so2_tr) - 4)
cat("\nAICc for ARMA(1,1) Model:", aicc)
##
## AICc for ARMA(1,1) Model: 1308.4
bic <- - 2 * arima111_constant$fit$loglik + log(length(so2_tr)) * 3
cat("\nBIC for the ARMA(1,1) Model:", bic)
##
## BIC for the ARMA(1,1) Model: 1320.996
AIC, AICc and BIC are all lower for the ARMA(0,1) model, so it is the better model.
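For comparison, sarima() itself reports information criteria; a sketch, assuming the installed astsa version returns these components (their scaling has varied across astsa versions, so they should only be compared within a single version):
# information criteria as reported by sarima itself (assumes these fields exist)
c(AIC = arima011_constant$AIC, AICc = arima011_constant$AICc, BIC = arima011_constant$BIC)
c(AIC = arima111_constant$AIC, AICc = arima111_constant$AICc, BIC = arima111_constant$BIC)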
par(mar=c(2, 2,2,2))
# ARMA (0,1) Model
arima011_constant.pr <- sarima.for(so2_tr, n.ahead=8, 0, 1, 1)
pred <- arima011_constant.pr$pred
obs = so2[501:508]
perror <- obs - pred
# Forecast evaluation criteria
print("\nFor ARMA(0,1) Model")
## [1] "\nFor ARMA(0,1) Model"
merror <- mean(perror)
cat("\nMean Error:", merror)
##
## Mean Error: -0.3471949
mperror <- 100 * (mean(perror / obs))
cat("\nMean Percent Error:", mperror)
##
## Mean Percent Error: -21.85522
mse <- mean(perror^2)
cat("\nMean Squared Error:", mse)
mae <- mean(abs(perror))
cat("\nMean Absolute Error:", mae)
##
## Mean Absolute Error: 0.3856524
mape <- 100 * (mean(abs((perror) / obs)))
cat("\nMean Absolute Percent Error:", mape)
##
## Mean Absolute Percent Error: 23.54195
par(mar=c(2, 2,2,2))
# ARMA (1,1) Model
arima111_constant.pr <- sarima.for(so2_tr, n.ahead=8, 1, 1, 1)
pred <- arima111_constant.pr$pred
obs = so2[501:508]
perror <- obs - pred
# Forecast evaluation criteria
print("\nFor ARMA(1,1) Model")
## [1] "\nFor ARMA(1,1) Model"
me <- mean(perror)
cat("\nMean Error:", me)
##
## Mean Error: -0.373958
mpe <- 100 * (mean(perror / obs))
cat("\nMean Percent Error:", mpe)
##
## Mean Percent Error: -23.43046
mse <- mean(perror^2)
cat("\nMean Squared Error:", mse)
mae <- mean(abs(perror))
cat("\nMean Absolute Error:", mae)
##
## Mean Absolute Error: 0.4047617
mape <- 100 * (mean(abs((perror) / obs)))
cat("\nMean Absolute Percent Error:", mape)
##
## Mean Absolute Percent Error: 24.78149
The ARIMA(0,1,1) model exhibits a Mean Absolute Percent Error (MAPE) of 23.54%, which is lower than the ARIMA(1,1,1) model’s MAPE of 24.78%. This reduced MAPE indicates that the ARIMA(0,1,1) model provides more precise and effective forecasts of SO2 levels, making it a better option compared to the ARIMA(1,1,1) model.
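To avoid retyping the block of accuracy measures for each model, they can be wrapped in a small helper; this is a sketch with a hypothetical function name, using mean(e^2) for the mean squared error:
# hypothetical helper: forecast-accuracy measures for a vector of predictions
forecast_metrics <- function(obs, pred) {
  e <- obs - pred
  c(ME   = mean(e),
    MPE  = 100 * mean(e / obs),
    MSE  = mean(e^2),
    MAE  = mean(abs(e)),
    MAPE = 100 * mean(abs(e / obs)))
}
round(forecast_metrics(so2[501:508], arima011_constant.pr$pred), 4)
round(forecast_metrics(so2[501:508], arima111_constant.pr$pred), 4)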
par(mar=c(2, 2,2,2))
# fitting ARMA(0,1) Model
# predicting the future 4 values
fitarima011 <- sarima(so2, 0, 1, 1, no.constant=TRUE, details=F)
sarima.for(so2, n.ahead=4, 0, 1, 1)$pred
## Time Series:
## Start = c(1979, 41)
## End = c(1979, 44)
## Frequency = 52
## [1] 1.816620 1.814207 1.811795 1.809382
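sarima.for also returns the forecast standard errors, so approximate 95% prediction intervals for the same four steps can be obtained as a short extension (a sketch):
# approximate 95% prediction intervals from the forecast object ($pred and $se)
fc <- sarima.for(so2, n.ahead = 4, 0, 1, 1)
cbind(lower = fc$pred - 1.96 * fc$se,
      point = fc$pred,
      upper = fc$pred + 1.96 * fc$se)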