library(dynlm)
library(ggplot2)
library(AER)
library(Hmisc)
library(forecast)
library(x12)
library(dLagM)
library(TSA)
library(readr)
library(forecast)
After checking the plot and ACF of the solar radiation dataset, it’s noted that trend is not very obvious in the plot but seasonality is very strong. There are some changing variance through time. Moving average is not so significant. There is some autoregressive behavious. No intervention point is noted. The correlation coefficient -0.4540277 indicates a moderate negative correlation between radiation and precipitation series. Sample CCF after prewhitening shows that there are not that many significant lags. Solar radiation series and precipitation series are not that correlated. Despite that, I’ll still try fit some time series regression model to do some forecasting.
radiation <- read.csv("C:/RMIT/Forecasting S2 19/Assignment 2/data1.csv")
solar <- ts(radiation$solar, start = c(1960, 1), frequency = 12)
ppt <- ts(radiation$ppt, start = c(1960, 1), frequency = 12)
radiation <- ts(radiation[,1:2], start = c(1960, 1), frequency = 12)
# Plot individually
plot(solar, xlab = "Time", ylab = "Monthly Solar Radiation", main = "Monthly Average Horizontal Solar Radiation")
plot(ppt, xlab = "Time", ylab = "Monthly precipitation", main = "Monthly Precipitation Measured At the Same Point")
# Check plot and ACF of the solar radiation dataset. The trend is not very obvious in the plot but seasonality is very strong. There are some changing variance through time. Moving average is not so significant. There is some autoregressive behavious. No intervention point is noted.
plot(solar, xlab = "Time", ylab = "Monthly Solar Radiation", main = "Monthly Average Horizontal Solar Radiation")
points(y = solar, x = time(solar), pch = as.vector(season(solar)))
acf(solar, max.lag = 48, main = "Sample ACF for monthly average horizontal solar radiation")
# Plot together in a multi-panel plot. There appears a negative correlation between the two series
plot(radiation, main = "Monthly Solar Radiation and Precipitation Series")
# Plot in one frame
plot(radiation, plot.type = "s", col = c("blue", "red"), main = "Monthly Solar Radiation and Precipitation Series")
legend("topright", lty = 1, text.width = 4, cex = 0.7, col = c("blue", "red"), c("Solar Radiation", "Precipitation"))
# Scale and centre both series to see the plot
radiation.scaled <- scale(radiation)
plot(radiation.scaled, plot.type = "s", col = c("blue", "red"))
legend("topleft", lty = 1, text.width = 4, cex = 1, col = c("blue", "red"), c("Solar Radiation", "Precipitation"))
# The correlation coefficient -0.4540277 indicates a moderate negative correlation between radiation and precipitation series
cor(radiation)
## solar ppt
## solar 1.0000000 -0.4540277
## ppt -0.4540277 1.0000000
# Check for spurious correlation. Based on CCF results, almost all of the cross-correlations are significantly different from zero.
plot(radiation,yax.flip=T)
ccf(as.vector(solar), as.vector(ppt),ylab='CCF', main = "Sample CCF between average monthly solar radiation and precipitation")
# Prewhitening
radiation.dif=ts.intersect(diff(diff(solar,12)),diff(diff(ppt,12)))
prewhiten(as.vector(radiation.dif[,1]),as.vector(radiation.dif[,2]),ylab='CCF', main="Sample CCF after prewhitening")
# Sample CCF after prewhitening shows that there are not that many significant lags. Solar radiation series and precipitation series are not that correlated.
# Despite that, I'll still try fit some time series regression model to do some forecasting.
# Fit a finite DLM with a lag length based on AIC and BIC. Then do the diagnostic checking
for (i in 1:24) {
model1 = dlm(x = as.vector(ppt), y = as.vector(solar), q = i)
cat("q = ", i, "AIC = ", AIC(model1$model), "BIC = ", BIC(model1$model), "\n")
}
## q = 1 AIC = 4728.713 BIC = 4746.676
## q = 2 AIC = 4712.649 BIC = 4735.095
## q = 3 AIC = 4688.551 BIC = 4715.478
## q = 4 AIC = 4663.6 BIC = 4695.003
## q = 5 AIC = 4644.622 BIC = 4680.499
## q = 6 AIC = 4637.489 BIC = 4677.837
## q = 7 AIC = 4632.716 BIC = 4677.532
## q = 8 AIC = 4625.986 BIC = 4675.267
## q = 9 AIC = 4615.084 BIC = 4668.827
## q = 10 AIC = 4602.658 BIC = 4660.858
## q = 11 AIC = 4590.961 BIC = 4653.617
## q = 12 AIC = 4578.787 BIC = 4645.895
## q = 13 AIC = 4570.989 BIC = 4642.546
## q = 14 AIC = 4566.136 BIC = 4642.139
## q = 15 AIC = 4561.685 BIC = 4642.131
## q = 16 AIC = 4554.448 BIC = 4639.335
## q = 17 AIC = 4540.348 BIC = 4629.671
## q = 18 AIC = 4522.58 BIC = 4616.336
## q = 19 AIC = 4500.016 BIC = 4598.203
## q = 20 AIC = 4477.672 BIC = 4580.286
## q = 21 AIC = 4458.487 BIC = 4565.525
## q = 22 AIC = 4439.869 BIC = 4551.327
## q = 23 AIC = 4422.803 BIC = 4538.679
## q = 24 AIC = 4408.288 BIC = 4528.578
# The smallest AIC/BIC: q = 24 AIC = 4408.288 BIC = 4528.578. We have sufficient data to have a lag length of 24.
model1 = dlm( x = as.vector(ppt) , y = as.vector(solar), q = 24 )
summary(model1)
##
## Call:
## lm(formula = model.formula, data = design)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.5773 -4.8558 -0.6234 3.8217 29.4583
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.7212315 1.1927724 19.887 < 2e-16 ***
## x.t -3.6622718 1.8815366 -1.946 0.05206 .
## x.1 0.1749499 2.4375720 0.072 0.94281
## x.2 0.6347556 2.4632513 0.258 0.79673
## x.3 1.9474899 2.4640926 0.790 0.42963
## x.4 1.5307497 2.4667574 0.621 0.53513
## x.5 2.5333039 2.4641084 1.028 0.30432
## x.6 -0.4693138 2.4652959 -0.190 0.84908
## x.7 0.9447232 2.4626756 0.384 0.70140
## x.8 -0.4885570 2.4587366 -0.199 0.84256
## x.9 -1.5431872 2.4640601 -0.626 0.53137
## x.10 -2.1213156 2.4676485 -0.860 0.39032
## x.11 -0.8545145 2.5042905 -0.341 0.73306
## x.12 -1.4907844 2.5004760 -0.596 0.55126
## x.13 -0.5624647 2.4761197 -0.227 0.82038
## x.14 -1.3672111 2.4369280 -0.561 0.57498
## x.15 0.0007113 2.4342331 0.000 0.99977
## x.16 -0.7328747 2.4309642 -0.301 0.76316
## x.17 1.0912173 2.4354063 0.448 0.65427
## x.18 -0.1154984 2.4397023 -0.047 0.96226
## x.19 1.1173451 2.4365696 0.459 0.64670
## x.20 -0.8448639 2.4372028 -0.347 0.72897
## x.21 -0.5282734 2.4408782 -0.216 0.82873
## x.22 -1.3367030 2.4329344 -0.549 0.58292
## x.23 -1.2919531 2.4075100 -0.537 0.59172
## x.24 -5.7333536 1.8500815 -3.099 0.00203 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.577 on 610 degrees of freedom
## Multiple R-squared: 0.4029, Adjusted R-squared: 0.3784
## F-statistic: 16.46 on 25 and 610 DF, p-value: < 2.2e-16
##
## AIC and BIC values for the model:
## AIC BIC
## 1 4408.288 4528.578
checkresiduals(model1$model)
##
## Breusch-Godfrey test for serial correlation of order up to 29
##
## data: Residuals
## LM test = 576.8, df = 29, p-value < 2.2e-16
MASE(model1) # MASE 1.5516
## MASE
## model1 1.458847
# R-squared value is 0.3784, which is not very high.
# The overall model is significant at 5% level of significance with a p-value of 2.2e-16. However, almost all coefficients are insignificant.
# Looking at the residuals, there exists some repeating patterns. ACF plot shows significant correlation. The histogram is more skewed than a normal distribution.
# BG test shows the serial correlation left in residuals is significant.
vif.model1 <- vif(model1$model)
vif.model1
## x.t x.1 x.2 x.3 x.4 x.5 x.6 x.7
## 4.764779 8.005923 8.189745 8.212209 8.214170 8.174414 8.164388 8.130238
## x.8 x.9 x.10 x.11 x.12 x.13 x.14 x.15
## 8.105286 8.128678 8.140071 8.369026 8.348944 8.301408 8.118556 8.194497
## x.16 x.17 x.18 x.19 x.20 x.21 x.22 x.23
## 8.203976 8.232078 8.252162 8.232513 8.233085 8.222503 8.161616 7.983598
## x.24
## 4.750722
# VIF values are all less than 10. The model doesn't have multicollinearity issues.
# Fit a polynomial DLM of order 2 to 4.
for (i in 3:24) {
for (j in 2:4) {
model2 = polyDlm(x = as.vector(ppt), y = as.vector(solar), q = i, k = j, show.beta = FALSE)
cat("q = ", i, "k = ", j, "AIC = ", AIC(model2$model), "BIC = ", BIC(model2$model), "\n")
}
}
## q = 3 k = 2 AIC = 4689.018 BIC = 4711.457
## q = 3 k = 3 AIC = 4688.551 BIC = 4715.478
## q = 3 k = 4 AIC = 4688.551 BIC = 4715.478
## q = 4 k = 2 AIC = 4664.741 BIC = 4687.171
## q = 4 k = 3 AIC = 4661.618 BIC = 4688.535
## q = 4 k = 4 AIC = 4663.6 BIC = 4695.003
## q = 5 k = 2 AIC = 4645.25 BIC = 4667.673
## q = 5 k = 3 AIC = 4640.873 BIC = 4667.781
## q = 5 k = 4 AIC = 4642.816 BIC = 4674.209
## q = 6 k = 2 AIC = 4634.526 BIC = 4656.942
## q = 6 k = 3 AIC = 4632.682 BIC = 4659.58
## q = 6 k = 4 AIC = 4633.606 BIC = 4664.988
## q = 7 k = 2 AIC = 4627.974 BIC = 4650.382
## q = 7 k = 3 AIC = 4627.295 BIC = 4654.184
## q = 7 k = 4 AIC = 4627.207 BIC = 4658.578
## q = 8 k = 2 AIC = 4620.47 BIC = 4642.87
## q = 8 k = 3 AIC = 4620.886 BIC = 4647.766
## q = 8 k = 4 AIC = 4618.646 BIC = 4650.007
## q = 9 k = 2 AIC = 4607.861 BIC = 4630.253
## q = 9 k = 3 AIC = 4608.922 BIC = 4635.793
## q = 9 k = 4 AIC = 4605.962 BIC = 4637.312
## q = 10 k = 2 AIC = 4591.904 BIC = 4614.289
## q = 10 k = 3 AIC = 4592.945 BIC = 4619.807
## q = 10 k = 4 AIC = 4591.634 BIC = 4622.973
## q = 11 k = 2 AIC = 4577.156 BIC = 4599.533
## q = 11 k = 3 AIC = 4577.158 BIC = 4604.011
## q = 11 k = 4 AIC = 4577.773 BIC = 4609.101
## q = 12 k = 2 AIC = 4567.969 BIC = 4590.339
## q = 12 k = 3 AIC = 4563.099 BIC = 4589.942
## q = 12 k = 4 AIC = 4564.433 BIC = 4595.751
## q = 13 k = 2 AIC = 4579.717 BIC = 4602.079
## q = 13 k = 3 AIC = 4553.336 BIC = 4580.17
## q = 13 k = 4 AIC = 4554.87 BIC = 4586.177
## q = 14 k = 2 AIC = 4634.193 BIC = 4656.547
## q = 14 k = 3 AIC = 4546.325 BIC = 4573.15
## q = 14 k = 4 AIC = 4547.95 BIC = 4579.246
## q = 15 k = 2 AIC = 4723.198 BIC = 4745.544
## q = 15 k = 3 AIC = 4540.236 BIC = 4567.051
## q = 15 k = 4 AIC = 4541.89 BIC = 4573.175
## q = 16 k = 2 AIC = 4775.508 BIC = 4797.846
## q = 16 k = 3 AIC = 4532.601 BIC = 4559.407
## q = 16 k = 4 AIC = 4533.378 BIC = 4564.652
## q = 17 k = 2 AIC = 4748.973 BIC = 4771.304
## q = 17 k = 3 AIC = 4522.137 BIC = 4548.934
## q = 17 k = 4 AIC = 4517.91 BIC = 4549.173
## q = 18 k = 2 AIC = 4688.46 BIC = 4710.783
## q = 18 k = 3 AIC = 4521.368 BIC = 4548.156
## q = 18 k = 4 AIC = 4497.388 BIC = 4528.64
## q = 19 k = 2 AIC = 4650.66 BIC = 4672.975
## q = 19 k = 3 AIC = 4547.909 BIC = 4574.687
## q = 19 k = 4 AIC = 4471.74 BIC = 4502.981
## q = 20 k = 2 AIC = 4671.102 BIC = 4693.409
## q = 20 k = 3 AIC = 4620.552 BIC = 4647.321
## q = 20 k = 4 AIC = 4447.518 BIC = 4478.748
## q = 21 k = 2 AIC = 4705.769 BIC = 4728.069
## q = 21 k = 3 AIC = 4687.75 BIC = 4714.51
## q = 21 k = 4 AIC = 4427.129 BIC = 4458.349
## q = 22 k = 2 AIC = 4660.109 BIC = 4682.401
## q = 22 k = 3 AIC = 4657.917 BIC = 4684.667
## q = 22 k = 4 AIC = 4407.213 BIC = 4438.421
## q = 23 k = 2 AIC = 4560.583 BIC = 4582.867
## q = 23 k = 3 AIC = 4562.51 BIC = 4589.251
## q = 23 k = 4 AIC = 4389.327 BIC = 4420.525
## q = 24 k = 2 AIC = 4486.217 BIC = 4508.493
## q = 24 k = 3 AIC = 4483.412 BIC = 4510.143
## q = 24 k = 4 AIC = 4386.174 BIC = 4417.361
# When q = 24, k = 4, the modeo has the lowest AIC = 4386.174 and BIC = 4417.361
model2 = polyDlm(x = as.vector(ppt), y = as.vector(solar), q = 24, k = 4, show.beta = TRUE)
## Estimates and t-tests for beta coefficients:
## Estimate Std. Error t value P(>|t|)
## beta.0 -1.4100 0.492 -2.870 4.24e-03
## beta.1 0.3010 0.390 0.772 4.40e-01
## beta.2 1.2800 0.369 3.470 5.63e-04
## beta.3 1.6800 0.369 4.570 6.00e-06
## beta.4 1.6600 0.363 4.590 5.46e-06
## beta.5 1.3500 0.348 3.880 1.15e-04
## beta.6 0.8620 0.332 2.600 9.53e-03
## beta.7 0.2920 0.321 0.912 3.62e-01
## beta.8 -0.2750 0.320 -0.861 3.90e-01
## beta.9 -0.7760 0.329 -2.360 1.87e-02
## beta.10 -1.1600 0.342 -3.390 7.36e-04
## beta.11 -1.3900 0.353 -3.960 8.52e-05
## beta.12 -1.4700 0.357 -4.110 4.59e-05
## beta.13 -1.3700 0.353 -3.890 1.13e-04
## beta.14 -1.1300 0.342 -3.310 9.99e-04
## beta.15 -0.7770 0.328 -2.370 1.80e-02
## beta.16 -0.3600 0.316 -1.140 2.55e-01
## beta.17 0.0538 0.312 0.172 8.63e-01
## beta.18 0.3810 0.318 1.200 2.32e-01
## beta.19 0.5220 0.331 1.580 1.16e-01
## beta.20 0.3610 0.344 1.050 2.95e-01
## beta.21 -0.2360 0.350 -0.674 5.01e-01
## beta.22 -1.4200 0.353 -4.010 6.69e-05
## beta.23 -3.3500 0.381 -8.790 1.50e-17
## beta.24 -6.2100 0.489 -12.700 6.76e-33
summary(model2)
##
## Call:
## "Y ~ (Intercept) + X.t"
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.905 -4.932 -0.348 3.929 29.202
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.347e+01 1.180e+00 19.892 < 2e-16 ***
## z.t0 -1.413e+00 4.921e-01 -2.870 0.00424 **
## z.t1 2.141e+00 2.886e-01 7.420 3.8e-13 ***
## z.t2 -4.580e-01 5.183e-02 -8.837 < 2e-16 ***
## z.t3 3.152e-02 3.279e-03 9.614 < 2e-16 ***
## z.t4 -6.875e-04 6.665e-05 -10.314 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.561 on 630 degrees of freedom
## Multiple R-squared: 0.3858, Adjusted R-squared: 0.3809
## F-statistic: 79.15 on 5 and 630 DF, p-value: < 2.2e-16
checkresiduals(model2$model)
##
## Breusch-Godfrey test for serial correlation of order up to 10
##
## data: Residuals
## LM test = 562.07, df = 10, p-value < 2.2e-16
MASE(model2) # MASE 1.477864
## MASE
## model2 1.477864
# R-squared value is 0.3809, which is not very high.
# This model and all parameters are significant at 5% level of significance.
# The standard errors of estimators are much less than their unconstrained counterparts, meaning the polinomial DLM is more precise.
# Looking at the residuals, there exists some non random patterns. ACF plot shows significant correlation. The histogram is also more skewed than a normal distribution.
# BG test shows the serial correlation left in residuals is significant.
# Fit the Koyck model.
model3 = koyckDlm(x = as.vector(ppt) , y = as.vector(solar))
summary(model3$model, diagnostics = TRUE)
##
## Call:
## "Y ~ (Intercept) + Y.1 + X.t"
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.0926 -3.5961 0.3176 3.6103 14.8399
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.23925 0.76549 -2.925 0.00356 **
## Y.1 0.98546 0.02424 40.650 < 2e-16 ***
## X.t 5.34684 0.84383 6.336 4.37e-10 ***
##
## Diagnostic tests:
## df1 df2 statistic p-value
## Weak instruments 1 656 710.7 <2e-16 ***
## Wu-Hausman 1 655 146.8 <2e-16 ***
## Sargan 0 NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.814 on 656 degrees of freedom
## Multiple R-Squared: 0.7598, Adjusted R-squared: 0.7591
## Wald test: 1104 on 2 and 656 DF, p-value: < 2.2e-16
checkresiduals(model3$model)
MASE(model3) # MASE is better 1.032483
## MASE
## model3 1.032483
# Weak Instruments: the model at the first stage of the least squares fitting is significant at 5% level of significance.
# This model and both parameters are significant at 5% level. Adjusted R-squared value is 0.7591.
# Wu-Hausman test output - the correlation between explanatory variable and error term is significant at 5% level. As endogeneity is present, we'll use instrumental variable estimator instead of ordinary least square estimator.
# Based on the residuals check, non random patterns exist in residuals. ACF plot shows significant correlation. The histogram is not a normal distribution.
# Fit autoregressive distributed lag models and choose one of the models.
for (i in 1:5){
for(j in 1:5){
model4 = ardlDlm(x = as.vector(ppt) , y = as.vector(solar), p = i , q = j )
cat("p = ", i, "q = ", j, "AIC = ", AIC(model4$model), "BIC = ", BIC(model4$model),"\n")
}
}
## p = 1 q = 1 AIC = 3712.311 BIC = 3734.765
## p = 1 q = 2 AIC = 3239.416 BIC = 3266.352
## p = 1 q = 3 AIC = 3143.522 BIC = 3174.936
## p = 1 q = 4 AIC = 3138.399 BIC = 3174.288
## p = 1 q = 5 AIC = 3100.283 BIC = 3140.644
## p = 2 q = 1 AIC = 3639.223 BIC = 3666.159
## p = 2 q = 2 AIC = 3229.051 BIC = 3260.476
## p = 2 q = 3 AIC = 3137.634 BIC = 3173.535
## p = 2 q = 4 AIC = 3132.962 BIC = 3173.337
## p = 2 q = 5 AIC = 3097.288 BIC = 3142.134
## p = 3 q = 1 AIC = 3608.793 BIC = 3640.207
## p = 3 q = 2 AIC = 3226.623 BIC = 3262.524
## p = 3 q = 3 AIC = 3139.409 BIC = 3179.798
## p = 3 q = 4 AIC = 3134.777 BIC = 3179.638
## p = 3 q = 5 AIC = 3098.808 BIC = 3148.139
## p = 4 q = 1 AIC = 3602.664 BIC = 3638.553
## p = 4 q = 2 AIC = 3224.285 BIC = 3264.66
## p = 4 q = 3 AIC = 3131.289 BIC = 3176.15
## p = 4 q = 4 AIC = 3131.424 BIC = 3180.772
## p = 4 q = 5 AIC = 3096.024 BIC = 3149.839
## p = 5 q = 1 AIC = 3599.402 BIC = 3639.764
## p = 5 q = 2 AIC = 3221.853 BIC = 3266.699
## p = 5 q = 3 AIC = 3127.103 BIC = 3176.434
## p = 5 q = 4 AIC = 3127.868 BIC = 3181.684
## p = 5 q = 5 AIC = 3097.877 BIC = 3156.177
# p = 4, q = 5 gives the lowest AIC: AIC = 3096.024 BIC = 3149.839
# p = 3, q = 5 gives the lowest BIC: AIC = 3098.808 BIC = 3148.139
model4.1 <- ardlDlm(x = as.vector(ppt), y = as.vector(solar), p = 4, q = 5)
summary(model4.1)
##
## Time series regression with "ts" data:
## Start = 6, End = 660
##
## Call:
## dynlm(formula = as.formula(model.text), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.5901 -1.3826 -0.2867 1.0526 18.5709
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.46103 0.43729 5.628 2.72e-08 ***
## X.t -0.61439 0.54768 -1.122 0.262364
## X.1 0.78469 0.77617 1.011 0.312409
## X.2 1.28470 0.79026 1.626 0.104509
## X.3 0.80457 0.77946 1.032 0.302355
## X.4 -1.20750 0.55570 -2.173 0.030148 *
## Y.1 1.27195 0.03849 33.049 < 2e-16 ***
## Y.2 -0.01868 0.06249 -0.299 0.765112
## Y.3 -0.40480 0.06019 -6.725 3.87e-11 ***
## Y.4 -0.23201 0.06221 -3.729 0.000209 ***
## Y.5 0.21746 0.03772 5.766 1.26e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.546 on 644 degrees of freedom
## Multiple R-squared: 0.9338, Adjusted R-squared: 0.9328
## F-statistic: 908.5 on 10 and 644 DF, p-value: < 2.2e-16
checkresiduals(model4.1$model)
##
## Breusch-Godfrey test for serial correlation of order up to 14
##
## data: Residuals
## LM test = 103.8, df = 14, p-value = 8.811e-16
MASE(model4.1) # MASE 0.4479481
## MASE
## model4.1 0.4479481
# R-squared value is 0.9328, which is high enough.
# This model is significant at 5% level of significance, but the parameters for X.t, X.1, X.2, X.3 and Y.2 are not significant.
# The residuals appear somewhat random. ACF plot shows significant correlation and seasonality at lag 5. The histogram is not a normal distribution.
# BG test shows the serial correlation left in residuals is significant.
model4.2 <- ardlDlm(x = as.vector(ppt), y = as.vector(solar), p = 3, q = 5)
summary(model4.2)
##
## Time series regression with "ts" data:
## Start = 6, End = 660
##
## Call:
## dynlm(formula = as.formula(model.text), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.6649 -1.4447 -0.2663 1.0644 18.7430
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.20532 0.42237 5.221 2.40e-07 ***
## X.t -0.60604 0.54924 -1.103 0.270258
## X.1 0.89253 0.77682 1.149 0.251001
## X.2 1.60774 0.77838 2.065 0.039276 *
## X.3 -0.38294 0.55738 -0.687 0.492308
## Y.1 1.27345 0.03859 32.999 < 2e-16 ***
## Y.2 -0.02859 0.06250 -0.457 0.647495
## Y.3 -0.40351 0.06036 -6.685 5.01e-11 ***
## Y.4 -0.22632 0.06234 -3.630 0.000305 ***
## Y.5 0.22116 0.03779 5.853 7.69e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.553 on 645 degrees of freedom
## Multiple R-squared: 0.9333, Adjusted R-squared: 0.9324
## F-statistic: 1003 on 9 and 645 DF, p-value: < 2.2e-16
checkresiduals(model4.2$model)
##
## Breusch-Godfrey test for serial correlation of order up to 13
##
## data: Residuals
## LM test = 112.86, df = 13, p-value < 2.2e-16
MASE(model4.2) # MASE 0.4502885
## MASE
## model4.2 0.4502885
# R-squared value is 0.9324, which is high enough.
# This model is significant at 5% level of significance, but the parameters for X.t, X.1, X.3 and Y.2 are not significant.
# The residuals appear a little random. ACF plot shows significant correlations at some lags. The histogram is not a normal distribution.
# BG test shows the serial correlation left in residuals is significant.
# None of the time series regression methods can remove the serial correlation in the residuals. Model4.1 has good MASE value.
# Fit exponential smoothing models
# According to previous plots, we can infer the following
# A little bit of trend.
# Strong seasonality.
# Changing variance exists
# Autoregressive behaviousr exists
# No sign of an intervention
# Fit Holt-Winters' model with additive and multiplicative seasonal component and damped and exponential trend components where appropriate.
min(solar) # Non negative data values
## [1] 1.581982
BC = BoxCox.ar(solar)
BC$ci # Let lambda = 1. It's not necessary to transform the data.We will use the original series.
## [1] 0.9 0.9
model5.1 <- hw(solar, seasonal = "additive", h=2*frequency(solar))
summary(model5.1)
##
## Forecast method: Holt-Winters' additive method
##
## Model Information:
## Holt-Winters' additive method
##
## Call:
## hw(y = solar, h = 2 * frequency(solar), seasonal = "additive")
##
## Smoothing parameters:
## alpha = 0.9993
## beta = 0.0019
## gamma = 1e-04
##
## Initial states:
## l = 11.3306
## b = 0.209
## s = -10.7542 -8.4968 -3.227 3.0768 7.9264 10.9122
## 10.1422 7.3322 2.2353 -1.9528 -7.3626 -9.8316
##
## sigma: 2.3575
##
## AIC AICc BIC
## 5434.708 5435.662 5511.076
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1179476 2.328736 1.504489 -2.230359 12.68101 0.24716
## ACF1
## Training set 0.1703355
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2015 6.127399 3.1061581 9.148641 1.506810 10.74799
## Feb 2015 8.655602 4.3802222 12.930983 2.116973 15.19423
## Mar 2015 14.122221 8.8814747 19.362968 6.107191 22.13725
## Apr 2015 18.366625 12.3095941 24.423656 9.103196 27.63005
## May 2015 23.522264 16.7439512 30.300578 13.155729 33.88880
## Jun 2015 26.390991 18.9586817 33.823300 15.024255 37.75773
## Jul 2015 27.219228 19.1837595 35.254697 14.930039 39.50842
## Aug 2015 24.290519 15.6920179 32.889020 11.140246 37.44079
## Sep 2015 19.495874 10.3670365 28.624711 5.534522 33.45723
## Oct 2015 13.253765 3.6218809 22.885648 -1.476930 27.98446
## Nov 2015 8.042144 -2.0695730 18.153861 -7.422393 23.50668
## Dec 2015 5.840110 -4.7313921 16.411612 -10.327607 22.00783
## Jan 2016 6.819616 -4.1942247 17.833457 -10.024600 23.66383
## Feb 2016 9.347819 -2.0927719 20.788410 -8.149055 26.84469
## Mar 2016 14.814438 2.9609172 26.667959 -3.313958 32.94283
## Apr 2016 19.058842 6.8048110 31.312872 0.317919 37.79976
## May 2016 24.214481 11.5711780 36.857784 4.878218 43.55074
## Jun 2016 27.083208 14.0608586 40.105557 7.167243 46.99917
## Jul 2016 27.911445 14.5194061 41.303485 7.430089 48.39280
## Aug 2016 24.982736 11.2296053 38.735867 3.949138 46.01633
## Sep 2016 20.188091 6.0818048 34.294377 -1.385612 41.76179
## Oct 2016 13.945981 -0.5061083 28.398071 -8.156582 36.04855
## Nov 2016 8.734361 -6.0566988 23.525420 -13.886613 31.35533
## Dec 2016 6.532327 -8.5913309 21.655984 -16.597311 29.66196
checkresiduals(model5.1)
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' additive method
## Q* = 205.55, df = 8, p-value < 2.2e-16
##
## Model df: 16. Total lags used: 24
# MASE 0.24716, AIC 5434.708, BIC 5511.076
# The residuals look random but there are effects of seasonality left in ACF. The histogram is not a good normal distribution. Ljung-Box test reject the null hypothesis and the residuals exhibit serial correlation.
model5.2 <- hw(solar, seasonal="additive",damped = TRUE, h=2*frequency(solar))
summary(model5.2)
##
## Forecast method: Damped Holt-Winters' additive method
##
## Model Information:
## Damped Holt-Winters' additive method
##
## Call:
## hw(y = solar, h = 2 * frequency(solar), seasonal = "additive",
##
## Call:
## damped = TRUE)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 1e-04
## gamma = 1e-04
## phi = 0.9388
##
## Initial states:
## l = 11.154
## b = 0.7632
## s = -10.4919 -8.137 -3.348 2.5794 8.08 11.1219
## 9.9586 6.9916 1.9573 -1.8565 -7.1607 -9.6946
##
## sigma: 2.3446
##
## AIC AICc BIC
## 5428.422 5429.489 5509.282
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01091357 2.314163 1.498521 -1.468083 12.44796 0.2461797
## ACF1
## Training set 0.1700724
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2015 5.945198 2.940530 8.949866 1.3499548 10.54044
## Feb 2015 8.479778 4.230548 12.729009 1.9811413 14.97842
## Mar 2015 13.784309 8.579902 18.988716 5.8248548 21.74376
## Apr 2015 17.598568 11.588776 23.608360 8.4073847 26.78975
## May 2015 22.632258 15.912803 29.351713 12.3557383 32.90878
## Jun 2015 25.599150 18.238024 32.960277 14.3412786 36.85702
## Jul 2015 26.761775 18.810497 34.713053 14.6013444 38.92221
## Aug 2015 23.722251 15.221610 32.222892 10.7216429 36.72286
## Sep 2015 18.222624 9.205955 27.239293 4.4328191 32.01243
## Oct 2015 12.293259 2.788471 21.798047 -2.2430606 26.82958
## Nov 2015 7.504219 -2.464877 17.473315 -7.7421977 22.75064
## Dec 2015 5.150488 -5.262286 15.563263 -10.7744758 21.07545
## Jan 2016 5.947275 -4.891175 16.785726 -10.6287044 22.52326
## Feb 2016 8.481728 -2.766251 19.729707 -8.7205712 25.68403
## Mar 2016 13.786139 2.142988 25.429291 -4.0205242 31.59280
## Apr 2016 17.600287 5.574906 29.625668 -0.7909464 35.99152
## May 2016 22.633871 10.238009 35.029734 3.6760353 41.59171
## Jun 2016 25.600665 12.845046 38.356283 6.0926298 45.10870
## Jul 2016 26.763197 13.657667 39.868726 6.7200186 46.80637
## Aug 2016 23.723586 10.277223 37.169950 3.1591478 44.28802
## Sep 2016 18.223877 4.445085 32.002669 -2.8489663 39.29672
## Oct 2016 12.294435 -1.808972 26.397843 -9.2748652 33.86374
## Nov 2016 7.505324 -6.915414 21.926061 -14.5492911 29.55994
## Dec 2016 5.151525 -9.579726 19.882776 -17.3779790 27.68103
checkresiduals(model5.2)
##
## Ljung-Box test
##
## data: Residuals from Damped Holt-Winters' additive method
## Q* = 210.76, df = 7, p-value < 2.2e-16
##
## Model df: 17. Total lags used: 24
# MASE 0.2461797, AIC 5428.422, BIC 5509.282
# Again, the residuals look random but there are effects of seasonality left in ACF. The histogram is not a good normal distribution. Ljung-Box test reject the null hypothesis and the residuals exhibit serial correlation.
model5.3 <- hw(solar, seasonal="multiplicative", h=2*frequency(solar))
summary(model5.3)
##
## Forecast method: Holt-Winters' multiplicative method
##
## Model Information:
## Holt-Winters' multiplicative method
##
## Call:
## hw(y = solar, h = 2 * frequency(solar), seasonal = "multiplicative")
##
## Smoothing parameters:
## alpha = 0.8251
## beta = 1e-04
## gamma = 0.0259
##
## Initial states:
## l = 10.5659
## b = -0.2371
## s = 0.5024 0.6292 0.9319 1.2229 1.4923 1.6309
## 1.5606 1.3672 1.0278 0.8128 0.5106 0.3114
##
## sigma: 0.3971
##
## AIC AICc BIC
## 6648.746 6649.699 6725.114
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.04281136 2.12539 1.359297 -0.4896895 10.87401 0.2233077
## ACF1
## Training set 0.06551473
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2015 5.610335 2.75534413 8.465326 1.2440033 9.976666
## Feb 2015 6.512806 2.03809262 10.987520 -0.3306776 13.356290
## Mar 2015 8.786120 1.33667747 16.235562 -2.6068190 20.179058
## Apr 2015 10.192785 -0.02672876 20.412298 -5.4366123 25.822181
## May 2015 12.512597 -1.96157245 26.986766 -9.6237347 34.648928
## Jun 2015 14.061609 -4.41048190 32.533701 -14.1890164 42.312235
## Jul 2015 14.502088 -6.89909775 35.903274 -18.2282011 47.232377
## Aug 2015 12.945620 -8.35024040 34.241481 -19.6235881 45.514829
## Sep 2015 10.352210 -8.52365174 29.228072 -18.5159294 39.220350
## Oct 2015 7.418279 -7.51119146 22.347750 -15.4143760 30.250935
## Nov 2015 4.989730 -6.05900058 16.038460 -11.9078451 21.887305
## Dec 2015 3.871776 -5.53876625 13.282319 -10.5204066 18.263960
## Jan 2016 4.199400 -7.01753622 15.416336 -12.9554236 21.354224
## Feb 2016 4.838892 -9.30305408 18.980837 -16.7893479 26.467131
## Mar 2016 6.477174 -14.21845674 27.172805 -25.1740619 38.128410
## Apr 2016 7.452633 -18.56883703 33.474103 -32.3437711 47.249037
## May 2016 9.069749 -25.52978297 43.669280 -43.8456686 61.985166
## Jun 2016 10.099485 -31.99838423 52.197355 -54.2836502 74.482621
## Jul 2016 10.315199 -36.68017471 57.310572 -61.5580226 82.188420
## Aug 2016 9.113765 -36.29212001 54.519651 -60.3285438 78.556075
## Sep 2016 7.208700 -32.09306283 46.510463 -52.8981594 67.315559
## Oct 2016 5.105869 -25.38360449 35.595343 -41.5237568 51.735495
## Nov 2016 3.391945 -18.81647619 25.600367 -30.5729044 37.356795
## Dec 2016 2.597256 -16.07148862 21.266001 -25.9541251 31.148637
checkresiduals(model5.3)
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' multiplicative method
## Q* = 38.585, df = 8, p-value = 5.868e-06
##
## Model df: 16. Total lags used: 24
# MASE 0.2233077, AIC 6648.746, BIC 6725.114
# MASE is smaller but there are seasonalities left in the residuals. ACF shows seasonality at lag 24. Histogram is very skewed. Ljung-Box test reject the null hypothesis and the residuals exhibit serial correlation.
model5.4 <- hw(solar,seasonal="multiplicative",exponential = TRUE, h=2*frequency(solar))
summary(model5.4)
##
## Forecast method: Holt-Winters' multiplicative method with exponential trend
##
## Model Information:
## Holt-Winters' multiplicative method with exponential trend
##
## Call:
## hw(y = solar, h = 2 * frequency(solar), seasonal = "multiplicative",
##
## Call:
## exponential = TRUE)
##
## Smoothing parameters:
## alpha = 0.6499
## beta = 1e-04
## gamma = 0.0597
##
## Initial states:
## l = 9.3759
## b = 0.9889
## s = 0.3552 0.4789 0.952 1.0553 1.4608 1.7926
## 1.6746 1.4529 1.1145 0.8672 0.5333 0.2627
##
## sigma: 0.3755
##
## AIC AICc BIC
## 6584.208 6585.161 6660.576
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.06388152 2.208727 1.412454 -1.303792 11.28449 0.2320404
## ACF1
## Training set 0.153871
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2015 5.797870 3.0300525 8.557926 1.5597902 10.09030
## Feb 2015 7.513359 3.4721136 12.051126 1.7752709 15.17225
## Mar 2015 10.758212 4.4315276 18.368550 2.3001502 24.15223
## Apr 2015 12.694087 4.5398982 23.061416 2.1722031 31.71563
## May 2015 15.340074 4.9675524 28.397491 2.3763870 41.38764
## Jun 2015 17.239431 5.0740617 34.121774 2.4246789 49.53488
## Jul 2015 18.004700 4.7567353 36.573870 2.2086009 56.17558
## Aug 2015 16.206596 3.9647552 33.201649 1.9415336 52.99730
## Sep 2015 13.011256 2.9525125 27.505842 1.2919421 44.93650
## Oct 2015 9.215869 1.9069097 19.737245 0.8853709 33.46481
## Nov 2015 6.214762 1.1849987 13.585078 0.5419178 24.29873
## Dec 2015 4.565667 0.8244109 10.379696 0.3703384 19.29663
## Jan 2016 5.220661 0.8563642 12.067751 0.3286072 23.36445
## Feb 2016 6.765364 1.0635460 15.726272 0.4422072 30.59695
## Mar 2016 9.687175 1.4044794 22.973833 0.5453327 45.71817
## Apr 2016 11.430324 1.5511804 27.076670 0.5854901 54.97929
## May 2016 13.812889 1.7347646 33.542025 0.6945319 68.40311
## Jun 2016 15.523155 1.7697372 38.310062 0.6763385 80.60471
## Jul 2016 16.212237 1.7614942 40.234893 0.6120635 84.65402
## Aug 2016 14.593143 1.4834201 35.891548 0.5451137 78.51439
## Sep 2016 11.715916 1.0679380 29.746642 0.3840903 65.50733
## Oct 2016 8.298381 0.6992148 21.410025 0.2461107 44.51643
## Nov 2016 5.596049 0.4367394 13.986289 0.1530505 30.51368
## Dec 2016 4.111130 0.3074611 10.521627 0.0919958 23.05737
checkresiduals(model5.4)
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' multiplicative method with exponential trend
## Q* = 21.246, df = 8, p-value = 0.006521
##
## Model df: 16. Total lags used: 24
# MASE 0.2320404, AIC 6584.208, BIC 6660.576
# There are seasonalities left in the residuals. ACF shows seasonality at lag 24. Histogram is very skewed. Ljung-Box test reject the null hypothesis and the residuals exhibit serial correlation.
model5.5 <- hw(solar,seasonal="multiplicative",damped = TRUE, h=2*frequency(solar))
summary(model5.5)
##
## Forecast method: Damped Holt-Winters' multiplicative method
##
## Model Information:
## Damped Holt-Winters' multiplicative method
##
## Call:
## hw(y = solar, h = 2 * frequency(solar), seasonal = "multiplicative",
##
## Call:
## damped = TRUE)
##
## Smoothing parameters:
## alpha = 0.805
## beta = 1e-04
## gamma = 0.0124
## phi = 0.94
##
## Initial states:
## l = 9.9033
## b = 0.4703
## s = 0.4341 0.5649 0.8263 1.1716 1.4514 1.6316
## 1.5503 1.3834 1.1045 0.8837 0.5761 0.4221
##
## sigma: 0.3145
##
## AIC AICc BIC
## 6366.701 6367.768 6447.561
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0456208 2.045442 1.239102 -2.172476 10.00296 0.2035619
## ACF1
## Training set 0.04177493
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2015 5.611616 3.3502090 7.873023 2.1530925 9.070139
## Feb 2015 7.060438 3.3373603 10.783517 1.3664818 12.754395
## Mar 2015 10.515176 3.8550902 17.175263 0.3294535 20.700899
## Apr 2015 12.993446 3.5145037 22.472389 -1.5033454 27.490238
## May 2015 16.284381 2.9386340 29.630128 -4.1261776 36.694939
## Jun 2015 18.261020 1.7242212 34.797820 -7.0298315 43.551872
## Jul 2015 19.031597 0.2101276 37.853067 -9.7533565 47.816551
## Aug 2015 17.105654 -1.2074754 35.418784 -10.9018606 45.113169
## Sep 2015 13.764743 -2.0801991 29.609686 -10.4680049 37.997491
## Oct 2015 9.774733 -2.2585804 21.808047 -8.6286319 28.178099
## Nov 2015 6.581938 -2.0456267 15.209503 -6.6127835 19.776659
## Dec 2015 5.163049 -2.0168711 12.342970 -5.8176914 16.143790
## Jan 2016 5.616270 -2.6571166 13.889656 -7.0367828 18.269322
## Feb 2016 7.066360 -3.9144360 18.047156 -9.7273184 23.860039
## Mar 2016 10.524088 -6.6891829 27.737360 -15.8013383 36.849515
## Apr 2016 13.004566 -9.3405081 35.349641 -21.1692761 47.178409
## May 2016 16.298445 -13.0720043 45.668893 -28.6197807 61.216670
## Jun 2016 18.276925 -16.2140342 52.767885 -34.4724452 71.026296
## Jul 2016 19.048304 -18.5466197 56.643228 -38.4481704 76.544779
## Aug 2016 17.120781 -18.1782620 52.419825 -36.8644468 71.106009
## Sep 2016 13.777000 -15.8651593 43.419159 -31.5567705 59.110770
## Oct 2016 9.783493 -12.1627698 31.729756 -23.7804195 43.347405
## Nov 2016 6.587872 -8.8063951 21.982139 -16.9556278 30.131371
## Dec 2016 5.167730 -7.4021686 17.737629 -14.0562710 24.391731
checkresiduals(model5.5)
##
## Ljung-Box test
##
## data: Residuals from Damped Holt-Winters' multiplicative method
## Q* = 23.57, df = 7, p-value = 0.001355
##
## Model df: 17. Total lags used: 24
# MASE 0.2035619, AIC 6366.701, BIC 6447.561
# There are seasonalities left in the residuals. ACF shows seasonality at lag 12. Histogram is very skewed. Ljung-Box test reject the null hypothesis and the residuals exhibit serial correlation.
model5.6 <- hw(solar,seasonal="multiplicative",exponential = TRUE, damped = TRUE, h = 2*frequency(solar), level = c(80.95))
summary(model5.6)
##
## Forecast method: Damped Holt-Winters' multiplicative method with exponential trend
##
## Model Information:
## Damped Holt-Winters' multiplicative method with exponential trend
##
## Call:
## hw(y = solar, h = 2 * frequency(solar), seasonal = "multiplicative",
##
## Call:
## damped = TRUE, level = c(80.95), exponential = TRUE)
##
## Smoothing parameters:
## alpha = 0.8468
## beta = 1e-04
## gamma = 0.0111
## phi = 0.8363
##
## Initial states:
## l = 9.3778
## b = 1.1479
## s = 0.4388 0.5653 0.8294 1.1487 1.449 1.6183
## 1.5604 1.3968 1.1149 0.8758 0.5804 0.4221
##
## sigma: 0.3143
##
## AIC AICc BIC
## 6365.329 6366.396 6446.190
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.03713562 2.041516 1.236905 -2.113647 9.98074 0.2032009
## ACF1
## Training set -0.003606111
##
## Forecasts:
## Point Forecast Lo 80.95 Hi 80.95
## Jan 2015 5.562434 3.3243559 7.963821
## Feb 2015 7.022873 3.3995493 11.142244
## Mar 2015 10.401147 4.2186222 18.098002
## Apr 2015 12.948609 4.6743445 23.603726
## May 2015 16.301555 5.1625546 31.585990
## Jun 2015 18.277245 5.0793354 37.082561
## Jul 2015 18.904626 4.7026672 39.420209
## Aug 2015 16.978636 3.9105969 36.994895
## Sep 2015 13.530464 2.7203909 30.184596
## Oct 2015 9.679038 1.8223655 22.123413
## Nov 2015 6.541756 1.1066519 15.129146
## Dec 2015 5.166293 0.8021468 12.035059
## Jan 2016 5.562371 0.7705478 13.258286
## Feb 2016 7.022805 0.8709169 16.988914
## Mar 2016 10.401064 1.1794780 25.798234
## Apr 2016 12.948522 1.3237257 32.162754
## May 2016 16.301464 1.5680559 40.846041
## Jun 2016 18.277160 1.5785301 45.451611
## Jul 2016 18.904552 1.5469452 47.057893
## Aug 2016 16.978581 1.2511362 41.785023
## Sep 2016 13.530427 0.8748983 33.951694
## Oct 2016 9.679016 0.5886332 24.068506
## Nov 2016 6.541743 0.3604458 16.226321
## Dec 2016 5.166285 0.2622662 12.630236
checkresiduals(model5.6)
##
## Ljung-Box test
##
## data: Residuals from Damped Holt-Winters' multiplicative method with exponential trend
## Q* = 23.862, df = 7, p-value = 0.001205
##
## Model df: 17. Total lags used: 24
# MASE 0.2032009, AIC 6365.329, BIC 6446.190
# There are seasonalities left in the residuals. ACF shows seasonality at lag 12 and lag 24. Histogram is very skewed. Ljung-Box test reject the null hypothesis and the residuals exhibit serial correlation.
# None of the exponential smoothing models removed the auto-correlations in the residuals. Model5.6 has the lowest MASE. It corresponds to the state-space model "AMdM" or "MMdM"
model6.0 <- ets(solar, model = "MMM", damped = TRUE)
summary(model6.0) # MASE 0.3201193
## ETS(M,Md,M)
##
## Call:
## ets(y = solar, model = "MMM", damped = TRUE)
##
## Smoothing parameters:
## alpha = 0.7892
## beta = 0.016
## gamma = 0.0696
## phi = 0.9575
##
## Initial states:
## l = 9.2782
## b = 1.1745
## s = 0.7133 0.3063 0.6852 1.1282 1.2133 1.5082
## 1.7211 1.2962 1.1358 0.9309 0.656 0.7056
##
## sigma: 0.2341
##
## AIC AICc BIC
## 5995.550 5996.617 6076.410
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02448728 3.076938 1.948599 -5.645612 16.87559 0.3201193
## ACF1
## Training set 0.1452617
checkresiduals(model6.0)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Md,M)
## Q* = 79.153, df = 7, p-value = 2.054e-14
##
## Model df: 17. Total lags used: 24
# Residuals look random but ACF shows seasonality at lag 24 and auto-correlations at other lags. The histogram is skewed. Ljung-Box test reject the null hypothesis and the residuals exhibit serial correlation.
# Fit state-space models
model.auto <- ets(solar)
summary(model.auto)
## ETS(A,Ad,A)
##
## Call:
## ets(y = solar)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 1e-04
## gamma = 1e-04
## phi = 0.9388
##
## Initial states:
## l = 11.154
## b = 0.7632
## s = -10.4919 -8.137 -3.348 2.5794 8.08 11.1219
## 9.9586 6.9916 1.9573 -1.8565 -7.1607 -9.6946
##
## sigma: 2.3446
##
## AIC AICc BIC
## 5428.422 5429.489 5509.282
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01091357 2.314163 1.498521 -1.468083 12.44796 0.2461797
## ACF1
## Training set 0.1700724
checkresiduals(model.auto)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,Ad,A)
## Q* = 210.76, df = 7, p-value < 2.2e-16
##
## Model df: 17. Total lags used: 24
# The chosen method includes additive error terms, additive damped trend and additive seasonality. MASE 0.2461797, AIC 5428.422, BIC 5509.282. Some seasonality and changing variances exist in the residuals. ACF shows seasonality at lag 12 and auto-correlations at other lags. The histogram is good enough. Ljung-Box test reject the null hypothesis and the residuals exhibit serial correlation.
models = c("MAM", "MAA", "AAA")
dampeds = c(TRUE, FALSE)
expand = expand.grid(models, dampeds)
fit.AIC = array(NA,9)
levels = array(NA, dim=c(9,2))
for (i in 1:6){
fit.AIC[i] = ets(solar, model = toString(expand[i , 1]),
damped = expand[i , 2])$aic
levels[i, 1] = toString(expand[i , 1])
levels[i, 2] = expand[i , 2]
}
fit.AIC[7] = ets(solar, model = "ANA")$aic
fit.AIC[8] = ets(solar, model = "MNA")$aic
fit.AIC[9] = ets(solar, model = "MNM")$aic
levels[7,1] = "ANA"
levels[8,1] = "MNA"
levels[9,1] = "MNM"
levels[7,2] = FALSE
levels[8,2] = FALSE
levels[9,2] = FALSE
results = data.frame(levels, fit.AIC)
colnames(results) = c("Model","Damped","AIC")
results
## Model Damped AIC
## 1 MAM TRUE 5953.502
## 2 MAA TRUE 6469.079
## 3 AAA TRUE 5428.422
## 4 MAM FALSE 6105.959
## 5 MAA FALSE 7602.755
## 6 AAA FALSE 5434.708
## 7 ANA FALSE 5449.974
## 8 MNA FALSE 6546.856
## 9 MNM FALSE 5986.778
# According to the results, "AAdA" has the lowest AIC=5428.422. Following that, AIC value of "AAA" model is 5434.708, AIC value of "ANA" is 5449.974, AIC value of "MAdM" is 5953.502, AIC value of "MNM" is 5986.778.
# Let's look at the residuals.
model6.1 <- ets(solar, model = "AAA", damped = TRUE)
summary(model6.1) # MASE 0.2461797
## ETS(A,Ad,A)
##
## Call:
## ets(y = solar, model = "AAA", damped = TRUE)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 1e-04
## gamma = 1e-04
## phi = 0.9388
##
## Initial states:
## l = 11.154
## b = 0.7632
## s = -10.4919 -8.137 -3.348 2.5794 8.08 11.1219
## 9.9586 6.9916 1.9573 -1.8565 -7.1607 -9.6946
##
## sigma: 2.3446
##
## AIC AICc BIC
## 5428.422 5429.489 5509.282
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01091357 2.314163 1.498521 -1.468083 12.44796 0.2461797
## ACF1
## Training set 0.1700724
checkresiduals(model6.1)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,Ad,A)
## Q* = 210.76, df = 7, p-value < 2.2e-16
##
## Model df: 17. Total lags used: 24
# Residuals look random but ACF shows seasonality at lag 12 and auto-correlations at other lags. The histogram is good enough. Ljung-Box test reject the null hypothesis and the residuals exhibit serial correlation.
model6.2 <- ets(solar, model = "AAA", damped = FALSE)
summary(model6.2) # MASE 0.24716
## ETS(A,A,A)
##
## Call:
## ets(y = solar, model = "AAA", damped = FALSE)
##
## Smoothing parameters:
## alpha = 0.9993
## beta = 0.0019
## gamma = 1e-04
##
## Initial states:
## l = 11.3306
## b = 0.209
## s = -10.7542 -8.4968 -3.227 3.0768 7.9264 10.9122
## 10.1422 7.3322 2.2353 -1.9528 -7.3626 -9.8316
##
## sigma: 2.3575
##
## AIC AICc BIC
## 5434.708 5435.662 5511.076
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1179476 2.328736 1.504489 -2.230359 12.68101 0.24716
## ACF1
## Training set 0.1703355
checkresiduals(model6.2)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,A,A)
## Q* = 205.55, df = 8, p-value < 2.2e-16
##
## Model df: 16. Total lags used: 24
# Again, residuals look random but ACF shows seasonality at lag 12 and auto-correlations at other lags. The histogram is good enough. Ljung-Box test reject the null hypothesis and the residuals exhibit serial correlation.
model6.3 <- ets(solar, model = "ANA")
summary(model6.3) # MASE 0.254704
## ETS(A,N,A)
##
## Call:
## ets(y = solar, model = "ANA")
##
## Smoothing parameters:
## alpha = 0.9997
## gamma = 3e-04
##
## Initial states:
## l = 22.6142
## s = -10.2887 -8.5269 -4.0534 1.7184 7.355 10.8208
## 10.4774 7.7101 2.4749 -1.7156 -6.7936 -9.1783
##
## sigma: 2.3884
##
## AIC AICc BIC
## 5449.974 5450.719 5517.358
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0108645 2.362974 1.55041 -2.640142 12.95336 0.254704
## ACF1
## Training set 0.1825724
checkresiduals(model6.3)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 207.44, df = 10, p-value < 2.2e-16
##
## Model df: 14. Total lags used: 24
# The residuals didn't improve much. Ljung-Box test reject the null hypothesis and the residuals exhibit serial correlation.
model6.4 <- ets(solar, model = "MAM", damped = TRUE)
summary(model6.4) # MASE 0.3222574
## ETS(M,Ad,M)
##
## Call:
## ets(y = solar, model = "MAM", damped = TRUE)
##
## Smoothing parameters:
## alpha = 0.7922
## beta = 6e-04
## gamma = 0.0707
## phi = 0.98
##
## Initial states:
## l = 9.3693
## b = 1.2404
## s = 0.6953 0.3066 0.5802 0.9879 1.3594 1.5469
## 1.4723 1.4055 1.227 0.9573 0.6959 0.7656
##
## sigma: 0.2274
##
## AIC AICc BIC
## 5953.502 5954.569 6034.363
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1750605 3.02791 1.961614 -5.497209 17.21119 0.3222574
## ACF1
## Training set 0.2565219
checkresiduals(model6.4)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,M)
## Q* = 102.28, df = 7, p-value < 2.2e-16
##
## Model df: 17. Total lags used: 24
# The residuals still has seasonality from ACF. However, there are still some non-random patterns in the residuals. Some auto-correlations from the ACF. The histogram doesn't show a normal distribution very well. Ljung-Box test shows the residuals exhibit serial correlation.
model6.5 <- ets(solar, model = "MNM")
summary(model6.5) # MASE 0.3137226
## ETS(M,N,M)
##
## Call:
## ets(y = solar, model = "MNM")
##
## Smoothing parameters:
## alpha = 0.7497
## gamma = 0.072
##
## Initial states:
## l = 22.6418
## s = 0.729 0.3074 0.6168 0.9928 1.2851 1.5457
## 1.5509 1.5263 1.1103 0.9424 0.6392 0.7541
##
## sigma: 0.235
##
## AIC AICc BIC
## 5986.778 5987.524 6054.162
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.2103645 2.971741 1.909662 -4.962262 16.94307 0.3137226
## ACF1
## Training set 0.2400707
checkresiduals(model6.5)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,N,M)
## Q* = 77.462, df = 10, p-value = 1.575e-12
##
## Model df: 14. Total lags used: 24
# Again seasonality from ACF at lag 24. There are still some non-random patterns in the residuals. Some auto-correlations from the ACF. The histogram doesn't show a normal distribution very well. Ljung-Box test shows the residuals exhibit serial correlation.
# None of the state space models is able to remove the autocorrelations or seasonalities in residuals.
# "AAdA" has the best MASE value 0.2461797 and it's the same with model6.auto, so model6.1 will be used to forecast.
# Forecast with autoregressive distributed lag model - model4.1
measurements <- read.csv("C:/RMIT/Forecasting S2 19/Assignment 2/data.x.csv")
measurements <- as.numeric(measurements[,1])
model4.1fore <- dLagM::forecast(model4.1, x = measurements, h = 2*12)$forecasts
plot(ts(c(solar, model4.1fore)), type="l", xaxt="n", ylab="Average monthly solar radiation", xlab="Time", main="Average monthly solar radiation with 2 year ahead forecasts")
# This is not a good forecast. The changing variances become a lot bigger in the forecasts.
# Forecast with Holt's winter model - model5.6
model5.6fore <- forecast(model5.6$model, h = 2*12)
# Forecast with state space model - model6.1
model6.1fore <- forecast(model6.1, h = 2*12)
plot(model5.6fore, type="l", ylab="Average monthly solar radiation",main="Forecasts of solar radiation for 2015 and 2016", xlab="Year", plot.conf=FALSE)
lines(fitted(model5.6), col="blue")
lines(fitted(model6.1), col="red")
lines(model6.1fore$mean, col="red", type="l")
legend("topleft", lty=1, col=c("black", "blue", "red"), c("Data", "Holt's winter model","AAdA"))
# From the plot, blue line fits better and provide a better forecasts. Also, the corresponding model model.5.6 has the lowest MASE. Therefore, damped Holt-Winters' multiplicative method with exponential trend is used to forecast.
model5.6fore
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2015 5.562434 3.2390134 7.789973 2.09802570 8.841359
## Feb 2015 7.022873 3.3886785 10.901710 2.05127208 13.497650
## Mar 2015 10.401147 4.2928906 17.097107 2.41209949 22.356707
## Apr 2015 12.948609 4.6990869 22.919299 2.69819187 30.791777
## May 2015 16.301555 5.2762229 30.027825 2.77362936 42.585466
## Jun 2015 18.277245 5.1514666 35.018490 2.52775157 53.384766
## Jul 2015 18.904626 4.8898838 37.493519 2.46288632 58.832391
## Aug 2015 16.978636 3.9534256 35.606541 1.81849902 54.786404
## Sep 2015 13.530464 2.8112003 28.276488 1.26418757 47.418499
## Oct 2015 9.679038 1.8258160 20.558828 0.80527730 35.087467
## Nov 2015 6.541756 1.1338486 14.175084 0.51068007 25.658056
## Dec 2015 5.166293 0.7882244 11.407680 0.35333609 21.127047
## Jan 2016 5.562371 0.7798079 12.639942 0.32515224 23.586170
## Feb 2016 7.022805 0.9001151 16.025689 0.36742276 31.982923
## Mar 2016 10.401064 1.1974081 23.521751 0.48221033 48.746620
## Apr 2016 12.948522 1.3819473 29.661579 0.50642330 62.320802
## May 2016 16.301464 1.6415454 38.144032 0.55697967 80.816774
## Jun 2016 18.277160 1.6121922 42.732417 0.56204397 92.607048
## Jul 2016 18.904552 1.5308042 45.102775 0.53168333 94.676365
## Aug 2016 16.978581 1.2952866 39.001316 0.43755742 88.373134
## Sep 2016 13.530427 0.9721761 31.594873 0.30947173 71.419187
## Oct 2016 9.679016 0.6351890 22.345578 0.20260287 51.015789
## Nov 2016 6.541743 0.3977699 15.094209 0.11960681 33.633448
## Dec 2016 5.166285 0.2852654 12.151004 0.09224016 26.789161
In both of the series, there are simultaneously increasing patterns in the plot. The correlation is 0.6970439 which indicates the series is at least moderately correlated. However, sample CCF after prewhitening shows no significant lags.The prewhitening process removed all existing correlation structure between Melbourne’s property price and Victoria’s population change. The correlation between these two series is spurious.
data <- read.csv("C:/RMIT/Forecasting S2 19/Assignment 2/data2.csv")
data <- ts(data, start = c(2003,3), frequency = 4)
price <- data[,2]
change <- data[,3]
data.joint <- ts.intersect(price, change)
plot(data.joint, yax.flip=T)
cor(data.joint)
## price change
## price 1.0000000 0.6970439
## change 0.6970439 1.0000000
# In both of the series, there are simultaneously increasing patterns in the plot. The correlation is 0.6970439 which indicates the series is at least moderately correlated.
ccf(as.vector(price), as.vector(change), ylab='CCF', main="Sample CCF between quarterly Property Price and population change in Vic Sep 2003 to Dec 2016")
# According to CCF, the cross-correlations are significant. We'll prewhiten these series to kill all existing correlation between the series.
# Prewhitening
data.dif <- ts.intersect(diff(diff(price,4)),diff(diff(change,4)))
prewhiten(as.vector(data.dif[,1]),as.vector(data.dif[,2]),ylab='CCF', main="Sample CCF after prewhitening")
# Sample CCF after prewhitening shows no significant lags.The prewhitening process removed all existing correlation structure between Melbourne's property price and Victoria's population change. The correlation between these two series is spurious.