The goal is to provide the best MASE forecasts for the solar radiation series for the next two years utilising one of the time series regression methods (dLagM package), dynamic linear models (dynlm package), exponential smoothing, and matching state-space models.
Importing the data and then scaling it appropriately
solarRadiation <- read.csv("data1.csv")
solarRad <- ts(solarRadiation$solar, start = c(1960,1), frequency = 12)
prec <- ts(solarRadiation$ppt, start= c(1960,1), frequency = 12)
solarprec <- ts.intersect(solarRad, prec) #Joining time series of radiation and precipitation
forecastData <- read.csv("data.x.csv")
forecastData_ts <- ts(forecastData)
#Scaling the data as they are on different scales
scaled_solarPrec = scale(solarprec)
par(oma=c(0,0,2,0))
plot(scaled_solarPrec, plot.type="s", ,col = c("steelblue4", "sienna1"),
main = "Solar Radiation vs Precipitation over years")
legend("topleft",lty=1, text.width = 11, col=c("steelblue4", "sienna1"), c("Solar Radiation", "prec"))
ACF, PACF and seasonality check to understand the time series. All three are important in order to understand which model could be fitted to this particular series’.
#ACF and PACF
par(mar=c(4,4.5,5,1),mfrow=c(1,2),cex.main=0.75, cex.lab=0.75, cex.axis=0.75)
acf(solarRad, main = "ACF plot")
pacf(solarRad, main = "PACF plot")
We observe some interesting things from the ACF and PACF plots. * There seems to be some kind of seasonality, as noticed due to the wavy pattern of the ACF plot * Existence of successive points indicates moving average patterns * Changing variance is not visible * No intervention at any points * Possibility of a trend as seen on the PACF plot’s lengthier first line
Seasonality check
#Seasonality check
plotSeasFac(x12(solarRad))
Solar radiation moves up during the middle of the year while it is at it’s lowest during the start and end of the year. There is a curve which is at its highest during the June and July.
Now for our Task’s main objective we need to see how well different models fit with the solar radiation series. We would use different Distributed Lag models, Dynamic Lag models, ETS models and Exponential smoothing models.
We would try 4 different DLM models, namely - * DLM * Poly DLM * Koyk DLM * ARDLM
# 1. DLM
model_dlm <- data.frame(Model=character() , MASE=numeric() ,
BIC= numeric() , AICC=numeric() , AIC=numeric())
for ( i in 1:10){
dlm = dlm( y = as.vector(solarRad) , x = as.vector(prec), q = i )
model_dlm = rbind.fill(model_dlm,
cbind(Model= paste0("DLM q= ",i), MASE=MASE(dlm$model),
BIC=BIC(dlm$model),AICC = AICc(dlm$model),
AIC = AIC(dlm$model)))
}
model_dlm
## Model MASE BIC AICC AIC
## 1 DLM q= 1 1.688457 4746.676 4728.774 4728.713
## 2 DLM q= 2 1.675967 4735.095 4712.741 4712.649
## 3 DLM q= 3 1.662703 4715.478 4688.681 4688.551
## 4 DLM q= 4 1.646357 4695.003 4663.772 4663.600
## 5 DLM q= 5 1.613848 4680.499 4644.845 4644.622
## 6 DLM q= 6 1.607532 4677.837 4637.769 4637.489
## 7 DLM q= 7 1.607042 4677.532 4633.059 4632.716
## 8 DLM q= 8 1.604806 4675.267 4626.399 4625.986
## 9 DLM q= 9 1.593121 4668.827 4615.573 4615.084
## 10 DLM q= 10 1.577996 4660.858 4603.230 4602.658
No we run the Breush-Godfrey test
bgtest(dlm$model)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: dlm$model
## LM test = 522.18, df = 1, p-value < 2.2e-16
checkresiduals(dlm$model$residuals, test = F)
# 2. Poly DLM
model_dlm <- data.frame(Model=character(),MASE=numeric(),
BIC= numeric(),AICC=numeric(),AIC=numeric())
for ( i in 1:10){
polydlm = polyDlm( y = as.vector(solarRad) , x = as.vector(prec), q = i, k= 2, show.beta = FALSE )
model_dlm = rbind.fill(model_dlm,
cbind(Model= paste0("Poly DLM q = ",i, " k = 2" ), MASE=MASE(polydlm$model),
BIC=BIC(polydlm$model),
AICC =AICc(polydlm$model),
AIC = AIC(polydlm$model)))
}
model_dlm
## Model MASE BIC AICC AIC
## 1 Poly DLM q = 1 k = 2 1.688457 4746.676 4728.774 4728.713
## 2 Poly DLM q = 2 k = 2 1.675967 4735.095 4712.741 4712.649
## 3 Poly DLM q = 3 k = 2 1.666349 4711.457 4689.111 4689.018
## 4 Poly DLM q = 4 k = 2 1.653286 4687.171 4664.833 4664.741
## 5 Poly DLM q = 5 k = 2 1.631939 4667.673 4645.342 4645.250
## 6 Poly DLM q = 6 k = 2 1.619987 4656.942 4634.619 4634.526
## 7 Poly DLM q = 7 k = 2 1.619091 4650.382 4628.067 4627.974
## 8 Poly DLM q = 8 k = 2 1.619034 4642.870 4620.563 4620.470
## 9 Poly DLM q = 9 k = 2 1.608805 4630.253 4607.954 4607.861
## 10 Poly DLM q = 10 k = 2 1.592555 4614.289 4591.997 4591.904
bgtest(dlm$model)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: dlm$model
## LM test = 522.18, df = 1, p-value < 2.2e-16
checkresiduals(dlm$model)
##
## Breusch-Godfrey test for serial correlation of order up to 15
##
## data: Residuals
## LM test = 588.43, df = 15, p-value < 2.2e-16
# 3.Koyk DLM
koyck = koyckDlm(y=as.vector(solarRad),x=as.vector(prec))
model_dlm <- data.frame(Model=character(),MASE=numeric(),
BIC= numeric(),AICC=numeric(),AIC=numeric())
attr(koyck$model, "class")="lm"
Koyckaic <- AIC(koyck$model)
model_dlm = rbind.fill(model_dlm,cbind(Model="Koyck",MASE=MASE(koyck),
BIC = NA,
AICC = NA,
AIC = Koyckaic ))
model_dlm
## Model MASE BIC AICC AIC
## 1 Koyck 1.032483 NA NA 3946.476
# 4. ARDLM
model_dlm <- data.frame(Model=character(),MASE=numeric(),
BIC= numeric(),AICC=numeric(),AIC=numeric())
for ( i in 1:10){
ardlm = ardlDlm( y = as.vector(solarRad) , x = as.vector(prec), p = i, q= i)
model_dlm = rbind.fill(model_dlm,
cbind(Model= paste0("ARDLM q = ",i, " k = ",i ), MASE=MASE(ardlm),
BIC=BIC(ardlm$model),
AICC =AICc(ardlm$model),
AIC = AIC(ardlm$model)))
}
model_dlm
## Model MASE BIC AICC AIC
## 1 ARDLM q = 1 k = 1 0.8392434 3734.765 3712.403 3712.311
## 2 ARDLM q = 2 k = 2 0.4951319 3260.476 3229.224 3229.051
## 3 ARDLM q = 3 k = 3 0.4737144 3179.798 3139.687 3139.409
## 4 ARDLM q = 4 k = 4 0.4665123 3180.772 3131.834 3131.424
## 5 ARDLM q = 5 k = 5 0.4479311 3156.177 3098.445 3097.877
## 6 ARDLM q = 6 k = 6 0.4205664 3098.223 3031.728 3030.976
## 7 ARDLM q = 7 k = 7 0.4103932 3078.600 3003.377 3002.413
## 8 ARDLM q = 8 k = 8 0.4064415 3078.922 2995.003 2993.801
## 9 ARDLM q = 9 k = 9 0.4042283 3069.631 2977.052 2975.583
## 10 ARDLM q = 10 k = 10 0.3996089 3065.571 2964.364 2962.601
ardl10 = ardlDlm(y=as.vector(solarRad),x=as.vector(prec),p = 10,q = 10)
checkresiduals(ardl10$model, test = F)
Y.t = solarRad
X.t = prec
dynlm1 = dynlm(Y.t ~ L(Y.t ,1 ) + trend(Y.t) + season(Y.t))
summary(dynlm1)
##
## Time series regression with "ts" data:
## Start = 1960(2), End = 2014(12)
##
## Call:
## dynlm(formula = Y.t ~ L(Y.t, 1) + trend(Y.t) + season(Y.t))
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8501 -0.9906 -0.1119 0.8325 16.8276
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2105921 0.3640843 3.325 0.000934 ***
## L(Y.t, 1) 0.9323820 0.0142258 65.542 < 2e-16 ***
## trend(Y.t) -0.0006813 0.0056406 -0.121 0.903905
## season(Y.t)Feb 1.9262673 0.4397952 4.380 1.39e-05 ***
## season(Y.t)Mar 4.8864291 0.4420720 11.053 < 2e-16 ***
## season(Y.t)Apr 3.7894322 0.4563152 8.304 5.90e-16 ***
## season(Y.t)May 5.1503064 0.4741868 10.861 < 2e-16 ***
## season(Y.t)Jun 3.3685316 0.5051379 6.669 5.56e-11 ***
## season(Y.t)Jul 1.6296826 0.5265113 3.095 0.002052 **
## season(Y.t)Aug -2.1600321 0.5340317 -4.045 5.87e-05 ***
## season(Y.t)Sep -4.7002552 0.5116903 -9.186 < 2e-16 ***
## season(Y.t)Oct -5.8121501 0.4778130 -12.164 < 2e-16 ***
## season(Y.t)Nov -5.0968011 0.4512694 -11.294 < 2e-16 ***
## season(Y.t)Dec -2.8325126 0.4408651 -6.425 2.56e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.295 on 645 degrees of freedom
## Multiple R-squared: 0.9463, Adjusted R-squared: 0.9452
## F-statistic: 874.6 on 13 and 645 DF, p-value: < 2.2e-16
dynlm2 = dynlm(Y.t ~ L(Y.t ,1 ) + L(Y.t ,2 ) + trend(Y.t) + season(Y.t))
summary(dynlm2)
##
## Time series regression with "ts" data:
## Start = 1960(3), End = 2014(12)
##
## Call:
## dynlm(formula = Y.t ~ L(Y.t, 1) + L(Y.t, 2) + trend(Y.t) + season(Y.t))
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.2013 -1.0035 -0.0807 0.8270 16.9555
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.8122916 0.3719451 4.872 1.39e-06 ***
## L(Y.t, 1) 1.1319003 0.0385108 29.392 < 2e-16 ***
## L(Y.t, 2) -0.2141527 0.0385114 -5.561 3.94e-08 ***
## trend(Y.t) -0.0006783 0.0055291 -0.123 0.902394
## season(Y.t)Feb 1.3206473 0.4468083 2.956 0.003233 **
## season(Y.t)Mar 3.8878770 0.4681882 8.304 5.95e-16 ***
## season(Y.t)Apr 2.2727488 0.5231550 4.344 1.62e-05 ***
## season(Y.t)May 4.0033234 0.5077132 7.885 1.35e-14 ***
## season(Y.t)Jun 2.0598056 0.5474589 3.762 0.000184 ***
## season(Y.t)Jul 0.8170809 0.5354520 1.526 0.127510
## season(Y.t)Aug -2.5446968 0.5269572 -4.829 1.72e-06 ***
## season(Y.t)Sep -4.3021817 0.5054747 -8.511 < 2e-16 ***
## season(Y.t)Oct -4.9890230 0.4900800 -10.180 < 2e-16 ***
## season(Y.t)Nov -4.1995146 0.4698438 -8.938 < 2e-16 ***
## season(Y.t)Dec -2.2469226 0.4438227 -5.063 5.41e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.245 on 643 degrees of freedom
## Multiple R-squared: 0.9487, Adjusted R-squared: 0.9476
## F-statistic: 849.5 on 14 and 643 DF, p-value: < 2.2e-16
dynlm3 = dynlm(Y.t ~ L(Y.t ,1 ) + L(Y.t ,2 ) + X.t + trend(Y.t) + season(Y.t))
summary(dynlm3)
##
## Time series regression with "ts" data:
## Start = 1960(3), End = 2014(12)
##
## Call:
## dynlm(formula = Y.t ~ L(Y.t, 1) + L(Y.t, 2) + X.t + trend(Y.t) +
## season(Y.t))
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.1317 -0.9985 -0.0884 0.8337 17.0093
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.983337 0.436286 4.546 6.54e-06 ***
## L(Y.t, 1) 1.131707 0.038525 29.376 < 2e-16 ***
## L(Y.t, 2) -0.214438 0.038526 -5.566 3.83e-08 ***
## X.t -0.243985 0.324990 -0.751 0.453081
## trend(Y.t) -0.000493 0.005536 -0.089 0.929069
## season(Y.t)Feb 1.299241 0.447869 2.901 0.003848 **
## season(Y.t)Mar 3.862038 0.469610 8.224 1.09e-15 ***
## season(Y.t)Apr 2.245544 0.524586 4.281 2.15e-05 ***
## season(Y.t)May 3.952718 0.512339 7.715 4.64e-14 ***
## season(Y.t)Jun 1.958115 0.564147 3.471 0.000553 ***
## season(Y.t)Jul 0.685603 0.563537 1.217 0.224201
## season(Y.t)Aug -2.680867 0.557469 -4.809 1.89e-06 ***
## season(Y.t)Sep -4.420256 0.529541 -8.347 4.29e-16 ***
## season(Y.t)Oct -5.038937 0.494734 -10.185 < 2e-16 ***
## season(Y.t)Nov -4.206824 0.470104 -8.949 < 2e-16 ***
## season(Y.t)Dec -2.221271 0.445286 -4.988 7.85e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.245 on 642 degrees of freedom
## Multiple R-squared: 0.9488, Adjusted R-squared: 0.9476
## F-statistic: 792.3 on 15 and 642 DF, p-value: < 2.2e-16
dynlm4 = dynlm(Y.t ~ L(Y.t ,1 ) + L(Y.t ,2 ) +
X.t + L(X.t,1) + trend(Y.t) + season(Y.t))
summary(dynlm4)
##
## Time series regression with "ts" data:
## Start = 1960(3), End = 2014(12)
##
## Call:
## dynlm(formula = Y.t ~ L(Y.t, 1) + L(Y.t, 2) + X.t + L(X.t, 1) +
## trend(Y.t) + season(Y.t))
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.1184 -1.0169 -0.1062 0.8286 16.8855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0433705 0.4559411 4.482 8.77e-06 ***
## L(Y.t, 1) 1.1310883 0.0385724 29.324 < 2e-16 ***
## L(Y.t, 2) -0.2139682 0.0385638 -5.548 4.22e-08 ***
## X.t -0.0692011 0.5023997 -0.138 0.890488
## L(X.t, 1) -0.2291443 0.5020643 -0.456 0.648254
## trend(Y.t) -0.0004628 0.0055403 -0.084 0.933450
## season(Y.t)Feb 1.2931622 0.4483428 2.884 0.004054 **
## season(Y.t)Mar 3.8426013 0.4718258 8.144 2.00e-15 ***
## season(Y.t)Apr 2.2243271 0.5269639 4.221 2.78e-05 ***
## season(Y.t)May 3.9468506 0.5128165 7.696 5.31e-14 ***
## season(Y.t)Jun 1.9674717 0.5648678 3.483 0.000529 ***
## season(Y.t)Jul 0.6672213 0.5653213 1.180 0.238338
## season(Y.t)Aug -2.7257717 0.5664231 -4.812 1.86e-06 ***
## season(Y.t)Sep -4.4859263 0.5490569 -8.170 1.64e-15 ***
## season(Y.t)Oct -5.1393686 0.5417433 -9.487 < 2e-16 ***
## season(Y.t)Nov -4.2751836 0.4936640 -8.660 < 2e-16 ***
## season(Y.t)Dec -2.2728364 0.4596625 -4.945 9.76e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.247 on 641 degrees of freedom
## Multiple R-squared: 0.9488, Adjusted R-squared: 0.9475
## F-statistic: 741.9 on 16 and 641 DF, p-value: < 2.2e-16
dynlm5 = dynlm(Y.t ~ L(Y.t ,1 ) + season(Y.t))
summary(dynlm5)
##
## Time series regression with "ts" data:
## Start = 1960(2), End = 2014(12)
##
## Call:
## dynlm(formula = Y.t ~ L(Y.t, 1) + season(Y.t))
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8554 -1.0002 -0.1205 0.8259 16.8225
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.19188 0.32924 3.620 0.000317 ***
## L(Y.t, 1) 0.93237 0.01421 65.592 < 2e-16 ***
## season(Y.t)Feb 1.92656 0.43945 4.384 1.36e-05 ***
## season(Y.t)Mar 4.88669 0.44173 11.063 < 2e-16 ***
## season(Y.t)Apr 3.78970 0.45596 8.311 5.58e-16 ***
## season(Y.t)May 5.15056 0.47382 10.870 < 2e-16 ***
## season(Y.t)Jun 3.36878 0.50475 6.674 5.36e-11 ***
## season(Y.t)Jul 1.62991 0.52611 3.098 0.002032 **
## season(Y.t)Aug -2.15985 0.53362 -4.048 5.80e-05 ***
## season(Y.t)Sep -4.70016 0.51130 -9.193 < 2e-16 ***
## season(Y.t)Oct -5.81217 0.47745 -12.173 < 2e-16 ***
## season(Y.t)Nov -5.09695 0.45092 -11.303 < 2e-16 ***
## season(Y.t)Dec -2.83277 0.44052 -6.430 2.47e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.293 on 646 degrees of freedom
## Multiple R-squared: 0.9463, Adjusted R-squared: 0.9453
## F-statistic: 948.9 on 12 and 646 DF, p-value: < 2.2e-16
model_dlm <- data.frame(Model=character(),MASE=numeric(),
BIC= numeric(),AICC=numeric(),AIC=numeric())
model_dlm = rbind(model_dlm,cbind(Model="DYNLM 5",
BIC = BIC(dynlm5),
AICC = AICc(dynlm5),
AIC = AIC(dynlm5)))
model_dlm
## Model BIC AICC AIC
## 1 DYNLM 5 3041.90343987038 2979.68548430039 2979.03331038734
checkresiduals(dynlm5 ,test = F)
model_dlm <- data.frame(Model=character() , MASE=numeric() ,
BIC= numeric() , AICC=numeric() , AIC=numeric())
hw_addi = hw(solarRad,seasonal="additive")
hw_multip = hw(solarRad,seasonal="multiplicative")
hw_multipDmp = hw(solarRad,seasonal="multiplicative" , damped = T)
hw_multiDmpExp = hw(solarRad,seasonal="multiplicative" , damped = T, exponential = T)
hw_multipExp = hw(solarRad,seasonal="multiplicative" , damped = F, exponential = T)
model_dlm = rbind(model_dlm,cbind(Model="hw_addi",MASE=accuracy(hw_addi)[6],
BIC = hw_addi$model$bic,
AICC =hw_addi$model$aicc,
AIC = hw_addi$model$aic))
model_dlm = rbind(model_dlm,cbind(Model="hw_multip",MASE=accuracy(hw_multip)[6],
BIC = hw_multip$model$bic,
AICC = hw_multip$model$aicc,
AIC = hw_multip$model$aic))
model_dlm = rbind(model_dlm,cbind(Model="hw_multipDmp",MASE=accuracy(hw_multipDmp)[6],
BIC = hw_multipDmp$model$bic,
AICC = hw_multipDmp$model$aicc,
AIC = hw_multipDmp$model$aic))
model_dlm = rbind(model_dlm,cbind(Model="hw_multiDmpExp",MASE=accuracy(hw_multiDmpExp)[6],
BIC = hw_multiDmpExp$model$bic,
AICC = hw_multiDmpExp$model$aicc,
AIC = hw_multiDmpExp$model$aic))
model_dlm = rbind(model_dlm,cbind(Model="hw_multipExp",MASE=accuracy(hw_multipExp)[6],
BIC = hw_multipExp$model$bic,
AICC = hw_multipExp$model$aicc,
AIC = hw_multipExp$model$aic))
model_dlm
## Model MASE BIC AICC
## 1 hw_addi 0.247160046952369 5511.07634885595 5435.66154268864
## 2 hw_multip 0.223307745945573 6725.11394280828 6649.69913664097
## 3 hw_multipDmp 0.203561894680514 6447.56127723734 6367.76804289028
## 4 hw_multiDmpExp 0.203200910880705 6446.18950399393 6366.39626964687
## 5 hw_multipExp 0.23204037862927 6660.57583100187 6585.16102483456
## AIC
## 1 5434.7082716606
## 2 6648.74586561293
## 3 6366.70096020697
## 4 6365.32918696357
## 5 6584.20775380652
hw_multipDmp = hw(solarRad,seasonal="multiplicative" , damped = T)
checkresiduals(hw_multipDmp,test = F)
hw_multipExp = hw(solarRad,seasonal="multiplicative" , damped = F, exponential = T)
checkresiduals(hw_multipExp,test = F)
# ETS
etsmmm <- ets(solarRad, model = "MMM", damped = TRUE)
summary(etsmmm)
## ETS(M,Md,M)
##
## Call:
## ets(y = solarRad, model = "MMM", damped = TRUE)
##
## Smoothing parameters:
## alpha = 0.7892
## beta = 0.016
## gamma = 0.0696
## phi = 0.9575
##
## Initial states:
## l = 9.2782
## b = 1.1745
## s = 0.7133 0.3063 0.6852 1.1282 1.2133 1.5082
## 1.7211 1.2962 1.1358 0.9309 0.656 0.7056
##
## sigma: 0.2341
##
## AIC AICc BIC
## 5995.550 5996.617 6076.410
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02448728 3.076938 1.948599 -5.645612 16.87559 0.3201193
## ACF1
## Training set 0.1452617
checkresiduals(etsmmm)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Md,M)
## Q* = 79.153, df = 7, p-value = 2.054e-14
##
## Model df: 17. Total lags used: 24
# Fit state-space models
modelAuto <- ets(solarRad)
summary(modelAuto)
## ETS(A,Ad,A)
##
## Call:
## ets(y = solarRad)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 1e-04
## gamma = 1e-04
## phi = 0.9388
##
## Initial states:
## l = 11.154
## b = 0.7632
## s = -10.4919 -8.137 -3.348 2.5794 8.08 11.1219
## 9.9586 6.9916 1.9573 -1.8565 -7.1607 -9.6946
##
## sigma: 2.3446
##
## AIC AICc BIC
## 5428.422 5429.489 5509.282
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01091357 2.314163 1.498521 -1.468083 12.44796 0.2461797
## ACF1
## Training set 0.1700724
models = c("MAM", "MAA", "AAA")
dampeds = c(TRUE, FALSE)
expand = expand.grid(models, dampeds)
fit.AIC = array(NA,9)
levels = array(NA, dim=c(9,2))
for (i in 1:6){
fit.AIC[i] = ets(solarRad, model = toString(expand[i , 1]),
damped = expand[i , 2])$aic
levels[i, 1] = toString(expand[i , 1])
levels[i, 2] = expand[i , 2]
}
fit.AIC[7] = ets(solarRad, model = "ANA")$aic
fit.AIC[8] = ets(solarRad, model = "MNA")$aic
fit.AIC[9] = ets(solarRad, model = "MNM")$aic
levels[7,1] = "ANA"
levels[8,1] = "MNA"
levels[9,1] = "MNM"
levels[7,2] = FALSE
levels[8,2] = FALSE
levels[9,2] = FALSE
results = data.frame(levels, fit.AIC)
colnames(results) = c("Model","Damped","AIC")
results
## Model Damped AIC
## 1 MAM TRUE 5953.502
## 2 MAA TRUE 6469.079
## 3 AAA TRUE 5428.422
## 4 MAM FALSE 6105.959
## 5 MAA FALSE 7602.755
## 6 AAA FALSE 5434.708
## 7 ANA FALSE 5449.974
## 8 MNA FALSE 6546.856
## 9 MNM FALSE 5986.778
modelaada <- ets(solarRad, model = "AAA", damped = TRUE)
summary(modelaada)
## ETS(A,Ad,A)
##
## Call:
## ets(y = solarRad, model = "AAA", damped = TRUE)
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 1e-04
## gamma = 1e-04
## phi = 0.9388
##
## Initial states:
## l = 11.154
## b = 0.7632
## s = -10.4919 -8.137 -3.348 2.5794 8.08 11.1219
## 9.9586 6.9916 1.9573 -1.8565 -7.1607 -9.6946
##
## sigma: 2.3446
##
## AIC AICc BIC
## 5428.422 5429.489 5509.282
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01091357 2.314163 1.498521 -1.468083 12.44796 0.2461797
## ACF1
## Training set 0.1700724
checkresiduals(modelaada)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,Ad,A)
## Q* = 210.76, df = 7, p-value < 2.2e-16
##
## Model df: 17. Total lags used: 24
modelaaa <- ets(solarRad, model = "AAA", damped = FALSE)
summary(modelaaa)
## ETS(A,A,A)
##
## Call:
## ets(y = solarRad, model = "AAA", damped = FALSE)
##
## Smoothing parameters:
## alpha = 0.9993
## beta = 0.0019
## gamma = 1e-04
##
## Initial states:
## l = 11.3306
## b = 0.209
## s = -10.7542 -8.4968 -3.227 3.0768 7.9264 10.9122
## 10.1422 7.3322 2.2353 -1.9528 -7.3626 -9.8316
##
## sigma: 2.3575
##
## AIC AICc BIC
## 5434.708 5435.662 5511.076
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.1179476 2.328736 1.504489 -2.230359 12.68101 0.24716 0.1703355
checkresiduals(modelaaa)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,A,A)
## Q* = 205.55, df = 8, p-value < 2.2e-16
##
## Model df: 16. Total lags used: 24
modelana <- ets(solarRad, model = "ANA")
summary(modelana)
## ETS(A,N,A)
##
## Call:
## ets(y = solarRad, model = "ANA")
##
## Smoothing parameters:
## alpha = 0.9997
## gamma = 3e-04
##
## Initial states:
## l = 22.6142
## s = -10.2887 -8.5269 -4.0534 1.7184 7.355 10.8208
## 10.4774 7.7101 2.4749 -1.7156 -6.7936 -9.1783
##
## sigma: 2.3884
##
## AIC AICc BIC
## 5449.974 5450.719 5517.358
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.0108645 2.362974 1.55041 -2.640142 12.95336 0.254704 0.1825724
checkresiduals(modelana)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 207.44, df = 10, p-value < 2.2e-16
##
## Model df: 14. Total lags used: 24
modelmadm <- ets(solarRad, model = "MAM", damped = TRUE)
summary(modelmadm)
## ETS(M,Ad,M)
##
## Call:
## ets(y = solarRad, model = "MAM", damped = TRUE)
##
## Smoothing parameters:
## alpha = 0.7922
## beta = 6e-04
## gamma = 0.0707
## phi = 0.98
##
## Initial states:
## l = 9.3693
## b = 1.2404
## s = 0.6953 0.3066 0.5802 0.9879 1.3594 1.5469
## 1.4723 1.4055 1.227 0.9573 0.6959 0.7656
##
## sigma: 0.2274
##
## AIC AICc BIC
## 5953.502 5954.569 6034.363
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1750605 3.02791 1.961614 -5.497209 17.21119 0.3222574 0.2565219
checkresiduals(modelmadm)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,M)
## Q* = 102.28, df = 7, p-value < 2.2e-16
##
## Model df: 17. Total lags used: 24
modelmnm <- ets(solarRad, model = "MNM")
summary(modelmnm)
## ETS(M,N,M)
##
## Call:
## ets(y = solarRad, model = "MNM")
##
## Smoothing parameters:
## alpha = 0.7497
## gamma = 0.072
##
## Initial states:
## l = 22.6418
## s = 0.729 0.3074 0.6168 0.9928 1.2851 1.5457
## 1.5509 1.5263 1.1103 0.9424 0.6392 0.7541
##
## sigma: 0.235
##
## AIC AICc BIC
## 5986.778 5987.524 6054.162
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.2103645 2.971741 1.909662 -4.962262 16.94307 0.3137226 0.2400707
checkresiduals(modelmnm)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,N,M)
## Q* = 77.462, df = 10, p-value = 1.575e-12
##
## Model df: 14. Total lags used: 24
We are going to use the best models from above as mentioned and then use those to forecast the dataset “data.x.csv”, which we have already loaded into the variable “forecastData” and subsequently converted into timeseries “forecastData_ts”.
We are going to use only 3 models to forecast which are- * Koyk’s DLM * Holt Winter’s exponential multiplicative * ETS (A,Ad,A)
koyck= koyckDlm(y=as.vector(solarRad),x=as.vector(prec))
hw_multipExp = hw(solarRad,seasonal="multiplicative" , damped = F, exponential = T)
koyckforecast = forecast(koyck, x = as.vector(forecastData_ts), h = 24)$forecasts
hwforecast = hw_multipExp$mean
etsforecast = as.vector(forecast(modelaada,h=24)$mean)
y_x = c(prec,forecastData_ts)
y_koyck = c(solarRad,koyckforecast)
y_hw = c(solarRad, hwforecast)
y_ets = c(solarRad,etsforecast)
Dataforecast = ts.intersect(
ts(y_x,start=c(1960,1),frequency = 12),
ts(y_koyck,start=c(1960,1),frequency = 12),
ts(y_hw,start=c(1960,1),frequency = 12),
ts(y_ets,start=c(1960,1),frequency = 12)
)
colnames(Dataforecast) = c ("prec", "Koyck", "Holt Winters" , "ETS")
par(mar=c(4,4.5,5,1),cex.main=0.75, cex.lab=0.75, cex.axis=0.75)
ts.plot(Dataforecast,xlim=c(2014,2016),ylim=c(0,40),
gpars= list(col=RColorBrewer::brewer.pal(7, "Set1")),
plot.type = c("single"),
main = "Forecasting of Solar Radtiation using the best fit models")
legend("topleft",text.width = 5, lty = 1, adj = c(0, 0.6),
bty = "n",col=RColorBrewer::brewer.pal(8, "Set1"),
cex = 0.65, c("prec", "Koyck", "HW" , "ETS (A,Ad,A)"))
propertyData <- read.csv("data2.csv")
propertyData <- ts(propertyData, start = c(2003,3), frequency = 4)
price <- propertyData[,2]
change <- propertyData[,3]
propertyJoint <- ts.intersect(price, change)
plot(propertyJoint, yax.flip=T)
cor(propertyJoint)
## price change
## price 1.0000000 0.6970439
## change 0.6970439 1.0000000
ccf(as.vector(price), as.vector(change), ylab='CCF',
main="Sample CCF between quarterly Property Price and population change in Vic Sep 2003 to Dec 2016")
propertyDiff <- ts.intersect(diff(diff(price,4)),diff(diff(change,4)))
prewhiten(as.vector(propertyDiff[,1]),as.vector(propertyDiff[,2]),
ylab='CCF', main="Sample CCF after prewhitening")