library(astsa)
library(fGarch)
## Loading required package: timeDate
## Loading required package: timeSeries
## Loading required package: fBasics
##
## Attaching package: 'fBasics'
## The following object is masked from 'package:astsa':
##
## nyse
# set your working directory
load("Electricity.RData")
plot.ts(electricity)
log.electricity = log(electricity)
plot.ts(log.electricity)
diff.log.electricity = diff(log.electricity, lag = 1)
plot.ts(diff.log.electricity)
par(mfrow=c(1,2))
acf(diff.log.electricity, lag=60)
pacf(diff.log.electricity, lag=60)
sdiff.diff.log.electricity=diff(diff.log.electricity, lag=12)
par(mfrow=c(1,1))
plot.ts(sdiff.diff.log.electricity)
par(mfrow=c(1,2))
acf(sdiff.diff.log.electricity, lag=60)
pacf(sdiff.diff.log.electricity, lag=60)
There is a seasonal cutoff on the ACF at lag 1. Hence we get Q=1. As for non-seasonal cutoff it is seen at either lag 2 or 3 on the ACF so we will try both q=2 and q=3. Models to be tried are: ARIMA(0,1,2) * ARIMA(0,1,1) and ARIMA(0,1,3) * ARIMA(0,1,1)
electricity.sarima = sarima(log.electricity, p=0, d=1, q=2, P=0, D=1, Q=1, S=12)
## initial value -3.259231
## iter 2 value -3.522197
## iter 3 value -3.579146
## iter 4 value -3.599099
## iter 5 value -3.604570
## iter 6 value -3.606041
## iter 7 value -3.607197
## iter 8 value -3.607284
## iter 9 value -3.607286
## iter 10 value -3.607287
## iter 10 value -3.607287
## iter 10 value -3.607287
## final value -3.607287
## converged
## initial value -3.622337
## iter 2 value -3.625238
## iter 3 value -3.626447
## iter 4 value -3.626677
## iter 5 value -3.626715
## iter 6 value -3.626734
## iter 7 value -3.626735
## iter 7 value -3.626735
## iter 7 value -3.626735
## final value -3.626735
## converged
electricity.sarima2 = sarima(log.electricity, p=0, d=1, q=3, P=0, D=1, Q=1, S=12)
## initial value -3.259231
## iter 2 value -3.511239
## iter 3 value -3.578893
## iter 4 value -3.598227
## iter 5 value -3.603412
## iter 6 value -3.606701
## iter 7 value -3.608362
## iter 8 value -3.608443
## iter 9 value -3.608447
## iter 10 value -3.608450
## iter 10 value -3.608450
## final value -3.608450
## converged
## initial value -3.623348
## iter 2 value -3.626447
## iter 3 value -3.627437
## iter 4 value -3.627521
## iter 5 value -3.627759
## iter 6 value -3.627893
## iter 7 value -3.627899
## iter 7 value -3.627899
## iter 7 value -3.627899
## final value -3.627899
## converged
We observe that ACF of residuals are within the threshold for both the above models. Both the models look quite simlar. As per p values for Ljung-Box statistic they are low between 0.0-0.2,which may because of small data set.
electricity.sarima$AIC
## [1] -4.27201
electricity.sarima$BIC
## [1] -4.231928
electricity.sarima2$AIC
## [1] -4.269197
electricity.sarima2$BIC
## [1] -4.219095
AIC and BIC values for model with q=2 is lower than that of model with q=2.Also we saw earlier that ACF of residuals plot was also good and it more simpler. Hence we select this model with q=2 as our best model selected for forecasting.
pred.electricity = sarima.for(log.electricity, n.ahead=4, p=0, d=1, q=2, P=0, D=1, Q=1, S=12)
pred.electricity
## $pred
## Jan Feb Mar Apr
## 2006 12.79075 12.65921 12.69988 12.63031
##
## $se
## Jan Feb Mar Apr
## 2006 0.02607121 0.03015368 0.03109173 0.03200230
exp(pred.electricity$pred)
## Jan Feb Mar Apr
## 2006 358882.1 314647.2 327707.6 305684.4
Taking exponential to remove the log transformation of the data done earlier and get prediction values.
data(gtemp)
train.gtemp = ts(gtemp[1:120]) # train set: for the model selection and fit
test.gtemp = ts(gtemp[121:130]) # test set: only for the evaluation (for calculating prediction error)
plot.ts(train.gtemp)
We can see that there is a trend and it is additive or A. There appears no seasonality so third parameter is N. For the error term we will try with both Additive and Multiplicative A and M.
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
## The following object is masked from 'package:astsa':
##
## gas
train.gtemp.ets = ets(train.gtemp, model="AAN")
train.gtemp.ets2 = ets(train.gtemp)
train.gtemp.ets$aic
## [1] 27.15076
train.gtemp.ets$bic
## [1] 41.08821
train.gtemp.ets2$aic
## [1] 25.59878
train.gtemp.ets2$bic
## [1] 33.96126
#train.gtemp.ets = ets(train.gtemp, model="MAN")
It seems that MAN model doesnt work, so we have run AAN and auto ETS. ETS auto function selected ANN as the appropriate model. So making a comparision of AAN and ANN models in terms of BIC and AIC, we observe that ANN model has lower AIC and BIC. Hence, our final model selected for forecasting is ANN.
pred.ets.gtemp = forecast(train.gtemp.ets, h=10)
plot(pred.ets.gtemp)
print(pred.ets.gtemp$mean)
## Time Series:
## Start = 121
## End = 130
## Frequency = 1
## [1] 0.3740666 0.3791551 0.3842436 0.3893321 0.3944206 0.3995092 0.4045977
## [8] 0.4096862 0.4147747 0.4198632
print(mean((pred.ets.gtemp$mean - as.vector(test.gtemp))^2))
## [1] 0.01915633
pred.ets.gtemp = forecast(train.gtemp.ets2, h=10)
plot(pred.ets.gtemp)
print(pred.ets.gtemp$mean)
## Time Series:
## Start = 121
## End = 130
## Frequency = 1
## [1] 0.3762138 0.3762138 0.3762138 0.3762138 0.3762138 0.3762138 0.3762138
## [8] 0.3762138 0.3762138 0.3762138
print(mean((pred.ets.gtemp$mean - as.vector(test.gtemp))^2))
## [1] 0.02519147
We observe that prediction MSE error is 0.02519
pred.arima.gtemp = sarima.for(train.gtemp, 10, p=0, d=1, q=2)
pred.arima.gtemp$pred
## Time Series:
## Start = 121
## End = 130
## Frequency = 1
## [1] 0.3305345 0.3581846 0.3635754 0.3689661 0.3743569 0.3797477 0.3851385
## [8] 0.3905293 0.3959201 0.4013109
print(mean((pred.arima.gtemp$pred - as.vector(test.gtemp))^2))
## [1] 0.02408861
We observe that prediction MSE error is 0.02408
The ARIMA model has lower MSE compared to the ETS model however the difference is very small. ### Exercise 3: Exponential Smoothing Model ### (a) By visually checking first 370 time series plot, decide which ESM model seems the most appropriate. Explain your choice.
train.electricity = ts(electricity[1:370], freq=12) # train set: for the model selection and fit
test.electricity = ts(electricity[371:396], freq=12) # test set: only for the evaluation (for calculating prediction error)
plot.ts(train.electricity)
From time series above we can see that there is a clear trend and it is additive or A. There appears to be a seasonality so third parameter is M. For the error term we will also choose as Multiplicative M. So our model will be MAM
train.electricity.ets = ets(train.electricity, model="MAM")
train.electricity.ets2 = ets(train.electricity)
train.electricity.ets$aic
## [1] 8678.796
train.electricity.ets$bic
## [1] 8749.239
train.electricity.ets2$aic
## [1] 8678.796
train.electricity.ets2$bic
## [1] 8749.239
We observe that ETS auto function selected MAM model which is same as our manual selected mode. Hence, our final model selected for forecasting is MAM.
pred.ets.electricity = forecast(train.electricity, h=26)
plot(pred.ets.electricity)
print(pred.ets.electricity$mean)
## Jan Feb Mar Apr May Jun Jul Aug
## 31
## 32 346744.6 306258.0 317535.3 295170.2 314895.6 340250.6 374844.9 372008.6
## 33 346929.7 306418.0 317697.6 295317.9 315049.8 340413.7 375020.7 372179.4
## Sep Oct Nov Dec
## 31 303293.6 330488.9
## 32 323057.0 307804.4 303462.6 330669.2
## 33 323202.2 307939.7 303593.2 330808.5
print(mean((pred.ets.electricity$mean - as.vector(test.electricity))^2))
## [1] 142228849
We observe that the next 26 predicted values follow the trend and close to latest values. Also prediction MSE error is 142228849.
pred.electricity = sarima.for(log(train.electricity), n.ahead=26, p=0, d=1, q=2, P=0, D=1, Q=1, S=12)
pred.electricity
## $pred
## Jan Feb Mar Apr May Jun Jul Aug
## 31
## 32 12.73824 12.60966 12.65689 12.59127 12.66830 12.75219 12.85707 12.86036
## 33 12.75685 12.62827 12.67550 12.60988 12.68691 12.77080 12.87568 12.87898
## Sep Oct Nov Dec
## 31 12.60011 12.68877
## 32 12.70895 12.64849 12.61628 12.70739
## 33 12.72756 12.66710 12.63489 12.72600
##
## $se
## Jan Feb Mar Apr May Jun Jul
## 31
## 32 0.03133317 0.03233374 0.03330427 0.03424730 0.03516506 0.03605946 0.03693221
## 33 0.04364537 0.04462747 0.04558842 0.04652952 0.04745197 0.04835682 0.04924505
## Aug Sep Oct Nov Dec
## 31 0.02613900 0.03029958
## 32 0.03778481 0.03861858 0.03943474 0.04131902 0.04264065
## 33 0.05011754 0.05097510 0.05181847 0.05358995 0.05488736
exp(pred.electricity$pred)
## Jan Feb Mar Apr May Jun Jul Aug
## 31
## 32 340522.9 299436.7 313918.1 293980.5 317521.8 345308.0 383490.0 384755.5
## 33 346920.2 305062.1 319815.6 299503.4 323486.9 351795.2 390694.5 391983.8
## Sep Oct Nov Dec
## 31 296591.2 324089.3
## 32 330693.6 311292.9 301425.5 330177.8
## 33 336906.2 317141.0 307088.2 336380.8
print(mean((exp(pred.electricity$pred) - as.vector(test.electricity))^2))
## [1] 66245886
We observe from plot above that prediction MSE error is 66245886
We observe that ESM model test MSE is much lower than the ARIMA test MSE. Hence we can conclude that ESM is a better model for selection compared to ARIMA in this case.
load("usdhkd.RData")
#rtn.usdhkd = diff(log(usdhkd))
#plot.ts(rtn.usdhkd)
plot.ts(usdhkd)
ACF on data and squared data
par(mfrow=c(1,2))
acf(usdhkd);pacf(usdhkd)
acf(usdhkd^2);pacf((usdhkd^2))
From plot above we observe a cutoff at lag 1 on the PACF. The ACF tails off to zero, at the same time there appears to be a cutoff at lag1 for ACF too.
usdhkd.m1=garchFit(~garch(1,0), data=usdhkd, trace=F)
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
summary(usdhkd.m1)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~garch(1, 0), data = usdhkd, trace = F)
##
## Mean and Variance Equation:
## data ~ garch(1, 0)
## <environment: 0x7fc7bcbd9e00>
## [data = usdhkd]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu omega alpha1
## -0.00117994 0.00045482 0.49888267
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu -1.180e-03 1.053e-03 -1.120 0.262606
## omega 4.548e-04 4.296e-05 10.588 < 2e-16 ***
## alpha1 4.989e-01 1.336e-01 3.733 0.000189 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## 977.8896 normalized: 2.268885
##
## Description:
## Tue May 4 16:04:01 2021 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 4337.001 0
## Shapiro-Wilk Test R W 0.8287354 0
## Ljung-Box Test R Q(10) 20.34018 0.02619446
## Ljung-Box Test R Q(15) 26.32464 0.03474572
## Ljung-Box Test R Q(20) 30.61091 0.06054118
## Ljung-Box Test R^2 Q(10) 9.38141 0.4963288
## Ljung-Box Test R^2 Q(15) 10.35731 0.7966824
## Ljung-Box Test R^2 Q(20) 11.16525 0.9418239
## LM Arch Test R TR^2 9.46025 0.663197
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## -4.523850 -4.495547 -4.523946 -4.512675
usdhkd.m2=garchFit(~garch(2,0), data=usdhkd, trace=F)
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
summary(usdhkd.m2)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~garch(2, 0), data = usdhkd, trace = F)
##
## Mean and Variance Equation:
## data ~ garch(2, 0)
## <environment: 0x7fc7c0303138>
## [data = usdhkd]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu omega alpha1 alpha2
## -0.00046702 0.00026598 0.54327969 0.48701087
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu -4.670e-04 8.699e-04 -0.537 0.591
## omega 2.660e-04 3.473e-05 7.658 1.89e-14 ***
## alpha1 5.433e-01 1.323e-01 4.108 3.99e-05 ***
## alpha2 4.870e-01 1.108e-01 4.395 1.11e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## 997.0851 normalized: 2.313422
##
## Description:
## Tue May 4 16:04:01 2021 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 1450.006 0
## Shapiro-Wilk Test R W 0.8744017 0
## Ljung-Box Test R Q(10) 18.6254 0.04528692
## Ljung-Box Test R Q(15) 26.25545 0.03542182
## Ljung-Box Test R Q(20) 28.93381 0.08907092
## Ljung-Box Test R^2 Q(10) 8.642008 0.5663814
## Ljung-Box Test R^2 Q(15) 11.90956 0.6858611
## Ljung-Box Test R^2 Q(20) 16.36083 0.6940018
## LM Arch Test R TR^2 9.24412 0.68195
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## -4.608283 -4.570547 -4.608454 -4.593384
usdhkd.m3=garchFit(~garch(1,1), data=usdhkd, trace=F)
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
summary(usdhkd.m3)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~garch(1, 1), data = usdhkd, trace = F)
##
## Mean and Variance Equation:
## data ~ garch(1, 1)
## <environment: 0x7fc7c3ca2710>
## [data = usdhkd]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu omega alpha1 beta1
## -1.6576e-04 1.5468e-05 2.4065e-01 8.0449e-01
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu -1.658e-04 8.902e-04 -0.186 0.8523
## omega 1.547e-05 1.904e-05 0.812 0.4167
## alpha1 2.406e-01 1.362e-01 1.767 0.0772 .
## beta1 8.045e-01 1.052e-01 7.648 2.04e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## 999.3168 normalized: 2.3186
##
## Description:
## Tue May 4 16:04:01 2021 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 2690.144 0
## Shapiro-Wilk Test R W 0.858535 0
## Ljung-Box Test R Q(10) 20.78275 0.02266047
## Ljung-Box Test R Q(15) 24.94071 0.05074532
## Ljung-Box Test R Q(20) 26.92712 0.1373269
## Ljung-Box Test R^2 Q(10) 3.768855 0.9571724
## Ljung-Box Test R^2 Q(15) 6.011632 0.9795517
## Ljung-Box Test R^2 Q(20) 8.392027 0.9889398
## LM Arch Test R TR^2 4.156909 0.9804356
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## -4.618639 -4.580903 -4.618810 -4.603740
usdhkd.m4=garchFit(~garch(2,2), data=usdhkd, trace=F)
## Warning in sqrt(diag(fit$cvar)): NaNs produced
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
summary(usdhkd.m4)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(formula = ~garch(2, 2), data = usdhkd, trace = F)
##
## Mean and Variance Equation:
## data ~ garch(2, 2)
## <environment: 0x7fc7a5bb3248>
## [data = usdhkd]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu omega alpha1 alpha2 beta1 beta2
## -1.0068e-04 1.3247e-05 2.4447e-01 1.0000e-08 5.5623e-01 2.4114e-01
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu -1.007e-04 8.768e-04 -0.115 0.90858
## omega 1.325e-05 9.320e-06 1.421 0.15519
## alpha1 2.445e-01 8.728e-02 2.801 0.00509 **
## alpha2 1.000e-08 NA NA NA
## beta1 5.562e-01 NA NA NA
## beta2 2.411e-01 NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## 1000.581 normalized: 2.321534
##
## Description:
## Tue May 4 16:04:02 2021 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 2936.158 0
## Shapiro-Wilk Test R W 0.859941 0
## Ljung-Box Test R Q(10) 21.39029 0.01853056
## Ljung-Box Test R Q(15) 25.39566 0.04487562
## Ljung-Box Test R Q(20) 27.34351 0.1258733
## Ljung-Box Test R^2 Q(10) 3.013569 0.9811029
## Ljung-Box Test R^2 Q(15) 5.023137 0.9919287
## Ljung-Box Test R^2 Q(20) 7.216473 0.9959124
## LM Arch Test R TR^2 3.459938 0.9913337
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## -4.615226 -4.558622 -4.615607 -4.592877
We select model as GARCH(1,1) because Squared residual terms for R squared are uncorrelated and AIC/BIC is least for them among all the 4 models and also it has less complex compared to GARCH(2,2)
predict(usdhkd.m3, n.ahead=5, plot=TRUE, conf =.9)
## meanForecast meanError standardDeviation lowerInterval upperInterval
## 1 -0.0001657554 0.01384428 0.01384428 -0.02293757 0.02260606
## 2 -0.0001657554 0.01468953 0.01468953 -0.02432788 0.02399637
## 3 -0.0001657554 0.01552382 0.01552382 -0.02570016 0.02536865
## 4 -0.0001657554 0.01635032 0.01635032 -0.02705965 0.02672813
## 5 -0.0001657554 0.01717167 0.01717167 -0.02841064 0.02807913
Prediction of the next 5 volatility can be seen in the end of the graph and it seems to follow the latest values and is a flat line.