Load the required libraries and set the working directory

library(astsa)
library(fGarch)
## Loading required package: timeDate
## Loading required package: timeSeries
## Loading required package: fBasics
## 
## Attaching package: 'fBasics'
## The following object is masked from 'package:astsa':
## 
##     nyse
# set your working directory, e.g., setwd("path/to/your/data")

Exercise 1: Seasonal ARIMA

(a) By visually checking, decide what SARIMA models seem appropriate, i.e., specify p, d, q and P, D, Q in the SARIMA model ARIMA(p, d, q) × ARIMA(P, D, Q)_s. Choose the two most appropriate models and explain your answer.

load("Electricity.RData")
plot.ts(electricity)

log.electricity = log(electricity)
plot.ts(log.electricity)

diff.log.electricity = diff(log.electricity, lag = 1)
plot.ts(diff.log.electricity)

par(mfrow=c(1,2))
acf(diff.log.electricity, lag.max=60)
pacf(diff.log.electricity, lag.max=60)

sdiff.diff.log.electricity=diff(diff.log.electricity, lag=12)
par(mfrow=c(1,1))
plot.ts(sdiff.diff.log.electricity)

par(mfrow=c(1,2))
acf(sdiff.diff.log.electricity, lag.max=60)
pacf(sdiff.diff.log.electricity, lag.max=60)

The seasonal ACF cuts off after one seasonal lag (lag 12), so we take Q=1. The non-seasonal ACF cuts off at either lag 2 or lag 3, so we will try both q=2 and q=3. With the regular and seasonal differencing already applied (d=1, D=1), the candidate models are ARIMA(0,1,2) × ARIMA(0,1,1)_12 and ARIMA(0,1,3) × ARIMA(0,1,1)_12.

(b) Fit the two SARIMA models you chose in part (a) using the sarima function in R and interpret the results.

electricity.sarima = sarima(log.electricity, p=0, d=1, q=2, P=0, D=1, Q=1, S=12)
## initial  value -3.259231 
## iter   2 value -3.522197
## iter   3 value -3.579146
## iter   4 value -3.599099
## iter   5 value -3.604570
## iter   6 value -3.606041
## iter   7 value -3.607197
## iter   8 value -3.607284
## iter   9 value -3.607286
## iter  10 value -3.607287
## iter  10 value -3.607287
## iter  10 value -3.607287
## final  value -3.607287 
## converged
## initial  value -3.622337 
## iter   2 value -3.625238
## iter   3 value -3.626447
## iter   4 value -3.626677
## iter   5 value -3.626715
## iter   6 value -3.626734
## iter   7 value -3.626735
## iter   7 value -3.626735
## iter   7 value -3.626735
## final  value -3.626735 
## converged

electricity.sarima2 = sarima(log.electricity, p=0, d=1, q=3, P=0, D=1, Q=1, S=12)
## initial  value -3.259231 
## iter   2 value -3.511239
## iter   3 value -3.578893
## iter   4 value -3.598227
## iter   5 value -3.603412
## iter   6 value -3.606701
## iter   7 value -3.608362
## iter   8 value -3.608443
## iter   9 value -3.608447
## iter  10 value -3.608450
## iter  10 value -3.608450
## final  value -3.608450 
## converged
## initial  value -3.623348 
## iter   2 value -3.626447
## iter   3 value -3.627437
## iter   4 value -3.627521
## iter   5 value -3.627759
## iter   6 value -3.627893
## iter   7 value -3.627899
## iter   7 value -3.627899
## iter   7 value -3.627899
## final  value -3.627899 
## converged

The residual ACFs are within the significance bounds for both models, and the two fits look quite similar. The Ljung-Box p-values are low (roughly 0.0–0.2), which may be due to the small data set.

(c) Check the model diagnostics of the two candidate models and choose the final one based on the diagnostic plots and an AIC/BIC comparison.

electricity.sarima$AIC
## [1] -4.27201
electricity.sarima$BIC
## [1] -4.231928
electricity.sarima2$AIC
## [1] -4.269197
electricity.sarima2$BIC
## [1] -4.219095

The AIC and BIC values for the model with q=2 are lower than those for the model with q=3. We also saw earlier that its residual ACF plot was clean, and it is the simpler model. Hence we select the q=2 model as our final model for forecasting.

(d) Make a prediction for the next 4 values based on your final model. Make sure that you predict the values on the original scale, not the transformed values.

pred.electricity = sarima.for(log.electricity, n.ahead=4, p=0, d=1, q=2, P=0, D=1, Q=1, S=12)

pred.electricity
## $pred
##           Jan      Feb      Mar      Apr
## 2006 12.79075 12.65921 12.69988 12.63031
## 
## $se
##             Jan        Feb        Mar        Apr
## 2006 0.02607121 0.03015368 0.03109173 0.03200230
exp(pred.electricity$pred)
##           Jan      Feb      Mar      Apr
## 2006 358882.1 314647.2 327707.6 305684.4

We exponentiate the forecasts to undo the log transformation applied earlier, giving predictions on the original scale.
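
Note that exp() of a log-scale point forecast targets the median of the original-scale forecast distribution. As a small optional sketch, assuming the log-scale forecast errors are approximately normal, the usual lognormal bias correction adds half the forecast variance before exponentiating:

# Bias-corrected mean forecast: if the log-scale forecast is N(m, s^2),
# the mean on the original scale is exp(m + s^2/2) (lognormal mean).
exp(pred.electricity$pred + pred.electricity$se^2 / 2)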

Exercise 2

(a) By visually checking the time series plot of the first 120 observations, decide which ESM model seems the most appropriate. For example, the answer should be like “Additive error + no trend + additive seasonality”. Explain your choice.

data(gtemp)
train.gtemp = ts(gtemp[1:120]) # train set: for the model selection and fit
test.gtemp = ts(gtemp[121:130]) # test set: only for the evaluation (for calculating prediction error)

plot.ts(train.gtemp)

The plot shows a trend, which looks additive (A). There appears to be no seasonality, so the third letter is N. For the error term we will try both additive (A) and multiplicative (M).

(b) Fit the model for the first 120 data points which you chose in part (a). Use the ets function with the three-letter model specification. Report autoplot() and interpret how your chosen model decomposes the data.

library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:astsa':
## 
##     gas
train.gtemp.ets = ets(train.gtemp, model="AAN")
train.gtemp.ets2 = ets(train.gtemp)
train.gtemp.ets$aic
## [1] 27.15076
train.gtemp.ets$bic
## [1] 41.08821
train.gtemp.ets2$aic
## [1] 25.59878
train.gtemp.ets2$bic
## [1] 33.96126
#train.gtemp.ets = ets(train.gtemp, model="MAN")

The MAN model does not work here: ets() rejects multiplicative-error models because the series contains non-positive values. So we ran AAN and the automatic ets selection, which chose ANN. Comparing AAN and ANN in terms of AIC and BIC, the ANN model has the lower values of both. Hence our final model for forecasting is ANN.
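
The question also asks for autoplot(); forecast's autoplot method on an ets fit displays how the chosen model decomposes the series. For ANN this should show just the observed series and the smoothed level, with no trend or seasonal panels:

# Decomposition of the fitted ANN model (level + remainder only).
autoplot(train.gtemp.ets2)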

pred.ets.gtemp = forecast(train.gtemp.ets, h=10)
plot(pred.ets.gtemp)

print(pred.ets.gtemp$mean)
## Time Series:
## Start = 121 
## End = 130 
## Frequency = 1 
##  [1] 0.3740666 0.3791551 0.3842436 0.3893321 0.3944206 0.3995092 0.4045977
##  [8] 0.4096862 0.4147747 0.4198632
print(mean((pred.ets.gtemp$mean - as.vector(test.gtemp))^2))
## [1] 0.01915633

(c) Make a prediction for the next 10 values based on your final model. Calculate the prediction error using the test set. As we know the true Y from the test set, it is the mean squared error, i.e., the average of (Y_t − Ŷ_t)^2 for t = 121, …, 130.
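
The prediction error defined above is just the mean squared difference between forecasts and the held-out values. As a small convenience (the mse helper below is our own, not from a package), it can be wrapped in a function equivalent to the inline computations used in this report:

# Mean squared prediction error between forecasts and test values.
mse <- function(pred, actual) mean((as.vector(pred) - as.vector(actual))^2)
# e.g., mse(pred.ets.gtemp$mean, test.gtemp)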

pred.ets.gtemp = forecast(train.gtemp.ets2, h=10)
plot(pred.ets.gtemp)

print(pred.ets.gtemp$mean)
## Time Series:
## Start = 121 
## End = 130 
## Frequency = 1 
##  [1] 0.3762138 0.3762138 0.3762138 0.3762138 0.3762138 0.3762138 0.3762138
##  [8] 0.3762138 0.3762138 0.3762138
print(mean((pred.ets.gtemp$mean - as.vector(test.gtemp))^2))
## [1] 0.02519147

The prediction MSE is 0.02519.

(d) Now fit an ARIMA model to the first 120 data points using the model chosen in Homework 1, Exercise 4. Predict the next 10 values based on this ARIMA model. Just borrow your answer from HW 1 (i.e., no need to explain your model selection process), fit the model, and calculate the prediction error.

pred.arima.gtemp = sarima.for(train.gtemp, 10, p=0, d=1, q=2)

pred.arima.gtemp$pred
## Time Series:
## Start = 121 
## End = 130 
## Frequency = 1 
##  [1] 0.3305345 0.3581846 0.3635754 0.3689661 0.3743569 0.3797477 0.3851385
##  [8] 0.3905293 0.3959201 0.4013109
print(mean((pred.arima.gtemp$pred - as.vector(test.gtemp))^2))
## [1] 0.02408861

The prediction MSE is 0.02409.

(e) Compare prediction performance between ESM and ARIMA

The ARIMA model has a lower MSE than the ETS model, although the difference is very small.

Exercise 3: Exponential Smoothing Model

(a) By visually checking the time series plot of the first 370 observations, decide which ESM model seems the most appropriate. Explain your choice.

train.electricity = ts(electricity[1:370], freq=12) # train set: for the model selection and fit
test.electricity = ts(electricity[371:396], freq=12) # test set: only for the evaluation (for calculating prediction error)
plot.ts(train.electricity)

From the time series plot above we can see a clear trend, which looks additive (A). There is seasonality whose swings grow with the level of the series, so the seasonal component is multiplicative (M). We also choose a multiplicative error term (M). So our model will be MAM.
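
A quick optional check of the multiplicative-seasonality choice: if the seasonal swings grow with the level, they should look roughly constant in size after a log transform:

# Seasonal amplitude should stabilize on the log scale if the
# seasonality is multiplicative rather than additive.
plot.ts(log(train.electricity))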

(b) Fit the model for the first 370 data points which you chose in part (a). Use the ets function with the three-letter model specification. Report autoplot() and interpret how your chosen model decomposes the data.

train.electricity.ets = ets(train.electricity, model="MAM")
train.electricity.ets2 = ets(train.electricity)
train.electricity.ets$aic
## [1] 8678.796
train.electricity.ets$bic
## [1] 8749.239
train.electricity.ets2$aic
## [1] 8678.796
train.electricity.ets2$bic
## [1] 8749.239

The automatic ets selection also chose the MAM model, the same as our manual choice. Hence our final model for forecasting is MAM.
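
As the question asks, autoplot() on the fitted object shows how the MAM model decomposes the series:

# Decomposition of the fitted MAM model: observed series, level,
# slope, and multiplicative seasonal component.
autoplot(train.electricity.ets)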

(c) Make a prediction for the next 26 values based on your final model. Calculate the prediction error using the validation set. As we know the true Y from the validation set, it is the mean squared error, i.e., the average of (Y_t − Ŷ_t)^2 for t = 371, …, 396.

pred.ets.electricity = forecast(train.electricity.ets, h=26) # forecast the fitted MAM model
plot(pred.ets.electricity)

print(pred.ets.electricity$mean)
##         Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 31                                                                        
## 32 346744.6 306258.0 317535.3 295170.2 314895.6 340250.6 374844.9 372008.6
## 33 346929.7 306418.0 317697.6 295317.9 315049.8 340413.7 375020.7 372179.4
##         Sep      Oct      Nov      Dec
## 31                   303293.6 330488.9
## 32 323057.0 307804.4 303462.6 330669.2
## 33 323202.2 307939.7 303593.2 330808.5
print(mean((pred.ets.electricity$mean - as.vector(test.electricity))^2))
## [1] 142228849

The 26 predicted values follow the trend and seasonal pattern of the most recent data. The prediction MSE is 142228849.

(d) Now fit the SARIMA model chosen in Exercise 1 to the first 370 data points. Predict the next 26 values based on this SARIMA model. Just borrow your answer from Exercise 1, fit the model, and calculate the prediction error.

pred.electricity = sarima.for(log(train.electricity), n.ahead=26, p=0, d=1, q=2, P=0, D=1, Q=1, S=12)

pred.electricity
## $pred
##         Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 31                                                                        
## 32 12.73824 12.60966 12.65689 12.59127 12.66830 12.75219 12.85707 12.86036
## 33 12.75685 12.62827 12.67550 12.60988 12.68691 12.77080 12.87568 12.87898
##         Sep      Oct      Nov      Dec
## 31                   12.60011 12.68877
## 32 12.70895 12.64849 12.61628 12.70739
## 33 12.72756 12.66710 12.63489 12.72600
## 
## $se
##           Jan        Feb        Mar        Apr        May        Jun        Jul
## 31                                                                             
## 32 0.03133317 0.03233374 0.03330427 0.03424730 0.03516506 0.03605946 0.03693221
## 33 0.04364537 0.04462747 0.04558842 0.04652952 0.04745197 0.04835682 0.04924505
##           Aug        Sep        Oct        Nov        Dec
## 31                                  0.02613900 0.03029958
## 32 0.03778481 0.03861858 0.03943474 0.04131902 0.04264065
## 33 0.05011754 0.05097510 0.05181847 0.05358995 0.05488736
exp(pred.electricity$pred)
##         Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 31                                                                        
## 32 340522.9 299436.7 313918.1 293980.5 317521.8 345308.0 383490.0 384755.5
## 33 346920.2 305062.1 319815.6 299503.4 323486.9 351795.2 390694.5 391983.8
##         Sep      Oct      Nov      Dec
## 31                   296591.2 324089.3
## 32 330693.6 311292.9 301425.5 330177.8
## 33 336906.2 317141.0 307088.2 336380.8
print(mean((exp(pred.electricity$pred) - as.vector(test.electricity))^2))
## [1] 66245886

The prediction MSE is 66245886.

(e) Compare prediction performance between ESM and SARIMA.

We observe that the SARIMA test MSE (66245886) is much lower than the ESM test MSE (142228849). Hence we conclude that SARIMA is the better model for this series.

Exercise 4: ARCH and GARCH Model

(a) Generate the time series plot on exchange rate. Generate ACF and PACF from both raw data and squared data, respectively. Make comments based on your observation.

load("usdhkd.RData")
#rtn.usdhkd = diff(log(usdhkd))
#plot.ts(rtn.usdhkd)
plot.ts(usdhkd)

ACF and PACF of the raw and squared data

par(mfrow=c(1,2))
acf(usdhkd); pacf(usdhkd)

acf(usdhkd^2); pacf(usdhkd^2)

From the plots above, the raw series shows a cutoff at lag 1 on the PACF; the ACF tails off toward zero, though it could also be read as cutting off at lag 1. The squared series appears autocorrelated as well, which points to conditional heteroskedasticity and motivates ARCH/GARCH modelling.
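
A formal counterpart to the visual check is a Ljung-Box test on the squared series (a minimal sketch using base R's Box.test; the lag of 10 is an arbitrary choice). A small p-value indicates autocorrelation in the squared data, i.e., ARCH effects:

# Test for autocorrelation in the squared series (ARCH effects).
Box.test(usdhkd^2, lag = 10, type = "Ljung-Box")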

(b) Fit four models in total – ARCH(1), ARCH(2), GARCH(1,1) and GARCH(2,2). Then choose the best one based on diagnostic tests on the residuals and the AIC criterion. There is no single correct model; choose the one that you think is optimal.

1. ARCH(1)

usdhkd.m1=garchFit(~garch(1,0), data=usdhkd, trace=F)
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.
summary(usdhkd.m1)
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(1, 0), data = usdhkd, trace = F) 
## 
## Mean and Variance Equation:
##  data ~ garch(1, 0)
## <environment: 0x7fc7bcbd9e00>
##  [data = usdhkd]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##          mu        omega       alpha1  
## -0.00117994   0.00045482   0.49888267  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##          Estimate  Std. Error  t value Pr(>|t|)    
## mu     -1.180e-03   1.053e-03   -1.120 0.262606    
## omega   4.548e-04   4.296e-05   10.588  < 2e-16 ***
## alpha1  4.989e-01   1.336e-01    3.733 0.000189 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  977.8896    normalized:  2.268885 
## 
## Description:
##  Tue May  4 16:04:01 2021 by user:  
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value   
##  Jarque-Bera Test   R    Chi^2  4337.001  0         
##  Shapiro-Wilk Test  R    W      0.8287354 0         
##  Ljung-Box Test     R    Q(10)  20.34018  0.02619446
##  Ljung-Box Test     R    Q(15)  26.32464  0.03474572
##  Ljung-Box Test     R    Q(20)  30.61091  0.06054118
##  Ljung-Box Test     R^2  Q(10)  9.38141   0.4963288 
##  Ljung-Box Test     R^2  Q(15)  10.35731  0.7966824 
##  Ljung-Box Test     R^2  Q(20)  11.16525  0.9418239 
##  LM Arch Test       R    TR^2   9.46025   0.663197  
## 
## Information Criterion Statistics:
##       AIC       BIC       SIC      HQIC 
## -4.523850 -4.495547 -4.523946 -4.512675

2. ARCH(2)

usdhkd.m2=garchFit(~garch(2,0), data=usdhkd, trace=F)
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.
summary(usdhkd.m2)
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(2, 0), data = usdhkd, trace = F) 
## 
## Mean and Variance Equation:
##  data ~ garch(2, 0)
## <environment: 0x7fc7c0303138>
##  [data = usdhkd]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##          mu        omega       alpha1       alpha2  
## -0.00046702   0.00026598   0.54327969   0.48701087  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##          Estimate  Std. Error  t value Pr(>|t|)    
## mu     -4.670e-04   8.699e-04   -0.537    0.591    
## omega   2.660e-04   3.473e-05    7.658 1.89e-14 ***
## alpha1  5.433e-01   1.323e-01    4.108 3.99e-05 ***
## alpha2  4.870e-01   1.108e-01    4.395 1.11e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  997.0851    normalized:  2.313422 
## 
## Description:
##  Tue May  4 16:04:01 2021 by user:  
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value   
##  Jarque-Bera Test   R    Chi^2  1450.006  0         
##  Shapiro-Wilk Test  R    W      0.8744017 0         
##  Ljung-Box Test     R    Q(10)  18.6254   0.04528692
##  Ljung-Box Test     R    Q(15)  26.25545  0.03542182
##  Ljung-Box Test     R    Q(20)  28.93381  0.08907092
##  Ljung-Box Test     R^2  Q(10)  8.642008  0.5663814 
##  Ljung-Box Test     R^2  Q(15)  11.90956  0.6858611 
##  Ljung-Box Test     R^2  Q(20)  16.36083  0.6940018 
##  LM Arch Test       R    TR^2   9.24412   0.68195   
## 
## Information Criterion Statistics:
##       AIC       BIC       SIC      HQIC 
## -4.608283 -4.570547 -4.608454 -4.593384

3. GARCH(1,1)

usdhkd.m3=garchFit(~garch(1,1), data=usdhkd, trace=F)
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.
summary(usdhkd.m3)
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(1, 1), data = usdhkd, trace = F) 
## 
## Mean and Variance Equation:
##  data ~ garch(1, 1)
## <environment: 0x7fc7c3ca2710>
##  [data = usdhkd]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##          mu        omega       alpha1        beta1  
## -1.6576e-04   1.5468e-05   2.4065e-01   8.0449e-01  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##          Estimate  Std. Error  t value Pr(>|t|)    
## mu     -1.658e-04   8.902e-04   -0.186   0.8523    
## omega   1.547e-05   1.904e-05    0.812   0.4167    
## alpha1  2.406e-01   1.362e-01    1.767   0.0772 .  
## beta1   8.045e-01   1.052e-01    7.648 2.04e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  999.3168    normalized:  2.3186 
## 
## Description:
##  Tue May  4 16:04:01 2021 by user:  
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value   
##  Jarque-Bera Test   R    Chi^2  2690.144  0         
##  Shapiro-Wilk Test  R    W      0.858535  0         
##  Ljung-Box Test     R    Q(10)  20.78275  0.02266047
##  Ljung-Box Test     R    Q(15)  24.94071  0.05074532
##  Ljung-Box Test     R    Q(20)  26.92712  0.1373269 
##  Ljung-Box Test     R^2  Q(10)  3.768855  0.9571724 
##  Ljung-Box Test     R^2  Q(15)  6.011632  0.9795517 
##  Ljung-Box Test     R^2  Q(20)  8.392027  0.9889398 
##  LM Arch Test       R    TR^2   4.156909  0.9804356 
## 
## Information Criterion Statistics:
##       AIC       BIC       SIC      HQIC 
## -4.618639 -4.580903 -4.618810 -4.603740

4. GARCH(2,2)

usdhkd.m4=garchFit(~garch(2,2), data=usdhkd, trace=F)
## Warning in sqrt(diag(fit$cvar)): NaNs produced
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.
summary(usdhkd.m4)
## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(formula = ~garch(2, 2), data = usdhkd, trace = F) 
## 
## Mean and Variance Equation:
##  data ~ garch(2, 2)
## <environment: 0x7fc7a5bb3248>
##  [data = usdhkd]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##          mu        omega       alpha1       alpha2        beta1        beta2  
## -1.0068e-04   1.3247e-05   2.4447e-01   1.0000e-08   5.5623e-01   2.4114e-01  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##          Estimate  Std. Error  t value Pr(>|t|)   
## mu     -1.007e-04   8.768e-04   -0.115  0.90858   
## omega   1.325e-05   9.320e-06    1.421  0.15519   
## alpha1  2.445e-01   8.728e-02    2.801  0.00509 **
## alpha2  1.000e-08          NA       NA       NA   
## beta1   5.562e-01          NA       NA       NA   
## beta2   2.411e-01          NA       NA       NA   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  1000.581    normalized:  2.321534 
## 
## Description:
##  Tue May  4 16:04:02 2021 by user:  
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value   
##  Jarque-Bera Test   R    Chi^2  2936.158  0         
##  Shapiro-Wilk Test  R    W      0.859941  0         
##  Ljung-Box Test     R    Q(10)  21.39029  0.01853056
##  Ljung-Box Test     R    Q(15)  25.39566  0.04487562
##  Ljung-Box Test     R    Q(20)  27.34351  0.1258733 
##  Ljung-Box Test     R^2  Q(10)  3.013569  0.9811029 
##  Ljung-Box Test     R^2  Q(15)  5.023137  0.9919287 
##  Ljung-Box Test     R^2  Q(20)  7.216473  0.9959124 
##  LM Arch Test       R    TR^2   3.459938  0.9913337 
## 
## Information Criterion Statistics:
##       AIC       BIC       SIC      HQIC 
## -4.615226 -4.558622 -4.615607 -4.592877

We select the GARCH(1,1) model: the Ljung-Box tests on the squared residuals (R^2 rows) show no remaining correlation, its AIC and BIC are the lowest among the four models, and it is less complex than GARCH(2,2), whose extra coefficients could not even be assigned standard errors (NA entries).
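
To make the comparison explicit, the information criteria can be collected from the four fits side by side (assuming fGarch stores them in the @fit$ics slot, as the summaries above suggest):

# AIC for each candidate; GARCH(1,1) should come out smallest.
sapply(list(ARCH1 = usdhkd.m1, ARCH2 = usdhkd.m2,
            GARCH11 = usdhkd.m3, GARCH22 = usdhkd.m4),
       function(m) m@fit$ics["AIC"])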

(c) Make a prediction of the volatility for the next 5 periods based on your final model.

predict(usdhkd.m3, n.ahead=5, plot=TRUE, conf =.9)

##    meanForecast  meanError standardDeviation lowerInterval upperInterval
## 1 -0.0001657554 0.01384428        0.01384428   -0.02293757    0.02260606
## 2 -0.0001657554 0.01468953        0.01468953   -0.02432788    0.02399637
## 3 -0.0001657554 0.01552382        0.01552382   -0.02570016    0.02536865
## 4 -0.0001657554 0.01635032        0.01635032   -0.02705965    0.02672813
## 5 -0.0001657554 0.01717167        0.01717167   -0.02841064    0.02807913

The prediction for the next 5 periods appears at the end of the graph: the mean forecast is a flat line near the latest values, while the predicted volatility (standard deviation) grows gradually from 0.0138 to 0.0172.
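
For completeness, the in-sample conditional volatility of the final model can be plotted with fGarch's volatility() extractor, showing what the forecast at the end of the graph is extending:

# Estimated conditional standard deviation over the sample period.
plot(volatility(usdhkd.m3), type = "l", ylab = "conditional sd")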