library(tseries)
library(tidyverse)
library(magrittr)
library(plotly)
library(timetk)
library(lubridate)
library(pracma)
library(fma)
library(forecast)



Series 3


Exercise 3.1 - simulations are key

  1. AR(2): PACF should show a cut-off at lag p = 2
    MA(3): ACF should show a cut-off at lag q = 3

AR(2) model

# AR2 model - theoretical autocorrelations
plot(0:30, ARMAacf(ar = c(0.9, -0.5), lag.max = 30), type = "h",
ylab = "ACF")

## Theoretical partial autocorrelations
plot(1:30, ARMAacf(ar = c(0.9, -0.5), lag.max = 30, pacf = TRUE), type = "h", ylab = "PACF")

MA(3) model

# MA3 model - theoretical autocorrelations
plot(0:30, ARMAacf(ma = c(0.9, -0.5, -0.4), lag.max = 30), type = "h",
ylab = "ACF")

## Theoretical partial autocorrelations
plot(1:30, ARMAacf(ma = c(0.9, -0.5, -0.4), lag.max = 30, pacf = TRUE), type = "h", ylab = "PACF")

AR(2) process

# define 3 data sets
set.seed(22)
ar2_1_sim <- arima.sim(n = 200, model = list(ar = c(0.9, -0.5)))
ar2_2_sim <- arima.sim(n = 200, model = list(ar = c(0.9, -0.5)))
ar2_3_sim <- arima.sim(n = 200, model = list(ar = c(0.9, -0.5)))

xdata <- 1:200

plot(xdata, ar2_1_sim, type="l", col="blue", lty=1, ylim=c(-5,5), ylab="values" )
lines(xdata, ar2_2_sim, col="red",lty=2)
lines(xdata, ar2_3_sim, col="green",lty=5)

Observations: one sees clear recurring lows and peaks in all 3 realisations. The values are between -5 and 5.

MA(3) process

Just used a length of 52 for better pattern examination.

# define 3 data sets
set.seed(22)
ma3_1_sim <- arima.sim(n = 52, model = list(ma = c(0.9, -0.5, -0.4)))
ma3_2_sim <- arima.sim(n = 52, model = list(ma = c(0.9, -0.5, -0.4)))
ma3_3_sim <- arima.sim(n = 52, model = list(ma = c(0.9, -0.5, -0.4)))

xdata <- 1:52

plot(xdata, ma3_1_sim, type="l", col="blue", lty=1, ylim=c(-5,5), ylab="values" )
lines(xdata, ma3_2_sim, col="red",lty=2)
lines(xdata, ma3_3_sim, col="green",lty=5)

Observations: one sees clear recurring lows and peaks in all 3 realisations which are also close to each other over all 3 realisations. The values are between -4 and 4.

AR(2) process

set.seed(22)
ar2_1_sim <- arima.sim(n = 200, model = list(ar = c(0.9, -0.5)))
time = 1:200
plot(time, ar2_1_sim, type="l", col="blue", lty=1, ylim=c(-5,5), ylab="values" )

acf(ma3_1_sim)

pacf(ma3_1_sim)

Obervations: ACF shows clear cut off after lag 2. PACF shows clear cut off after lag 2. This is a similar result as seen in the simulated data above.


MA(3) process

Just used a length of 52 for better pattern examination.

set.seed(22)
ma3_1_sim <- arima.sim(n = 52, model = list(ma = c(0.9, -0.5, -0.4)))
time = 1:52
plot(time, ma3_1_sim, type="l", col="blue", lty=1, ylim=c(-4,4), ylab="values" )

acf(ma3_1_sim)

pacf(ma3_1_sim)

Obervations: ACF shows clear cut off after lag 2. PACF shows clear cut off after lag 1. ACF shows a similar result as seen in the simulated data above. PACF shows quite a different result.


Exercise 3.2 - Stationarity of AR(p) models

polyroot(c(1, -0.5, -2))
## [1]  0.5930703-0i -0.8430703+0i

As the first value has an absolute value below 1, the time series is not stationary.

polyroot(c(1, -1))
## [1] 1+0i

As the value has an absolute value of 1, the time series could be stationary.

values <- seq(-1000, 1000, 100)
for (i in values) {
  print(paste(i, polyroot(c(1, -0.5, i)))
  )
}
## [1] "-1000 0.0313737647980123+0i"  "-1000 -0.0318737647980123+0i"
## [1] "-900 0.0330567129428698-0i"  "-900 -0.0336122684984253+0i"
## [1] "-800 0.0350442201002865+0i"  "-800 -0.0356692201002865-0i"
## [1] "-700 0.037440991747515-0i"   "-700 -0.0381552774618008+0i"
## [1] "-600 0.0404102886175298+0i"  "-600 -0.0412436219508631+0i"
## [1] "-500 0.0442241545476267+0i"  "-500 -0.0452241545476267+0i"
## [1] "-400 0.049378906097424-0i"  "-400 -0.050628906097424+0i"
## [1] "-300 0.0569077073377334-0i" "-300 -0.0585743740044+0i"  
## [1] "-200 0.0694717257990782+0i"  "-200 -0.0719717257990782+0i"
## [1] "-100 0.0975312451187128-0i" "-100 -0.102531245118713+0i"
## [1] "0 2+0i"
## [1] "100 0.0025+0.099968745115661i" "100 0.0025-0.099968745115661i"
## [1] "200 0.00125+0.0706996287118964i" "200 0.00125-0.0706996287118964i"
## [1] "300 0.0008333333333333+0.0577290125403933i"
## [2] "300 0.0008333333333333-0.0577290125403933i"
## [1] "400 0.000625+0.0499960935974002i" "400 0.000625-0.0499960935974002i"
## [1] "500 0.0005+0.0447185643776721i" "500 0.0005-0.0447185643776721i"
## [1] "600 0.0004166666666667+0.0408227026978317i"
## [2] "600 0.0004166666666667-0.0408227026978317i"
## [1] "700 0.0003571428571429+0.0377947599218598i"
## [2] "700 0.0003571428571429-0.0377947599218598i"
## [1] "800 0.0003125+0.0353539579644203i" "800 0.0003125-0.0353539579644203i"
## [1] "900 0.0002777777777778+0.0333321759058314i"
## [2] "900 0.0002777777777778-0.0333321759058314i"
## [1] "1000 0.00025+0.0316217883744737i" "1000 0.00025-0.0316217883744737i"

As the first value has an absolute value below 1 for alpha2 from -1000 to 1000 in intervalls of 100, the time series is could never be stationary with alpha1 of -0.5.

# ACF
# ta<-ARMAacf(ar=(1),lag.max=22)
# plot(ta, type="h")
# 
# # PACF
# tp<-ARMAacf(ar=(1),pacf=TRUE)
# plot(tp, type="h")

values <- seq(0, 2, 0.5)
for (i in values) {
  ta<-ARMAacf(ar=(i),lag.max=22)
  plot(ta, type="h")
  tp<-ARMAacf(ar=(i),pacf=TRUE)
  plot(tp, type="h")
  print(paste(i, polyroot(c(1, -1, i)))
  )
}

## [1] "0 1+0i"

## [1] "0.5 1+1i" "0.5 1-1i"

## [1] "1 0.499999999999999+0.866025403784436i"
## [2] "1 0.500000000000001-0.866025403784436i"

## [1] "1.5 0.333333333333333+0.74535599249993i"
## [2] "1.5 0.333333333333333-0.74535599249993i"

## [1] "2 0.25+0.661437827766147i" "2 0.25-0.661437827766147i"

Because Yt - 1 * alpha (>= 1) is equal or bigger than Yt. The characteristic polynomial is therefore below 1 and not stationary! The plots above as well as the polyroots confirm this statement.


Exercise 3.3 - AR(p) models with yield data

# load data
yields <- read.table("http://stat.ethz.ch/Teaching/Datasets/WBL/yields.dat",
header = FALSE)
t_yields <- ts(yields[, 1])
hist(t_yields)

tsdisplay(t_yields)

Yes, the data could be generated by an AR-process of order p = 1. One can observe only a autocorrelation of lag 1 within the PACF plot.

# unable to compute mean and innovation variance by hand. How is the suggested approach?
r_yw <- ar(t_yields, method = "yw", order.max = 1)
r_yw$resid
## Time Series:
## Start = 1 
## End = 70 
## Frequency = 1 
##  [1]           NA  11.26178808 -23.11028050   8.90470844  -5.38113227
##  [6]   7.75288322   8.88971950  -8.61918537   3.92251817  -0.05967209
## [11]  18.65166640  -8.38113227  -0.41675174  -8.83942873   2.53263985
## [16]  -4.44955041  26.09215313  15.12777260 -12.61918537  17.36300490
## [21]   8.78850269   5.82130136   1.16057127   8.43142304  -2.66979377
## [26]   3.48203145   1.16057127  -6.56857696 -28.51796855  -2.31553492
## [31]   1.94032791  19.43142304  12.61886773  24.77069295   7.78850269
## [36]   6.43142304  -3.44955041   0.48203145 -14.00906369  -3.02687342
## [41]  -2.00906369   2.65166640  -4.61918537   3.48203145   1.16057127
## [46]  10.43142304  -2.89003714  10.09215313  -3.11028050  -2.29772519
## [51] -12.78882032   2.75288322   6.94032791  -8.61918537  -2.07748183
## [56]  -1.39894200 -17.95845528 -22.80663006  -3.41675174  -5.00906369
## [61]  14.48203145  -6.55076722  -6.24711678   8.43142304  -8.66979377
## [66]   3.14276154  -8.05967209   1.53263985   5.16057127 -27.00906369
str(r_yw)
## List of 15
##  $ order       : int 1
##  $ ar          : num -0.39
##  $ var.pred    : num 122
##  $ x.mean      : num 51.1
##  $ aic         : Named num [1:2] 9.54 0
##   ..- attr(*, "names")= chr [1:2] "0" "1"
##  $ n.used      : int 70
##  $ n.obs       : int 70
##  $ order.max   : num 1
##  $ partialacf  : num [1, 1, 1] -0.39
##  $ resid       : Time-Series [1:70] from 1 to 70: NA 11.26 -23.11 8.9 -5.38 ...
##  $ method      : chr "Yule-Walker"
##  $ series      : chr "t_yields"
##  $ frequency   : num 1
##  $ call        : language ar(x = t_yields, order.max = 1, method = "yw")
##  $ asy.var.coef: num [1, 1] 0.0125
##  - attr(*, "class")= chr "ar"
r_burg <- ar(t_yields, method = "burg", order.max = 1)
r_burg$resid
## Time Series:
## Start = 1 
## End = 70 
## Frequency = 1 
##  [1]           NA  11.19245049 -22.89411036   8.43230111  -5.04740012
##  [6]   7.53239447   9.10588964  -8.55416638   3.75241315   0.07252519
## [11]  18.59912338  -8.04740012  -0.68762420  -8.74082060   2.34574025
## [16]  -4.33414771  25.97243182  15.61265590 -12.55416638  17.12572158
## [21]   9.17261855   5.81914206   1.25917940   8.41246917  -2.52080192
## [26]   3.37910471   1.25917940  -6.58753083 -28.62089529  -2.75435311
## [31]   2.07252519  19.41246917  12.95259988  24.85250651   8.17261855
## [36]   6.41246917  -3.33414771   0.37910471 -13.96083927  -3.28095131
## [41]  -1.96083927   2.59912338  -4.55416638   3.37910471   1.25917940
## [46]  10.41246917  -2.70745614   9.97243182  -2.89411036  -2.43424107
## [51] -12.77418505   2.53239447   7.07252519  -8.55416638  -2.24758685
## [56]  -1.36751216 -17.99420373 -23.09429709  -3.68762420  -4.96083927
## [61]  14.37910471  -6.26741879  -6.46760553   8.41246917  -8.52080192
## [66]   2.93906736  -7.92747481   1.34574025   5.25917940 -26.96083927
str(r_burg)
## List of 15
##  $ order       : int 1
##  $ ar          : num -0.407
##  $ var.pred    : num 117
##  $ x.mean      : num 51.1
##  $ aic         : Named num [1:2] 10.7 0
##   ..- attr(*, "names")= chr [1:2] "0" "1"
##  $ n.used      : int 70
##  $ n.obs       : int 70
##  $ order.max   : num 1
##  $ partialacf  : num [1, 1, 1] -0.407
##  $ resid       : Time-Series [1:70] from 1 to 70: NA 11.19 -22.89 8.43 -5.05 ...
##  $ method      : chr "Burg"
##  $ series      : chr "t_yields"
##  $ frequency   : num 1
##  $ call        : language ar(x = t_yields, order.max = 1, method = "burg")
##  $ asy.var.coef: num [1, 1] 0.0119
##  - attr(*, "class")= chr "ar"
checkresiduals(r_burg$resid)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

Residuals seem to be allright. No autocorrelation is detected and the residuals look normally distributed.


r_mle <- arima(t_yields, order = c(1, 0, 0), include.mean = TRUE)
str(r_mle)
## List of 14
##  $ coef     : Named num [1:2] -0.419 51.266
##   ..- attr(*, "names")= chr [1:2] "ar1" "intercept"
##  $ sigma2   : num 117
##  $ var.coef : num [1:2, 1:2] 0.01274 -0.00296 -0.00296 0.8349
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "ar1" "intercept"
##   .. ..$ : chr [1:2] "ar1" "intercept"
##  $ mask     : logi [1:2] TRUE TRUE
##  $ loglik   : num -266
##  $ aic      : num 538
##  $ arma     : int [1:7] 1 0 0 0 1 0 0
##  $ residuals: Time-Series [1:70] from 1 to 70: -3.87 10.95 -22.93 7.89 -5 ...
##  $ call     : language arima(x = t_yields, order = c(1, 0, 0), include.mean = TRUE)
##  $ series   : chr "t_yields"
##  $ code     : int 0
##  $ n.cond   : int 0
##  $ nobs     : int 70
##  $ model    :List of 10
##   ..$ phi  : num -0.419
##   ..$ theta: num(0) 
##   ..$ Delta: num(0) 
##   ..$ Z    : num 1
##   ..$ a    : num -28.3
##   ..$ P    : num [1, 1] 0
##   ..$ T    : num [1, 1] -0.419
##   ..$ V    : num [1, 1] 1
##   ..$ h    : num 0
##   ..$ Pn   : num [1, 1] 1
##  - attr(*, "class")= chr "Arima"
r_mle
## 
## Call:
## arima(x = t_yields, order = c(1, 0, 0), include.mean = TRUE)
## 
## Coefficients:
##           ar1  intercept
##       -0.4191    51.2658
## s.e.   0.1129     0.9137
## 
## sigma^2 estimated as 116.6:  log likelihood = -265.98,  aic = 537.96
# calculate confidence intervalls
print("confidence intervals: ")
## [1] "confidence intervals: "
(ci_ar1_lower_band <- -0.4191-0.1129)
## [1] -0.532
(ci_ar1_upper_band <- -0.4191+0.1129)
## [1] -0.3062
(ci_intercept_lower_band <- 51.2658-0.9137)
## [1] 50.3521
(ci_intercept_upper_band <- 51.2658+0.9137)
## [1] 52.1795

Confidence intervall for coefficient ar1 with mean -0.4191 is from -0.532 to -0.3062. Confidence intervall for coefficient intercept with mean 51.2658 is from 50.3521 to 52.1795.


Exercise 3.4 - ARIMA

Load data

d_force <- read.table("http://stat.ethz.ch/Teaching/Datasets/WBL/kraft.dat",
header = FALSE)
ts_force <- ts(d_force[, 1])
plot(ts_force)

ts_forceA <- window(ts_force, end = 280)
tsdisplay(ts_forceA)

acf(ts_forceA)

Yes. The expected behaviour is a lag at every multiple of about 2 seconds multiplied by 1.5 seconds of measurements. E. g. at lag 4 (6 seconds) or lag 8 (12 seconds).

The ACF plots confirms the expected behaviour.


# PACF
pacf(ts_forceA) # cut-off at p = 6

# ar(6) model
ar_force <- ar(ts_forceA, method = "mle")
print("mle-method:")
## [1] "mle-method:"
ar_force$aic
##          0          1          2          3          4          5 
## 675.384208 297.907306 197.602018 153.028418  97.026089  74.755848 
##          6          7          8          9         10         11 
##  17.664186  19.434497  21.358786   5.854905   0.000000   1.445990 
##         12 
##   1.231237
which.min(ar_force$aic)
## 10 
## 11
ar_force2 <- ar(ts_forceA, method = "burg")
print("burg-method:")
## [1] "burg-method:"
ar_force2$aic
##           0           1           2           3           4           5 
## 689.1010110 310.2653197 209.2374481 164.1764133 107.3902617  84.6220590 
##           6           7           8           9          10          11 
##  26.4245736  28.2614837  30.2416767  14.1104211   7.9229345   9.4621702 
##          12          13          14          15          16          17 
##   8.8048523   6.7157869   4.2085974   6.1855432   8.0032864   0.0000000 
##          18          19          20          21          22          23 
##   0.6570324   2.4690675   4.4660453   2.8930394   4.7344284   6.5250774 
##          24 
##   5.4429421
which.min(ar_force2$aic)
## 17 
## 18
ar_force3 <- ar(ts_forceA, method = "yw")
print("yw-method:")
## [1] "yw-method:"
ar_force3$aic
##          0          1          2          3          4          5 
## 649.787273 274.680759 179.549362 139.329613  86.846990  63.674193 
##          6          7          8          9         10         11 
##  10.474487  12.453192  14.291716   4.194248   1.363401   1.894851 
##         12         13         14         15         16         17 
##   2.882748   1.736910   0.126790   1.920976   3.794555   0.000000 
##         18         19         20         21         22         23 
##   1.031020   2.743056   4.499686   5.302003   6.946098   8.876893 
##         24 
##   9.143878
which.min(ar_force2$aic)
## 17 
## 18
# tried CSS-ML as well
# lags <- seq(1,16)
# for (i in lags) {
#   fit_MLE <- arima(ts_forceA, order = c(i, 0, 0), method = "css-mle")
#   cat("AR(", i, "):", sep = "")
#   print(fit_MLE$aic)
# }

Maximum likelihood estimator: best lag = 10 Burg’s algorithm & Yule-Walker equations: best lag = 17


ar_lme <- arima(ts_forceA, order = c(10, 0, 0), method = "ML")
summary(ar_lme)
## 
## Call:
## arima(x = ts_forceA, order = c(10, 0, 0), method = "ML")
## 
## Coefficients:
##          ar1      ar2     ar3      ar4     ar5      ar6      ar7     ar8
##       1.2588  -0.4467  0.0751  -0.4799  0.4873  -0.3358  -0.0829  0.3780
## s.e.  0.0588   0.0917  0.0929   0.0937  0.0961   0.0968   0.0943  0.0951
##           ar9    ar10  intercept
##       -0.4676  0.1744    -0.0079
## s.e.   0.0948  0.0617     0.0125
## 
## sigma^2 estimated as 0.008276:  log likelihood = 270.84,  aic = -517.68
## 
## Training set error measures:
##                        ME       RMSE        MAE       MPE     MAPE
## Training set 0.0002465765 0.09097423 0.07182276 -18.79978 153.9653
##                   MASE        ACF1
## Training set 0.5261257 0.006710714
# select residuals and display them
residuals <- ar_lme$residuals
tsdisplay(residuals)

qqnorm(ar_lme$residuals,main="Normal Q-Q Plot: AR(10)")
qqline(ar_lme$residuals)

Residuals looking quite nice. However there seems to be a autocorrelation at lag 8 which must be further analysed. Q-Q plot shows a approximation to normal distribution. The model is in general appropriate for this time series. One can observe from the results in b) that a Yule Walker equation model with lag 17 could even show better results than this model.


force_pred <- predict(ar_force, n.ahead = 40)
plot(ts_force, lty = 3)
lines(ts_forceA, lwd = 2)
lines(force_pred$pred, lwd = 2, col = "orange")
lines(force_pred$pred + force_pred$se * 1.96, col = "red")
lines(force_pred$pred - force_pred$se * 1.96, col = "red")

The model predicts the values quite well, because the original data is between the confidence interval values.