library(tseries)
library(tidyverse)
library(magrittr)
library(plotly)
library(timetk)
library(lubridate)
library(pracma)
library(fma)
library(forecast)
AR(2): PACF should show a cut-off at lag p = 2
MA(3): ACF should show a cut-off at lag q = 3
AR(2) model
# AR2 model - theoretical autocorrelations
plot(0:30, ARMAacf(ar = c(0.9, -0.5), lag.max = 30), type = "h",
ylab = "ACF")
## Theoretical partial autocorrelations
plot(1:30, ARMAacf(ar = c(0.9, -0.5), lag.max = 30, pacf = TRUE), type = "h", ylab = "PACF")
MA(3) model
# MA3 model - theoretical autocorrelations
plot(0:30, ARMAacf(ma = c(0.9, -0.5, -0.4), lag.max = 30), type = "h",
ylab = "ACF")
## Theoretical partial autocorrelations
plot(1:30, ARMAacf(ma = c(0.9, -0.5, -0.4), lag.max = 30, pacf = TRUE), type = "h", ylab = "PACF")
AR(2) process
# define 3 data sets
set.seed(22)
ar2_1_sim <- arima.sim(n = 200, model = list(ar = c(0.9, -0.5)))
ar2_2_sim <- arima.sim(n = 200, model = list(ar = c(0.9, -0.5)))
ar2_3_sim <- arima.sim(n = 200, model = list(ar = c(0.9, -0.5)))
xdata <- 1:200
plot(xdata, ar2_1_sim, type="l", col="blue", lty=1, ylim=c(-5,5), ylab="values" )
lines(xdata, ar2_2_sim, col="red",lty=2)
lines(xdata, ar2_3_sim, col="green",lty=5)
Observations: one sees clear recurring lows and peaks in all 3 realisations. The values are between -5 and 5.
MA(3) process
Just used a length of 52 for better pattern examination.
# define 3 data sets
set.seed(22)
ma3_1_sim <- arima.sim(n = 52, model = list(ma = c(0.9, -0.5, -0.4)))
ma3_2_sim <- arima.sim(n = 52, model = list(ma = c(0.9, -0.5, -0.4)))
ma3_3_sim <- arima.sim(n = 52, model = list(ma = c(0.9, -0.5, -0.4)))
xdata <- 1:52
plot(xdata, ma3_1_sim, type="l", col="blue", lty=1, ylim=c(-5,5), ylab="values" )
lines(xdata, ma3_2_sim, col="red",lty=2)
lines(xdata, ma3_3_sim, col="green",lty=5)
Observations: one sees clear recurring lows and peaks in all 3 realisations which are also close to each other over all 3 realisations. The values are between -4 and 4.
AR(2) process
set.seed(22)
ar2_1_sim <- arima.sim(n = 200, model = list(ar = c(0.9, -0.5)))
time = 1:200
plot(time, ar2_1_sim, type="l", col="blue", lty=1, ylim=c(-5,5), ylab="values" )
acf(ma3_1_sim)
pacf(ma3_1_sim)
Obervations: ACF shows clear cut off after lag 2. PACF shows clear cut off after lag 2. This is a similar result as seen in the simulated data above.
MA(3) process
Just used a length of 52 for better pattern examination.
set.seed(22)
ma3_1_sim <- arima.sim(n = 52, model = list(ma = c(0.9, -0.5, -0.4)))
time = 1:52
plot(time, ma3_1_sim, type="l", col="blue", lty=1, ylim=c(-4,4), ylab="values" )
acf(ma3_1_sim)
pacf(ma3_1_sim)
Obervations: ACF shows clear cut off after lag 2. PACF shows clear cut off after lag 1. ACF shows a similar result as seen in the simulated data above. PACF shows quite a different result.
polyroot(c(1, -0.5, -2))
## [1] 0.5930703-0i -0.8430703+0i
As the first value has an absolute value below 1, the time series is not stationary.
polyroot(c(1, -1))
## [1] 1+0i
As the value has an absolute value of 1, the time series could be stationary.
values <- seq(-1000, 1000, 100)
for (i in values) {
print(paste(i, polyroot(c(1, -0.5, i)))
)
}
## [1] "-1000 0.0313737647980123+0i" "-1000 -0.0318737647980123+0i"
## [1] "-900 0.0330567129428698-0i" "-900 -0.0336122684984253+0i"
## [1] "-800 0.0350442201002865+0i" "-800 -0.0356692201002865-0i"
## [1] "-700 0.037440991747515-0i" "-700 -0.0381552774618008+0i"
## [1] "-600 0.0404102886175298+0i" "-600 -0.0412436219508631+0i"
## [1] "-500 0.0442241545476267+0i" "-500 -0.0452241545476267+0i"
## [1] "-400 0.049378906097424-0i" "-400 -0.050628906097424+0i"
## [1] "-300 0.0569077073377334-0i" "-300 -0.0585743740044+0i"
## [1] "-200 0.0694717257990782+0i" "-200 -0.0719717257990782+0i"
## [1] "-100 0.0975312451187128-0i" "-100 -0.102531245118713+0i"
## [1] "0 2+0i"
## [1] "100 0.0025+0.099968745115661i" "100 0.0025-0.099968745115661i"
## [1] "200 0.00125+0.0706996287118964i" "200 0.00125-0.0706996287118964i"
## [1] "300 0.0008333333333333+0.0577290125403933i"
## [2] "300 0.0008333333333333-0.0577290125403933i"
## [1] "400 0.000625+0.0499960935974002i" "400 0.000625-0.0499960935974002i"
## [1] "500 0.0005+0.0447185643776721i" "500 0.0005-0.0447185643776721i"
## [1] "600 0.0004166666666667+0.0408227026978317i"
## [2] "600 0.0004166666666667-0.0408227026978317i"
## [1] "700 0.0003571428571429+0.0377947599218598i"
## [2] "700 0.0003571428571429-0.0377947599218598i"
## [1] "800 0.0003125+0.0353539579644203i" "800 0.0003125-0.0353539579644203i"
## [1] "900 0.0002777777777778+0.0333321759058314i"
## [2] "900 0.0002777777777778-0.0333321759058314i"
## [1] "1000 0.00025+0.0316217883744737i" "1000 0.00025-0.0316217883744737i"
As the first value has an absolute value below 1 for alpha2 from -1000 to 1000 in intervalls of 100, the time series is could never be stationary with alpha1 of -0.5.
# ACF
# ta<-ARMAacf(ar=(1),lag.max=22)
# plot(ta, type="h")
#
# # PACF
# tp<-ARMAacf(ar=(1),pacf=TRUE)
# plot(tp, type="h")
values <- seq(0, 2, 0.5)
for (i in values) {
ta<-ARMAacf(ar=(i),lag.max=22)
plot(ta, type="h")
tp<-ARMAacf(ar=(i),pacf=TRUE)
plot(tp, type="h")
print(paste(i, polyroot(c(1, -1, i)))
)
}
## [1] "0 1+0i"
## [1] "0.5 1+1i" "0.5 1-1i"
## [1] "1 0.499999999999999+0.866025403784436i"
## [2] "1 0.500000000000001-0.866025403784436i"
## [1] "1.5 0.333333333333333+0.74535599249993i"
## [2] "1.5 0.333333333333333-0.74535599249993i"
## [1] "2 0.25+0.661437827766147i" "2 0.25-0.661437827766147i"
Because Yt - 1 * alpha (>= 1) is equal or bigger than Yt. The characteristic polynomial is therefore below 1 and not stationary! The plots above as well as the polyroots confirm this statement.
# load data
yields <- read.table("http://stat.ethz.ch/Teaching/Datasets/WBL/yields.dat",
header = FALSE)
t_yields <- ts(yields[, 1])
hist(t_yields)
tsdisplay(t_yields)
Yes, the data could be generated by an AR-process of order p = 1. One can observe only a autocorrelation of lag 1 within the PACF plot.
# unable to compute mean and innovation variance by hand. How is the suggested approach?
r_yw <- ar(t_yields, method = "yw", order.max = 1)
r_yw$resid
## Time Series:
## Start = 1
## End = 70
## Frequency = 1
## [1] NA 11.26178808 -23.11028050 8.90470844 -5.38113227
## [6] 7.75288322 8.88971950 -8.61918537 3.92251817 -0.05967209
## [11] 18.65166640 -8.38113227 -0.41675174 -8.83942873 2.53263985
## [16] -4.44955041 26.09215313 15.12777260 -12.61918537 17.36300490
## [21] 8.78850269 5.82130136 1.16057127 8.43142304 -2.66979377
## [26] 3.48203145 1.16057127 -6.56857696 -28.51796855 -2.31553492
## [31] 1.94032791 19.43142304 12.61886773 24.77069295 7.78850269
## [36] 6.43142304 -3.44955041 0.48203145 -14.00906369 -3.02687342
## [41] -2.00906369 2.65166640 -4.61918537 3.48203145 1.16057127
## [46] 10.43142304 -2.89003714 10.09215313 -3.11028050 -2.29772519
## [51] -12.78882032 2.75288322 6.94032791 -8.61918537 -2.07748183
## [56] -1.39894200 -17.95845528 -22.80663006 -3.41675174 -5.00906369
## [61] 14.48203145 -6.55076722 -6.24711678 8.43142304 -8.66979377
## [66] 3.14276154 -8.05967209 1.53263985 5.16057127 -27.00906369
str(r_yw)
## List of 15
## $ order : int 1
## $ ar : num -0.39
## $ var.pred : num 122
## $ x.mean : num 51.1
## $ aic : Named num [1:2] 9.54 0
## ..- attr(*, "names")= chr [1:2] "0" "1"
## $ n.used : int 70
## $ n.obs : int 70
## $ order.max : num 1
## $ partialacf : num [1, 1, 1] -0.39
## $ resid : Time-Series [1:70] from 1 to 70: NA 11.26 -23.11 8.9 -5.38 ...
## $ method : chr "Yule-Walker"
## $ series : chr "t_yields"
## $ frequency : num 1
## $ call : language ar(x = t_yields, order.max = 1, method = "yw")
## $ asy.var.coef: num [1, 1] 0.0125
## - attr(*, "class")= chr "ar"
r_burg <- ar(t_yields, method = "burg", order.max = 1)
r_burg$resid
## Time Series:
## Start = 1
## End = 70
## Frequency = 1
## [1] NA 11.19245049 -22.89411036 8.43230111 -5.04740012
## [6] 7.53239447 9.10588964 -8.55416638 3.75241315 0.07252519
## [11] 18.59912338 -8.04740012 -0.68762420 -8.74082060 2.34574025
## [16] -4.33414771 25.97243182 15.61265590 -12.55416638 17.12572158
## [21] 9.17261855 5.81914206 1.25917940 8.41246917 -2.52080192
## [26] 3.37910471 1.25917940 -6.58753083 -28.62089529 -2.75435311
## [31] 2.07252519 19.41246917 12.95259988 24.85250651 8.17261855
## [36] 6.41246917 -3.33414771 0.37910471 -13.96083927 -3.28095131
## [41] -1.96083927 2.59912338 -4.55416638 3.37910471 1.25917940
## [46] 10.41246917 -2.70745614 9.97243182 -2.89411036 -2.43424107
## [51] -12.77418505 2.53239447 7.07252519 -8.55416638 -2.24758685
## [56] -1.36751216 -17.99420373 -23.09429709 -3.68762420 -4.96083927
## [61] 14.37910471 -6.26741879 -6.46760553 8.41246917 -8.52080192
## [66] 2.93906736 -7.92747481 1.34574025 5.25917940 -26.96083927
str(r_burg)
## List of 15
## $ order : int 1
## $ ar : num -0.407
## $ var.pred : num 117
## $ x.mean : num 51.1
## $ aic : Named num [1:2] 10.7 0
## ..- attr(*, "names")= chr [1:2] "0" "1"
## $ n.used : int 70
## $ n.obs : int 70
## $ order.max : num 1
## $ partialacf : num [1, 1, 1] -0.407
## $ resid : Time-Series [1:70] from 1 to 70: NA 11.19 -22.89 8.43 -5.05 ...
## $ method : chr "Burg"
## $ series : chr "t_yields"
## $ frequency : num 1
## $ call : language ar(x = t_yields, order.max = 1, method = "burg")
## $ asy.var.coef: num [1, 1] 0.0119
## - attr(*, "class")= chr "ar"
checkresiduals(r_burg$resid)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.
Residuals seem to be allright. No autocorrelation is detected and the residuals look normally distributed.
r_mle <- arima(t_yields, order = c(1, 0, 0), include.mean = TRUE)
str(r_mle)
## List of 14
## $ coef : Named num [1:2] -0.419 51.266
## ..- attr(*, "names")= chr [1:2] "ar1" "intercept"
## $ sigma2 : num 117
## $ var.coef : num [1:2, 1:2] 0.01274 -0.00296 -0.00296 0.8349
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "ar1" "intercept"
## .. ..$ : chr [1:2] "ar1" "intercept"
## $ mask : logi [1:2] TRUE TRUE
## $ loglik : num -266
## $ aic : num 538
## $ arma : int [1:7] 1 0 0 0 1 0 0
## $ residuals: Time-Series [1:70] from 1 to 70: -3.87 10.95 -22.93 7.89 -5 ...
## $ call : language arima(x = t_yields, order = c(1, 0, 0), include.mean = TRUE)
## $ series : chr "t_yields"
## $ code : int 0
## $ n.cond : int 0
## $ nobs : int 70
## $ model :List of 10
## ..$ phi : num -0.419
## ..$ theta: num(0)
## ..$ Delta: num(0)
## ..$ Z : num 1
## ..$ a : num -28.3
## ..$ P : num [1, 1] 0
## ..$ T : num [1, 1] -0.419
## ..$ V : num [1, 1] 1
## ..$ h : num 0
## ..$ Pn : num [1, 1] 1
## - attr(*, "class")= chr "Arima"
r_mle
##
## Call:
## arima(x = t_yields, order = c(1, 0, 0), include.mean = TRUE)
##
## Coefficients:
## ar1 intercept
## -0.4191 51.2658
## s.e. 0.1129 0.9137
##
## sigma^2 estimated as 116.6: log likelihood = -265.98, aic = 537.96
# calculate confidence intervalls
print("confidence intervals: ")
## [1] "confidence intervals: "
(ci_ar1_lower_band <- -0.4191-0.1129)
## [1] -0.532
(ci_ar1_upper_band <- -0.4191+0.1129)
## [1] -0.3062
(ci_intercept_lower_band <- 51.2658-0.9137)
## [1] 50.3521
(ci_intercept_upper_band <- 51.2658+0.9137)
## [1] 52.1795
Confidence intervall for coefficient ar1 with mean -0.4191 is from -0.532 to -0.3062. Confidence intervall for coefficient intercept with mean 51.2658 is from 50.3521 to 52.1795.
Load data
d_force <- read.table("http://stat.ethz.ch/Teaching/Datasets/WBL/kraft.dat",
header = FALSE)
ts_force <- ts(d_force[, 1])
plot(ts_force)
ts_forceA <- window(ts_force, end = 280)
tsdisplay(ts_forceA)
acf(ts_forceA)
Yes. The expected behaviour is a lag at every multiple of about 2 seconds multiplied by 1.5 seconds of measurements. E. g. at lag 4 (6 seconds) or lag 8 (12 seconds).
The ACF plots confirms the expected behaviour.
# PACF
pacf(ts_forceA) # cut-off at p = 6
# ar(6) model
ar_force <- ar(ts_forceA, method = "mle")
print("mle-method:")
## [1] "mle-method:"
ar_force$aic
## 0 1 2 3 4 5
## 675.384208 297.907306 197.602018 153.028418 97.026089 74.755848
## 6 7 8 9 10 11
## 17.664186 19.434497 21.358786 5.854905 0.000000 1.445990
## 12
## 1.231237
which.min(ar_force$aic)
## 10
## 11
ar_force2 <- ar(ts_forceA, method = "burg")
print("burg-method:")
## [1] "burg-method:"
ar_force2$aic
## 0 1 2 3 4 5
## 689.1010110 310.2653197 209.2374481 164.1764133 107.3902617 84.6220590
## 6 7 8 9 10 11
## 26.4245736 28.2614837 30.2416767 14.1104211 7.9229345 9.4621702
## 12 13 14 15 16 17
## 8.8048523 6.7157869 4.2085974 6.1855432 8.0032864 0.0000000
## 18 19 20 21 22 23
## 0.6570324 2.4690675 4.4660453 2.8930394 4.7344284 6.5250774
## 24
## 5.4429421
which.min(ar_force2$aic)
## 17
## 18
ar_force3 <- ar(ts_forceA, method = "yw")
print("yw-method:")
## [1] "yw-method:"
ar_force3$aic
## 0 1 2 3 4 5
## 649.787273 274.680759 179.549362 139.329613 86.846990 63.674193
## 6 7 8 9 10 11
## 10.474487 12.453192 14.291716 4.194248 1.363401 1.894851
## 12 13 14 15 16 17
## 2.882748 1.736910 0.126790 1.920976 3.794555 0.000000
## 18 19 20 21 22 23
## 1.031020 2.743056 4.499686 5.302003 6.946098 8.876893
## 24
## 9.143878
which.min(ar_force2$aic)
## 17
## 18
# tried CSS-ML as well
# lags <- seq(1,16)
# for (i in lags) {
# fit_MLE <- arima(ts_forceA, order = c(i, 0, 0), method = "css-mle")
# cat("AR(", i, "):", sep = "")
# print(fit_MLE$aic)
# }
Maximum likelihood estimator: best lag = 10 Burg’s algorithm & Yule-Walker equations: best lag = 17
ar_lme <- arima(ts_forceA, order = c(10, 0, 0), method = "ML")
summary(ar_lme)
##
## Call:
## arima(x = ts_forceA, order = c(10, 0, 0), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 1.2588 -0.4467 0.0751 -0.4799 0.4873 -0.3358 -0.0829 0.3780
## s.e. 0.0588 0.0917 0.0929 0.0937 0.0961 0.0968 0.0943 0.0951
## ar9 ar10 intercept
## -0.4676 0.1744 -0.0079
## s.e. 0.0948 0.0617 0.0125
##
## sigma^2 estimated as 0.008276: log likelihood = 270.84, aic = -517.68
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set 0.0002465765 0.09097423 0.07182276 -18.79978 153.9653
## MASE ACF1
## Training set 0.5261257 0.006710714
# select residuals and display them
residuals <- ar_lme$residuals
tsdisplay(residuals)
qqnorm(ar_lme$residuals,main="Normal Q-Q Plot: AR(10)")
qqline(ar_lme$residuals)
Residuals looking quite nice. However there seems to be a autocorrelation at lag 8 which must be further analysed. Q-Q plot shows a approximation to normal distribution. The model is in general appropriate for this time series. One can observe from the results in b) that a Yule Walker equation model with lag 17 could even show better results than this model.
force_pred <- predict(ar_force, n.ahead = 40)
plot(ts_force, lty = 3)
lines(ts_forceA, lwd = 2)
lines(force_pred$pred, lwd = 2, col = "orange")
lines(force_pred$pred + force_pred$se * 1.96, col = "red")
lines(force_pred$pred - force_pred$se * 1.96, col = "red")
The model predicts the values quite well, because the original data is between the confidence interval values.