library(tseries)
library(tidyverse)
library(magrittr)
library(plotly)
library(timetk)
library(lubridate)
library(pracma)
library(fma)
library(forecast)
library(fpp)
library(car)



Series 4


Exercise 4.1 - simulations

ARMA(1,2) model

# ARMA(1,2) model - theoretical autocorrelations
plot(0:30, ARMAacf(ar = c(0.75), ma = c(-0.3, 0.25), lag.max = 30), type = "h",
     ylab = "ACF")

## Theoretical partial autocorrelations
plot(1:30, ARMAacf(ar = c(0.75), ma = c(-0.3, 0.25 ), lag.max = 30, pacf = TRUE), type = "h", ylab = "PACF")
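To cross-check the theoretical shapes, one can also simulate from the same model and compare the empirical ACF/PACF. This is only a sketch and was not part of the original output; seed and sample size are arbitrary choices.

# Sketch: simulate the ARMA(1,2) process above and compare empirical vs. theoretical ACF/PACF
set.seed(42)
sim <- arima.sim(model = list(ar = 0.75, ma = c(-0.3, 0.25)), n = 1000)
acf(sim, lag.max = 30)   # should resemble the theoretical ACF plotted above
pacf(sim, lag.max = 30)  # should resemble the theoretical PACF plotted above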

ARMA(2,1) model

# ARMA(2,1) model - theoretical autocorrelations
plot(0:30, ARMAacf(ar = c(0.75, -0.3), ma = c(0.25), lag.max = 30), type = "h",
     ylab = "ACF")

## Theoretical partial autocorrelations
plot(1:30, ARMAacf(ar = c(0.75, -0.3), ma = c(0.25), lag.max = 30, pacf = TRUE), type = "h", ylab = "PACF")

Because the AR part of the model is not stationary!

ARMA(3,1) is the equivalent model, because the differencing term (1 - B)^d can be absorbed into the AR part: multiplying (1 - B) into the AR(2) polynomial yields an AR(3) polynomial (which still contains a unit root, so the process remains non-stationary).
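As an illustration (the AR(2) coefficients phi1 = 0.75, phi2 = -0.3 are simply taken from the ARMA(2,1) example above, not necessarily the ones intended in the exercise), the expansion can be checked in R:

# Sketch: (1 - B) * (1 - 0.75 B + 0.3 B^2) = 1 - 1.75 B + 1.05 B^2 - 0.3 B^3,
# i.e. the AR(3) polynomial of the equivalent ARMA(3,1)
polyroot(c(1, -1.75, 1.05, -0.3))  # one root is exactly 1: the unit root coming from (1 - B)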


Exercise 4.2 - sunspotarea

d <- sunspotarea
glimpse(d)
##  Time-Series [1:137] from 1875 to 2011: 213.1 109.3 92.9 22.2 36.3 ...
hist(d)

Because the distribution is right-skewed, which motivates a log transformation.

d2 <- d %>% log()
# note: the analysis below continues with the untransformed series d
d_to_1974 <- window(d, end = 1974)
tsdisplay(d_to_1974)

acf(d_to_1974)

pacf(d_to_1974)

(fit <- arima(d_to_1974, order = c(10, 0, 3)))
## 
## Call:
## arima(x = d_to_1974, order = c(10, 0, 3))
## 
## Coefficients:
##          ar1     ar2      ar3     ar4      ar5      ar6      ar7     ar8
##       0.2579  0.3707  -0.3403  0.0664  -0.0028  -0.1064  -0.0152  0.0434
## s.e.  0.3838  0.3127   0.3439  0.3182   0.1222   0.1303   0.1091  0.1128
##          ar9    ar10     ma1     ma2     ma3  intercept
##       0.1251  0.3401  0.5509  0.0524  0.2243   771.4821
## s.e.  0.1251  0.1235  0.3972  0.2688  0.3174   179.0755
## 
## sigma^2 estimated as 87264:  log likelihood = -712.83,  aic = 1455.65
tsdisplay(fit$residuals)

A suitable model is an ARMA(10, 3); the residuals look like white noise.
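A formal residual check was not part of the original output; a minimal sketch would be a Ljung-Box test (lag chosen here as an example, with fitdf = 10 + 3 estimated ARMA coefficients):

# Sketch: Ljung-Box test on the ARMA(10, 3) residuals
Box.test(fit$residuals, lag = 20, fitdf = 13, type = "Ljung-Box")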


Exercise 4.3 - beluga whales model

d_beluga <- read.table("http://stat.ethz.ch/Teaching/Datasets/WBL/beluga.dat", header = TRUE)
d_beluga_ts <-  ts(d_beluga)
(fit <- lm(NURSING ~ ., d_beluga))
## 
## Call:
## lm(formula = NURSING ~ ., data = d_beluga)
## 
## Coefficients:
## (Intercept)       PERIOD        BOUTS      LOCKONS     DAYNIGHT  
##   0.5602842    0.0001998    0.8784967    2.3903512   -0.3416237
summary(fit)
## 
## Call:
## lm(formula = NURSING ~ ., data = d_beluga)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4457 -0.9018 -0.0850  1.0952  3.9548 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.5602842  0.5502170   1.018  0.31012    
## PERIOD       0.0001998  0.0031937   0.063  0.95020    
## BOUTS        0.8784967  0.3336237   2.633  0.00932 ** 
## LOCKONS      2.3903512  0.2035042  11.746  < 2e-16 ***
## DAYNIGHT    -0.3416237  0.2510156  -1.361  0.17550    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.582 on 155 degrees of freedom
## Multiple R-squared:  0.842,  Adjusted R-squared:  0.8379 
## F-statistic: 206.5 on 4 and 155 DF,  p-value: < 2.2e-16
fit_res <- fit$residuals
tsdisplay(fit_res)

car::qqPlot(fit_res)

## [1] 11  7
hist(fit_res)

Bouts and lock-ons seem to have a significant impact on nursing success; day versus night does not. There is some structure left in the residuals, so the dependency structure at lags 1 and 2 in the (P)ACF must be investigated further. The residuals appear to be normally distributed.
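One way to formalise the residual dependence (an addition, not part of the original solution) is a Durbin-Watson test up to lag 2 with the car package loaded above:

# Sketch: test for autocorrelation in the regression residuals up to lag 2
car::durbinWatsonTest(fit, max.lag = 2)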

# fit an AR(2) model to the residuals, according to the PACF result above
f_burg <- ar.burg(fit_res, aic = FALSE, order.max = 2)
# summary(f_burg)
f_burg$x.mean
## [1] 6.699556e-17
f_burg$ar
## [1] 0.2844054 0.3210801

The mean is essentially zero (about 6.7e-17). The parameter alpha 1 is about 0.28 and alpha 2 about 0.32.

library(nlme) # Load the package containing the procedure gls()
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
## 
##     getResponse
## The following object is masked from 'package:dplyr':
## 
##     collapse
d_beluga_gls <- gls(NURSING ~ BOUTS + LOCKONS + DAYNIGHT + PERIOD, 
                    data = d_beluga, 
                    correlation = corARMA(form = ~ PERIOD, p = f_burg$order, q = 0,
                                          fixed = FALSE), method = "ML")
summary(d_beluga_gls)
## Generalized least squares fit by maximum likelihood
##   Model: NURSING ~ BOUTS + LOCKONS + DAYNIGHT + PERIOD 
##   Data: d_beluga 
##       AIC      BIC   logLik
##   560.396 584.9974 -272.198
## 
## Correlation Structure: ARMA(2,0)
##  Formula: ~PERIOD 
##  Parameter estimate(s):
##      Phi1      Phi2 
## 0.2739974 0.3653663 
## 
## Coefficients:
##                  Value Std.Error   t-value p-value
## (Intercept)  1.3218875 0.7678369  1.721573  0.0871
## BOUTS        0.2961689 0.3370588  0.878686  0.3809
## LOCKONS      2.5681918 0.1964012 13.076253  0.0000
## DAYNIGHT    -0.3080292 0.1549160 -1.988362  0.0485
## PERIOD       0.0024982 0.0062754  0.398089  0.6911
## 
##  Correlation: 
##          (Intr) BOUTS  LOCKON DAYNIG
## BOUTS    -0.303                     
## LOCKONS  -0.101 -0.811              
## DAYNIGHT -0.014 -0.135  0.067       
## PERIOD   -0.607 -0.233  0.251  0.024
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.80055479 -0.58763711  0.01738819  0.65602026  2.49854049 
## 
## Residual standard error: 1.577032 
## Degrees of freedom: 160 total; 155 residual
d_resid <- ts(resid(d_beluga_gls))
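The GLS residuals are stored above but not inspected; a minimal follow-up (an addition, not in the original) would look at both the raw and the normalized residuals, the latter of which should be roughly white noise if the AR(2) correlation structure is adequate:

# Sketch: inspect raw and normalized GLS residuals
tsdisplay(d_resid)
tsdisplay(ts(resid(d_beluga_gls, type = "normalized")))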
The remaining parts of this exercise are left out and used as proper exam preparation.


Exercise 4.4 - varve

t_url <- "http://stat.ethz.ch/Teaching/Datasets/WBL/varve.dat"
d_varve <- ts(scan(t_url)[201:550])
glimpse(d_varve)
##  Time-Series [1:350] from 1 to 350: 21.37 8.62 8.49 23.14 25.14 ...
plot(d_varve)

hist(d_varve)

Because the distribution is right-skewed, which motivates a log transformation.

d_varve_log <- log(d_varve)
tsdisplay(d_varve_log)

d_varve_diff_log <- diff(d_varve_log, differences = 1)
tsdisplay(d_varve_diff_log)

No, it is not stationary. There seems to be a trend, given the slow decay in the ACF, and the oscillatory behaviour could indicate a seasonal component as well. One could try to difference the series, but there is still autocorrelation left at lags 1 to 5 in the PACF.
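Formal stationarity checks were not part of the original output; a sketch using the unit-root and stationarity tests from the tseries package (already loaded) could be:

# Sketch: ADF test (H0: unit root) and KPSS test (H0: level stationarity) on the log series
adf.test(d_varve_log)
kpss.test(d_varve_log)
kpss.test(diff(d_varve_log))  # the differenced series should look stationary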

auto.arima(d_varve, seasonal = T, stationary = F, ic = "aic")
## Series: d_varve 
## ARIMA(1,1,1) 
## 
## Coefficients:
##          ar1      ma1
##       0.2794  -0.9280
## s.e.  0.0569   0.0201
## 
## sigma^2 estimated as 302.7:  log likelihood=-1491.8
## AIC=2989.61   AICc=2989.68   BIC=3001.17
(varve_fit <- arima(d_varve, order = c(1, 1, 1), seasonal = c(1, 1, 1)))
## 
## Call:
## arima(x = d_varve, order = c(1, 1, 1), seasonal = c(1, 1, 1))
## 
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
##          ar1      ma1    sar1     sma1
##       0.1402  -0.9656  0.1402  -0.9656
## s.e.     NaN   0.0416     NaN   0.0416
## 
## sigma^2 estimated as 302.8:  log likelihood = -1492.73,  aic = 2995.46
na.omit(varve_fit$coef)
##        ar1        ma1       sar1       sma1 
##  0.1401664 -0.9656279  0.1401664 -0.9656279
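One detail worth checking (an addition, not in the original): the series was created with ts() without specifying a frequency, so the seasonal period is 1. The seasonal operators then coincide with the non-seasonal ones, which would explain the duplicated coefficient pairs and the NaN standard errors.

frequency(d_varve)  # = 1, so the "seasonal" part at lag 1 duplicates the non-seasonal part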

Written with the backshift operator B and seasonal period s (here s = 1, see above), the fitted model is:
(1 - 0.1402 B) * (1 - 0.1402 B^s) * (1 - B) * (1 - B^s) * Y(t) = (1 - 0.9656 B) * (1 - 0.9656 B^s) * E(t)
(using R's convention that the MA polynomial is 1 + ma1 * B, so ma1 = -0.9656 gives the factor 1 - 0.9656 B).

I would still appreciate some help here: is this the correct way to translate the estimates above into a model formula? I looked for help but found nothing that fully solved the task (e.g. https://stats.stackexchange.com/questions/129901/sarima-model-equation).