library(tseries)
library(tidyverse)
library(magrittr)
library(plotly)
library(timetk)
library(lubridate)
library(pracma)
library(fma)
library(forecast)
library(fpp)
library(car)
ARMA(1,2) model
# AR(1,2) model - theoretical autocorrelations
plot(0:30, ARMAacf(ar = c(0.75), ma = c(-0.3, 0.25), lag.max = 30), type = "h",
ylab = "ACF")
## Theoretical partial autocorrelations
plot(1:30, ARMAacf(ar = c(0.75), ma = c(-0.3, 0.25 ), lag.max = 30, pacf = TRUE), type = "h", ylab = "PACF")
ARMA(2,1) model
# AR(2,1) model - theoretical autocorrelations
plot(0:30, ARMAacf(ar = c(0.75, -0.3), ma = c(0.25), lag.max = 30), type = "h",
ylab = "ACF")
## Theoretical partial autocorrelations
plot(1:30, ARMAacf(ar = c(0.75, -0.3), ma = c(0.25), lag.max = 30, pacf = TRUE), type = "h", ylab = "PACF")
Because AR part of model is not stationary!
ARMA(3,1) is the equivalent model because we can take the differencing term (1 -B)^d and use it in the AR part.
d <- sunspotarea
glimpse(d)
## Time-Series [1:137] from 1875 to 2011: 213.1 109.3 92.9 22.2 36.3 ...
hist(d)
Because it is right-skewed.
d2 <- d %>% log()
d_to_1974 <- window(d, end = 1974)
tsdisplay(d_to_1974)
acf(d_to_1974)
pacf(d_to_1974)
(fit <- arima(d_to_1974, order = c(10, 0, 3)))
##
## Call:
## arima(x = d_to_1974, order = c(10, 0, 3))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.2579 0.3707 -0.3403 0.0664 -0.0028 -0.1064 -0.0152 0.0434
## s.e. 0.3838 0.3127 0.3439 0.3182 0.1222 0.1303 0.1091 0.1128
## ar9 ar10 ma1 ma2 ma3 intercept
## 0.1251 0.3401 0.5509 0.0524 0.2243 771.4821
## s.e. 0.1251 0.1235 0.3972 0.2688 0.3174 179.0755
##
## sigma^2 estimated as 87264: log likelihood = -712.83, aic = 1455.65
tsdisplay(fit$residuals)
Suitable model: ARMA(10, 3). Residuals looking good.
d_beluga <- read.table("http://stat.ethz.ch/Teaching/Datasets/WBL/beluga.dat", header = TRUE)
d_beluga_ts <- ts(d_beluga)
(fit <- lm(NURSING ~ ., d_beluga))
##
## Call:
## lm(formula = NURSING ~ ., data = d_beluga)
##
## Coefficients:
## (Intercept) PERIOD BOUTS LOCKONS DAYNIGHT
## 0.5602842 0.0001998 0.8784967 2.3903512 -0.3416237
summary(fit)
##
## Call:
## lm(formula = NURSING ~ ., data = d_beluga)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4457 -0.9018 -0.0850 1.0952 3.9548
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5602842 0.5502170 1.018 0.31012
## PERIOD 0.0001998 0.0031937 0.063 0.95020
## BOUTS 0.8784967 0.3336237 2.633 0.00932 **
## LOCKONS 2.3903512 0.2035042 11.746 < 2e-16 ***
## DAYNIGHT -0.3416237 0.2510156 -1.361 0.17550
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.582 on 155 degrees of freedom
## Multiple R-squared: 0.842, Adjusted R-squared: 0.8379
## F-statistic: 206.5 on 4 and 155 DF, p-value: < 2.2e-16
fit_res <- fit$residuals
tsdisplay(fit_res)
car::qqPlot(fit_res)
## [1] 11 7
hist(fit_res)
Bouts and lock-ons seems to have a significant impact on nursing success. Day or night is not important. There is some structure left in the residuals. One must further investigate the dependency structure at lag 1 and 2 in (P)ACF. The residuals seem to be normally distributed.
# use AR(2) model according to PACF result of residuals above
f_burg <- ar.burg(fit_res, order = 2)
# summary(f_burg)
f_burg$x.mean
## [1] 6.699556e-17
f_burg$ar
## [1] 0.2844054 0.3210801
The mean is close to zero (6.67e-17). The parameter alpha 1 is about 0.28 and alpha 2 about 0.32.
library(nlme) # Load the package containing the procedure gls()
##
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
##
## getResponse
## The following object is masked from 'package:dplyr':
##
## collapse
d_beluga_gls <- gls(NURSING ~ BOUTS + LOCKONS + DAYNIGHT + PERIOD,
data = d_beluga,
correlation = corARMA(form = ~ PERIOD, p = f_burg$order, q = 0,
fixed = FALSE), method = "ML")
summary(d_beluga_gls)
## Generalized least squares fit by maximum likelihood
## Model: NURSING ~ BOUTS + LOCKONS + DAYNIGHT + PERIOD
## Data: d_beluga
## AIC BIC logLik
## 560.396 584.9974 -272.198
##
## Correlation Structure: ARMA(2,0)
## Formula: ~PERIOD
## Parameter estimate(s):
## Phi1 Phi2
## 0.2739974 0.3653663
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.3218875 0.7678369 1.721573 0.0871
## BOUTS 0.2961689 0.3370588 0.878686 0.3809
## LOCKONS 2.5681918 0.1964012 13.076253 0.0000
## DAYNIGHT -0.3080292 0.1549160 -1.988362 0.0485
## PERIOD 0.0024982 0.0062754 0.398089 0.6911
##
## Correlation:
## (Intr) BOUTS LOCKON DAYNIG
## BOUTS -0.303
## LOCKONS -0.101 -0.811
## DAYNIGHT -0.014 -0.135 0.067
## PERIOD -0.607 -0.233 0.251 0.024
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.80055479 -0.58763711 0.01738819 0.65602026 2.49854049
##
## Residual standard error: 1.577032
## Degrees of freedom: 160 total; 155 residual
d_resid <- ts(resid(d_beluga_gls ))
t_url <- "http://stat.ethz.ch/Teaching/Datasets/WBL/varve.dat"
d_varve <- ts(scan(t_url)[201:550])
glimpse(d_varve)
## Time-Series [1:350] from 1 to 350: 21.37 8.62 8.49 23.14 25.14 ...
plot(d_varve)
hist(d_varve)
Because it is right-skewed.
d_varve_log <- log(d_varve)
tsdisplay(d_varve_log)
d_varve_diff_log <- diff(d_varve, differences = 1)
tsdisplay(d_varve_diff_log)
No, it is not stationary. There seems to be a trend because of the slow decay in the ACF. There could be a seasonal component due to the oscillatory behaviour as well. One could try to difference the series. But there is still a autocorrelation left at lag 1 to 5 in the PACF.
auto.arima(d_varve, seasonal = T, stationary = F, ic = "aic")
## Series: d_varve
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## 0.2794 -0.9280
## s.e. 0.0569 0.0201
##
## sigma^2 estimated as 302.7: log likelihood=-1491.8
## AIC=2989.61 AICc=2989.68 BIC=3001.17
(varve_fit <- arima(d_varve, order = c(1, 1, 1), seasonal = c(1, 1, 1)))
##
## Call:
## arima(x = d_varve, order = c(1, 1, 1), seasonal = c(1, 1, 1))
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ma1 sar1 sma1
## 0.1402 -0.9656 0.1402 -0.9656
## s.e. NaN 0.0416 NaN 0.0416
##
## sigma^2 estimated as 302.8: log likelihood = -1492.73, aic = 2995.46
na.omit(varve_fit$coef)
## ar1 ma1 sar1 sma1
## 0.1401664 -0.9656279 0.1401664 -0.9656279
Y(t) = 0.1402 * Y(t-1) + -0.9656 * E(t) + 0.1402 * ??? -0.956 * ???.
Need some help here. How to I get the results above in the correct formula? Looked for help but nothing helped me to solve the task (e. g. https://stats.stackexchange.com/questions/129901/sarima-model-equation).