Análise de séries temporais

Nesta aula, utilizaremos o pacote astsa de Shumway and Stoffer, 2016. Os comandos e séries utilizadas podem ser encontrados no mesmo livro.

library(astsa)    # Shumway and Stoffer, 2016

Outros pacotes úteis na análise de séries temporais:

 library(stats)    #  Modelos ARIMA
 library(tseries)  #  Testes de RU  
 library(FitAR)    #  Ajuste de modelos AR. Útil para plotar Estatísticas de Ljung-Box vs Lags
 library(fGarch)   #  Modelos de heterocedasticidade
 library(e1071)    #  Assimetria e Curtose
 library(MTS)      #  Modelos VARMA, Tsay, 2016
 library(evir)     #  Calculo de Valores Extremos para VaR
 library(urca)     #  Testes de Co-integração e de raízes unitárias
 library(finTS)          # Tsay, 2005
 library(arfima)         # Fractional ARIMA Modeling
 library(GECStableGarch) # ARMA-GARCH/APARCH models with GEV and stable distributions
 library(gogarch)        # Generalized Orthogonal GARCH - GO-GARCH
 library(gss)            # General smoothing Splines
 library(evd)            # Calculo de Valores Extremos para VaR
 library(fExtremes)      # Calculo de Valores Extremos para VaR
 library(ismev)          # Calculo de Valores Extremos para VaR
 library(rugarch)  #  Modelos EGARCH univariados
 library(rmgarch)  #  Modelos GARCH multivariados
 library(fArma)
 library(fBasics)
 library(fExtremes)
 library(fUnitRoots)
 library(tsDyn)
 library(vars)
 library(ccgarch)

Simulando Processos ARMA(p,q)

Para simular processos ARMA(p,q), utilizamos a função arima.sim.

par(mfrow=c(1,3))
plot(arima.sim(list(order=c(1,0,0), ar=.5), n=100), ylab="x",
main=(expression(AR(1)~~~phi==+.5)))
plot(arima.sim(list(order=c(1,0,0), ar=-.5), n=100), ylab="x",
main=(expression(AR(1)~~~phi==-.5)))
plot(arima.sim(list(order=c(2,0,0), ar=c(-0.5,-0.3)), n=100), ylab="x",
main=(expression(AR(1)~~~phi[1]==-.5~~~phi[2]==.3)))

Representações \(MA(\infty)\) e \(AR(\infty)\)

psis=ARMAtoMA(ar = .5, lag.max = 15)  # first 10 psi-weights
phis=ARMAtoAR(ma = -.5, lag.max = 15)# first 10 pi-weights
psis2=ARMAtoMA(ar = -.5, ma = -.6, lag.max = 15)# first 10 pi-weights

par(mfrow=c(1,3))
plot(psis, main="AR(1)", type="o")
plot(phis, main="MA(1)", type="o")
plot(psis2, main="ARMA(1,1)", type="o")
abline(h=0,col="grey")

ACF e PACF teóricas

ACF = ARMAacf(ar=c(1.5,-.75), ma=0, 24)[-1]
PACF = ARMAacf(ar=c(1.5,-.75), ma=0, 24, pacf = TRUE)
par(mfrow=c(1,2))
plot(ACF, type="h", xlab="lag",main="ACF")
abline(h=0)
plot(PACF, type="h", xlab="lag",main="PACF")

# Representação MA infinita deste processo
MA_inf=ARMAtoMA(ar=c(1.5,-.75), ma=0, 48)
par(mfrow=c(1,1))
plot(MA_inf, main=expression(paste("Representação MA(",infinity,")",sep="")),ylab=expression(psi[j]),type="o")
abline(h=0,col="grey")

Exemplo Prático

Exemplo 2.7 (Shumway and Stoffer, 2016): Variedades Glaciais Paleoclimáticas

O derretimento de geleiras produz camadas anuais de areia e silte sedimentadas durante o derretimento da primavera. Tais depósitos sedimentares, chamados varves, podem ser usados como proxies para parâmetros paleoclimáticos, como a temperatura, porque, em um ano quente, mais areia e silte são depositados na geleira.
A figura a seguir mostra a série de variações anuais de espessuras coletadas de um local em Massachusetts por 634 anos, começando 11.834 anos atrás(detalhes, Shumway e Verosub, 1992).

plot.ts(varve)

Exemplo Prático: Correção pela Diferença e log

Porque a variação nas espessuras aumenta em proporcionalmente à quantidade depositada, uma transformação logarítmica poderia remover a não estacionariedade observável na variância em função do tempo.

dl_varve=diff(log(varve))
plot.ts(dl_varve, main="Log-diferença da série varve")

par(mfrow=c(1,2))
hist(varve, col="orange", main="Histograma da série varve")
hist(dl_varve, col="orange",main="Histograma do log-diff da série varve")

Exemplo Prático: ACF e PACF

par(mfrow=c(1,2))
acf(varve);pacf(varve)

acf(dl_varve);pacf(dl_varve)

Exemplo prático: Tendência linear

Exemplo 2.1 (Shumway and Stoffer, 2016): Preço mensal (por pound) do frango nos EUA de 2001 a 2016 (180 months)

fit1_chicken<- lm(chicken~time(chicken), na.action=NULL)
plot(chicken, ylab="centavos por pound")
abline(fit1_chicken)

par(mfrow=c(1,2))
acf(chicken);pacf(chicken)

acf(diff(chicken));pacf(diff(chicken))

par(mfrow=c(1,2))
plot.ts(resid(fit1_chicken), main="após removida a tendência")
plot.ts(diff(chicken), main="primeira diferença")

par(mfrow=c(1,3)) # plot ACFs
acf(chicken, 48, main="chicken")
acf(resid(fit1_chicken), 48, main="após removida a tendência")
acf(diff(chicken), 48, main="primeira diferença")

summary(fit1_chicken)

## 
## Call:
## lm(formula = chicken ~ time(chicken), na.action = NULL)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.7411 -3.4730  0.8251  2.7738 11.5804 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -7.131e+03  1.624e+02  -43.91   <2e-16 ***
## time(chicken)  3.592e+00  8.084e-02   44.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.696 on 178 degrees of freedom
## Multiple R-squared:  0.9173, Adjusted R-squared:  0.9168 
## F-statistic:  1974 on 1 and 178 DF,  p-value: < 2.2e-16

Exemplo prático: Regressão nos lags

Exemplo 2.3 (Shumway and Stoffer, 2016): Southern Oscillation Index (SOI)

plot.ts(soi)

par(mfrow=c(1,2))
acf(soi);pacf(soi)

T=length(soi)
dblag_soi=cbind(soi=soi[7:T],soi1=soi[6:(T-1)],
                soi2=soi[5:(T-2)],soi3=soi[4:(T-3)],
                soi4=soi[3:(T-4)],soi5=soi[2:(T-5)],
                soi6=soi[1:(T-6)]) #ts.intersect(soi, soiL6=lag(soi,-6), dframe=TRUE)
pairs(dblag_soi, col="darkred",pch=16)

summary(fit1_soi<-lm(soi~soi1+soi2+soi3+soi4+soi5+soi6, data =as.data.frame(dblag_soi)))

## 
## Call:
## lm(formula = soi ~ soi1 + soi2 + soi3 + soi4 + soi5 + soi6, data = as.data.frame(dblag_soi))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81011 -0.21039  0.00523  0.22008  0.85394 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.04307    0.01505   2.861  0.00442 ** 
## soi1         0.56767    0.04755  11.938  < 2e-16 ***
## soi2         0.03681    0.05469   0.673  0.50126    
## soi3         0.05067    0.05481   0.924  0.35577    
## soi4        -0.02528    0.05488  -0.461  0.64533    
## soi5        -0.11143    0.05485  -2.031  0.04281 *  
## soi6        -0.06324    0.04798  -1.318  0.18818    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3024 on 440 degrees of freedom
## Multiple R-squared:  0.3906, Adjusted R-squared:  0.3823 
## F-statistic:    47 on 6 and 440 DF,  p-value: < 2.2e-16

Exemplo prático: Ajuste sazonal com função cosseno

x = 2*cos(2*pi*1:500/50 + .6*pi) + rnorm(500,0,5)
t=1:length(x)
period_x=spectrum(x,plot=FALSE)
plot(period_x$freq,period_x$spec,type="l")

periodo=length(x)/10
w=1/periodo
z1 = cos(2*pi*t*w)
z2 = sin(2*pi*t*w)
summary(fit_cosseno <- lm(x~0+z1+z2)) # zero to exclude the intercept

## 
## Call:
## lm(formula = x ~ 0 + z1 + z2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.7549  -3.1155   0.1577   3.7677  13.7902 
## 
## Coefficients:
##    Estimate Std. Error t value Pr(>|t|)    
## z1  -0.4149     0.3197  -1.298    0.195    
## z2  -1.5317     0.3197  -4.791 2.19e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.055 on 498 degrees of freedom
## Multiple R-squared:  0.04715,    Adjusted R-squared:  0.04332 
## F-statistic: 12.32 on 2 and 498 DF,  p-value: 5.99e-06

par(mfrow=c(2,1))
plot.ts(x)
plot.ts(x, col=8, ylab=expression(hat(x)))
lines(fitted(fit_cosseno), col=2)

b1b2=fit_cosseno$coefficients
phi=atan(-b1b2[2]/b1b2[1])
A=-b1b2[1]/cos(phi)
print(paste("A"=A,";","Phi=",phi),digits = 2)

## [1] "1.58691858043781 ; Phi= -1.30625487545458"

Exemplo prático: Alisamentos

Exemplo 2.11 (Shumway and Stoffer, 2016): Southern Oscilation Index (SOI)

Alisamento com pesos lineares

wgts = c(.5, rep(1,11), .5)/12
soif = filter(soi, sides=2, filter=wgts)
plot(soi)
lines(soif, lwd=2, col=4)
par(fig = c(.65, 1, .65, 1), new = TRUE) # the insert
nwgts = c(rep(0,20), wgts, rep(0,20))
plot(nwgts, type="l", ylim = c(-.02,.1), xaxt='n', yaxt='n', ann=FALSE)

Alisamento com pesos gaussianos

par(new=FALSE)
plot(soi)
lines(ksmooth(time(soi), soi, "normal", bandwidth=1), lwd=2, col=4)
par(fig = c(.65, 1, .65, 1), new = TRUE) # the insert
gauss = function(x) { 1/sqrt(2*pi) * exp(-(x^2)/2) }
x = seq(from = -3, to = 3, by = 0.001)
plot(x, gauss(x), type ="l", ylim=c(-.02,.45), xaxt='n', yaxt='n', ann=FALSE)

Splines

plot(soi)
lines(smooth.spline(time(soi), soi, spar=.5), lwd=2, col=4)
lines(smooth.spline(time(soi), soi, spar= 1), lty=2, lwd=2, col=2)

Ajustes ARIMA

plot.ts(varve, main="Série de variações anuais de espessuras dos sedimentos")

dl_varve=diff(log(varve))
plot.ts(dl_varve, main="Log-diferença da série varve")

hist(dl_varve, col="orange",main="Histograma do log-diff da série varve")

acf0<-function(x,na.action=na.pass,lags=1:30,...){
  #lags:horizonte dos lags
  plot(acf(x,na.action=na.action,lag.max = max(lags),plot=F)[lags],main="ACF")#,lwd=5,lend=2,col="darkblue",...)
  #axis(1, tck = 1, col = "lightgrey", lty = "dotted")
}
pacf0<-function(x,na.action=na.pass,lags=1:30,...){
  #lags:horizonte dos lags
  plot(pacf(x,na.action=na.action,lag.max = max(lags), plot=F)[lags],main="PACF")#,lwd=5,lend=2,col="darkblue",...)
  #axis(1, tck = 1, col = "lightgrey", lty = "dotted")
}
par(mfrow=c(1,2))
acf0(varve);pacf0(varve)

acf0(dl_varve);pacf0(dl_varve)

Ajuste ARIMA(0,1,1)

l_varve=log(varve)
fit1_varve<-arima(x = l_varve,order=c(0,1,1))
fit1_varve

## 
## Call:
## arima(x = l_varve, order = c(0, 1, 1))
## 
## Coefficients:
##           ma1
##       -0.7705
## s.e.   0.0341
## 
## sigma^2 estimated as 0.2353:  log likelihood = -440.72,  aic = 885.44

Diagnóstico

res_fit1_varve=resid(fit1_varve)
hist(res_fit1_varve, col="orange",main="Histograma do log-diff da série varve")

par(mfrow=c(1,2))
acf0(res_fit1_varve);pacf0(res_fit1_varve)

Teste de Box-Pierce-Ljung

library(FitAR)
par(mfrow=c(1,2))
LBQPlot(fit1_varve$residuals,15)

Teste de Normalidade

library(tseries)
jarque.bera.test(fit1_varve$residuals)

## 
##  Jarque Bera Test
## 
## data:  fit1_varve$residuals
## X-squared = 7.7001, df = 2, p-value = 0.02128

qqnorm(fit1_varve$residuals,col="darkred", pch=20)
qqline(fit1_varve$residuals)

Segundo Ajuste ARIMA(1,1,1)

fit2_varve<-arima(x = l_varve,order=c(1,1,1))
fit2_varve

## 
## Call:
## arima(x = l_varve, order = c(1, 1, 1))
## 
## Coefficients:
##          ar1      ma1
##       0.2330  -0.8858
## s.e.  0.0518   0.0292
## 
## sigma^2 estimated as 0.2284:  log likelihood = -431.44,  aic = 868.88

par(mfrow=c(1,3))
res_fit2_varve=resid(fit2_varve)
acf0(res_fit2_varve);pacf0(res_fit2_varve)
LBQPlot(fit2_varve$residuals,15)

hist(res_fit2_varve, col="orange",main="Histograma do log-diff da série varve")
jarque.bera.test(fit2_varve$residuals)

## 
##  Jarque Bera Test
## 
## data:  fit2_varve$residuals
## X-squared = 3.9657, df = 2, p-value = 0.1377

qqnorm(fit2_varve$residuals,col="darkred", pch=20)
qqline(fit2_varve$residuals)

O modelo ARIMA(1,1,1) está melhor ajustado que o modelo ARIMA(0,1,1).

Previsões

fore_fit2_varve = predict(fit2_varve, n.ahead=24)
fore_fit2_varve

## $pred
## Time Series:
## Start = 635 
## End = 658 
## Frequency = 1 
##  [1] 2.560492 2.561433 2.561652 2.561703 2.561715 2.561718 2.561719
##  [8] 2.561719 2.561719 2.561719 2.561719 2.561719 2.561719 2.561719
## [15] 2.561719 2.561719 2.561719 2.561719 2.561719 2.561719 2.561719
## [22] 2.561719 2.561719 2.561719
## 
## $se
## Time Series:
## Start = 635 
## End = 658 
## Frequency = 1 
##  [1] 0.4779476 0.5059420 0.5144671 0.5200988 0.5251117 0.5299524 0.5347206
##  [8] 0.5394402 0.5441173 0.5487541 0.5533521 0.5579121 0.5624351 0.5669221
## [15] 0.5713738 0.5757911 0.5801748 0.5845256 0.5888442 0.5931315 0.5973879
## [22] 0.6016142 0.6058111 0.6099791

exp(fore_fit2_varve$pred)

## Time Series:
## Start = 635 
## End = 658 
## Frequency = 1 
##  [1] 12.94218 12.95437 12.95721 12.95787 12.95802 12.95806 12.95807
##  [8] 12.95807 12.95807 12.95807 12.95807 12.95807 12.95807 12.95807
## [15] 12.95807 12.95807 12.95807 12.95807 12.95807 12.95807 12.95807
## [22] 12.95807 12.95807 12.95807

ts.plot(varve, exp(fore_fit2_varve$pred), col=1:2, ylab="Recruitment",xlim=c(580,658), ylim=c(0,50))
U = exp(fore_fit2_varve$pred+fore_fit2_varve$se); L = exp(fore_fit2_varve$pred-fore_fit2_varve$se)
xx = c(time(U), rev(time(U))); yy = c(L, rev(U))
polygon(xx, yy, border = 8, col = gray(.6, alpha = .2))
lines(exp(fore_fit2_varve$pred), type="p", col=2)

Terceiro Ajuste ARIMA(2,1,1)

fit3_varve<-arima(x = l_varve,order=c(2,1,1))
fit3_varve

## 
## Call:
## arima(x = l_varve, order = c(2, 1, 1))
## 
## Coefficients:
##          ar1     ar2      ma1
##       0.2435  0.0473  -0.9039
## s.e.  0.0502  0.0474   0.0300
## 
## sigma^2 estimated as 0.2281:  log likelihood = -430.95,  aic = 869.89

par(mfrow=c(1,3))
res_fit3_varve=resid(fit3_varve)
acf0(res_fit3_varve);pacf0(res_fit3_varve)
LBQPlot(fit3_varve$residuals,15)

hist(res_fit3_varve, col="orange",main="Histograma do log-diff da série varve")
jarque.bera.test(fit3_varve$residuals)

## 
##  Jarque Bera Test
## 
## data:  fit3_varve$residuals
## X-squared = 4.4529, df = 2, p-value = 0.1079

qqnorm(fit3_varve$residuals,col="darkred", pch=20)
qqline(fit3_varve$residuals)
fore_fit3_varve = predict(fit3_varve, n.ahead=24)
fore_fit3_varve

## $pred
## Time Series:
## Start = 635 
## End = 658 
## Frequency = 1 
##  [1] 2.566672 2.555566 2.553345 2.552279 2.551914 2.551775 2.551724
##  [8] 2.551705 2.551698 2.551695 2.551694 2.551694 2.551694 2.551694
## [15] 2.551694 2.551694 2.551694 2.551694 2.551694 2.551694 2.551694
## [22] 2.551694 2.551694 2.551694
## 
## $se
## Time Series:
## Start = 635 
## End = 658 
## Frequency = 1 
##  [1] 0.4775611 0.5043517 0.5157848 0.5219332 0.5266684 0.5308932 0.5349204
##  [8] 0.5388577 0.5427444 0.5465954 0.5504165 0.5542102 0.5579777 0.5617198
## [15] 0.5654370 0.5691300 0.5727991 0.5764449 0.5800678 0.5836681 0.5872464
## [22] 0.5908031 0.5943384 0.5978529

ts.plot(varve, exp(fore_fit3_varve$pred), col=1:2, ylab="Recruitment",xlim=c(580,658), ylim=c(0,50))
U = exp(fore_fit3_varve$pred+fore_fit3_varve$se); L = exp(fore_fit3_varve$pred-fore_fit3_varve$se)
xx = c(time(U), rev(time(U))); yy = c(L, rev(U))
polygon(xx, yy, border = 8, col = gray(.6, alpha = .2))
lines(exp(fore_fit3_varve$pred), type="p", col=2)

MATD051 - Análise de Séries Temporais A

Aula 12: ARIMA