library(fpp2)
library(tidyverse)
library(TSstudio)
library(tseries)
library(BETS)
library(dplyr)
Índice de Atividade Econômica do Banco Central do Brasil
df.res <- BETSsearch("Central Bank Economic Activity Index",
view = FALSE)
df.res
id <- 24364 # série ajustada sazonalmente
inicio = '2003-01-01'
fim = as.character(Sys.Date() - 1) # ontem
df.BETS <- BETSget(code = id,
data.frame = TRUE,
from = inicio,
to = fim)
df <- df.BETS
head(df)
tail(df)
ibc <- df$value
ts.ibc <- ts(ibc, frequency = 12, start = c(2003,01), end = c(2020,06))
ts.ibc
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 2003 100.48 102.04 102.03 100.94 99.82 100.29 99.05 99.79 101.67 101.93
## 2004 103.57 104.51 105.94 106.47 106.32 107.26 107.83 108.19 109.43 108.78
## 2005 109.54 110.22 110.44 110.98 110.91 110.67 110.64 111.23 110.93 111.07
## 2006 113.33 113.64 113.59 114.27 115.66 114.77 116.89 116.62 116.80 118.04
## 2007 119.65 120.19 119.44 121.20 122.40 123.43 124.21 124.32 125.31 126.24
## 2008 127.27 126.75 126.36 127.85 129.43 131.15 130.95 131.01 130.86 128.20
## 2009 120.91 122.20 122.93 123.68 124.89 125.89 126.29 128.03 129.15 129.90
## 2010 133.58 135.19 136.49 137.20 136.43 136.21 136.97 137.66 139.35 138.73
## 2011 140.15 140.73 141.15 140.86 141.24 142.19 142.22 141.90 141.87 141.57
## 2012 138.87 140.39 140.10 141.00 142.67 143.56 143.87 144.39 143.69 144.78
## 2013 144.98 143.88 144.95 146.24 146.87 146.33 146.93 147.19 148.06 148.06
## 2014 148.34 147.97 147.81 147.00 146.27 143.48 144.71 145.63 146.11 145.49
## 2015 145.30 144.37 144.00 142.51 141.17 139.83 138.95 138.98 137.90 138.84
## 2016 135.64 135.54 134.64 134.71 134.04 134.70 134.46 133.61 133.73 133.15
## 2017 133.66 136.03 135.87 135.80 134.91 136.07 136.57 135.35 135.48 135.54
## 2018 137.33 137.38 137.31 138.43 132.96 137.41 138.37 138.64 137.96 137.77
## 2019 139.33 137.71 137.55 137.22 138.12 138.50 138.50 137.86 139.16 139.74
## 2020 139.35 139.92 131.62 119.42 121.43 127.90
## Nov Dec
## 2003 102.00 101.44
## 2004 109.65 108.74
## 2005 111.82 112.31
## 2006 118.85 119.93
## 2007 126.14 126.87
## 2008 124.91 120.84
## 2009 129.97 131.31
## 2010 140.05 138.34
## 2011 142.23 140.55
## 2012 144.58 144.15
## 2013 148.36 148.71
## 2014 145.64 146.18
## 2015 136.92 136.41
## 2016 133.08 132.34
## 2017 135.91 137.75
## 2018 138.33 138.75
## 2019 139.58 139.19
## 2020
autoplot(ts.ibc, lwd = 1.2, col = "steelblue") +
labs(y = 'IBC-BR',
x = 'Data',
title = 'Índice de Atividade do Banco Central do Brasil',
caption = 'Fonte: BACEN')
O impacto da Covid-19 em números
ibc.2020 <- window(ts.ibc, 2020)
covid <- data.frame(Data = time(ibc.2020),
Valor = as.matrix(ibc.2020))
arit.ret <- function(precos) {
#### Calcula o retorno aritmético de uma série de preços ####
c(NA, precos[2:length(precos)]/ precos[1:length(precos)-1] - 1)
}
covid$delta <- arit.ret(as.numeric(covid$Valor)) * 100
covid
autoplot(ibc.2020, lwd = 1.2, col = "steelblue") +
labs(y = 'IBC-BR',
x = 'Data',
title = 'Índice de Atividade do Banco Central do Brasil em 2020',
caption = 'Fonte: BACEN')
TSstudiots_seasonal(ts.ibc,
type = 'box',
title = 'Sazonalidade - Índice de Atividade do BCB')
acf <- ggAcf(ts.ibc)
acf
pacf <- ggPacf(ts.ibc)
pacf
A série é claramente não estacionária. O Pacf sugere que a séria ficaria estacionária no lag = 3.
Utlizaremos os testes de estacionariedade com o pacote tseries.
\(H_0\): A série não é estacionária
\(H_1\): A série é estacionária
adf.test(ts.ibc)
##
## Augmented Dickey-Fuller Test
##
## data: ts.ibc
## Dickey-Fuller = -0.5381, Lag order = 5, p-value = 0.9793
## alternative hypothesis: stationary
Note a diferença da hipótese básica do teste
\(H_0\): A série é estacionária
\(H_1\): A série não é estacionária
kpss.test(ts.ibc)
##
## KPSS Test for Level Stationarity
##
## data: ts.ibc
## KPSS Level = 2.9641, Truncation lag parameter = 4, p-value = 0.01
Rejeita-se a hipótese nula (p-value smaller than printed p-value).
pp.test(ts.ibc)
##
## Phillips-Perron Unit Root Test
##
## data: ts.ibc
## Dickey-Fuller Z(alpha) = -1.5808, Truncation lag parameter = 4, p-value
## = 0.9779
## alternative hypothesis: stationary
Não se pode rejeitar a hipótese nula a menos de \(\alpha\) = 21%: p-valor = 0.2136
diff <- diff(ts.ibc)
lim <- 1.96/sqrt(length(ibc))
autoplot(diff, col ="blue", lwd = 1.1) +
geom_hline(yintercept = 0,
colour = 'darkred',
linetype = 'solid',
size = 0.75) +
xlab('Data') +
ylab('Diff')
acf.diff <- ggAcf(diff, lag.max = 24)
acf.diff
Uma diferenciação de lag = 2, mas observa-se a sazonalidade nos instantes 6 e 12 e 18 e 24. O teste sugere 4 lags (5-1).
O Acf é útil para medir o Modelo MA(q).
pacf.diff <- ggPacf(diff, lag.max = 24)
pacf.diff
Conte os primeiros spikes do PACF para o lag do AR(p).
adf.test(diff)
##
## Augmented Dickey-Fuller Test
##
## data: diff
## Dickey-Fuller = -5.9818, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
\(H_0\): A série é estacionária
\(H_1\): A série não é estacionária
kpss.test(diff)
##
## KPSS Test for Level Stationarity
##
## data: diff
## KPSS Level = 0.58215, Truncation lag parameter = 4, p-value = 0.02426
pp.test(diff)
##
## Phillips-Perron Unit Root Test
##
## data: diff
## Dickey-Fuller Z(alpha) = -141.52, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
ar <- arima(ts.ibc, order = c(2,1,0))
ar
##
## Call:
## arima(x = ts.ibc, order = c(2, 1, 0))
##
## Coefficients:
## ar1 ar2
## 0.2673 -0.1832
## s.e. 0.0708 0.0708
##
## sigma^2 estimated as 2.289: log likelihood = -383.17, aic = 772.34
fit <- forecast(ar, h = 12) # IC level = 95
autoplot(ts.ibc, series = "Série_Observada") +
autolayer(fit$fitted, series = "Série Predita_AR") +
autolayer(fit, series = "Projeção h = 12") +
xlab('Data')
ma <- arima(ts.ibc, order = c(0,1,2))
ma
##
## Call:
## arima(x = ts.ibc, order = c(0, 1, 2))
##
## Coefficients:
## ma1 ma2
## 0.2470 -0.0546
## s.e. 0.0773 0.0779
##
## sigma^2 estimated as 2.322: log likelihood = -384.65, aic = 775.31
fit <- forecast(ma, h = 12) # SE QUISER FIXAR IC level = 95)
autoplot(ts.ibc, series = "Série_Observada") +
autolayer(fit$fitted, series = "Série Predita_MA") +
autolayer(fit, series = "Projeção h = 12") +
xlab('Data')
#$ ARIMA(p,d,q)
arima <- arima(ts.ibc, order = c(2,1,2))
arima
##
## Call:
## arima(x = ts.ibc, order = c(2, 1, 2))
##
## Coefficients:
## ar1 ar2 ma1 ma2
## -0.1626 -0.9071 0.3757 1.0000
## s.e. 0.0398 0.0391 0.0193 0.0278
##
## sigma^2 estimated as 2.091: log likelihood = -377.02, aic = 764.05
fit <- forecast(arima, h = 12) # IC 95% padrão
autoplot(ts.ibc, series = "Série_Observada") +
autolayer(fit$fitted, series = "Série Predita_ARIMA") +
autolayer(fit, series = "Projeção h = 12") +
xlab('Data')
sarima <- arima(ts.ibc,
order= c(2,1,0),
seasonal = list(order = c(0,1,1),
period = 12))
sarima
##
## Call:
## arima(x = ts.ibc, order = c(2, 1, 0), seasonal = list(order = c(0, 1, 1), period = 12))
##
## Coefficients:
## ar1 ar2 sma1
## 0.2179 -0.1578 -0.785
## s.e. 0.0743 0.0730 0.078
##
## sigma^2 estimated as 2.527: log likelihood = -376.62, aic = 761.23
fit <- forecast(sarima, h = 12)
autoplot(ts.ibc, series = "Serie_Observada") +
autolayer(fit$fitted, series = "Série Predita_ SARIMA") +
autolayer(fit, series = "Projeção h = 12", showgap = FALSE) +
xlab('Data')
auto.arima <- auto.arima(ts.ibc, seasonal = TRUE,
stepwise = FALSE,
approximation = FALSE)
auto.arima
## Series: ts.ibc
## ARIMA(0,2,4)
##
## Coefficients:
## ma1 ma2 ma3 ma4
## -0.7342 -0.3414 -0.1025 0.2046
## s.e. 0.0678 0.0845 0.0957 0.0727
##
## sigma^2 estimated as 2.293: log likelihood=-381
## AIC=772.01 AICc=772.3 BIC=788.7
print('Modelo -> SARIMA(3,1,0)(0,1,1)[12]')
## [1] "Modelo -> SARIMA(3,1,0)(0,1,1)[12]"
fit <- forecast(auto.arima, h = 12)
autoplot(ts.ibc, series = "Série_Observada") +
autolayer(fit$fitted, series = "Série Predita_AUTO.ARIMA") +
autolayer(fit, series = "Projeção h = 12") +
xlab('Data')
round(accuracy(ar), 4)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1209 1.5095 0.9561 0.1001 0.7453 1.0086 -0.0158
round(accuracy(ma), 4)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1128 1.5204 0.9519 0.0933 0.7422 1.0042 0.0035
round(accuracy(arima), 4)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1137 1.4427 0.9473 0.0946 0.7359 0.9993 0.0566
round(accuracy(sarima), 4)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.126 1.5397 0.959 -0.0936 0.7357 1.0117 -0.0141
round(accuracy(auto.arima), 4)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.1074 1.4923 0.9153 -0.081 0.7123 0.1938 -0.0061
auto.arima): SARIMA(3,1,0)(0,1,1)[12]t_test(model = auto.arima)
Os parâmetros são significativos.
Teste Ljung-Box
IMPORTANTE:
\[fitdf = p + q + P + Q = 3 + 0 + 0 + 1 = 4\]
Box.test(x = auto.arima$residuals,
lag = 24,
type = "Ljung-Box",
fitdf = 4)
##
## Box-Ljung test
##
## data: auto.arima$residuals
## X-squared = 13.12, df = 20, p-value = 0.8722
Não podemos rejeitar \(H_0\). A série aparenta ser iid.
Resíduos
checkresiduals(auto.arima, col ="blue")
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,4)
## Q* = 13.12, df = 20, p-value = 0.8722
##
## Model df: 4. Total lags used: 24
Teste proposto por Engle (1982) - GARCH
\(H_0\): Os resíduos ao quadrado são uma sequência de ruídos brancos, ou seja, os resíduos são homocedásticos.
\(H_1\): A série é heterocedástica
library(FinTS)
ArchTest(auto.arima$residuals, lags = 36)
##
## ARCH LM-test; Null hypothesis: no ARCH effects
##
## data: auto.arima$residuals
## Chi-squared = 134.91, df = 36, p-value = 2.344e-13
Não rejeitamos a hipótese nula.
library(normtest)
jb.norm.test(auto.arima$residuals, nrepl = 2000)
##
## Jarque-Bera test for normality
##
## data: auto.arima$residuals
## JB = 1933.6, p-value < 2.2e-16
shapiro.test(auto.arima$residuals)
##
## Shapiro-Wilk normality test
##
## data: auto.arima$residuals
## W = 0.80205, p-value = 1.321e-15
hist(auto.arima$residuals,
main = 'Histograma dos resíduos',
xlab = 'resíduos',
ylab = 'Frequências')
round(summary(auto.arima$residuals), digits = 4)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -9.9311 -0.6016 -0.0461 -0.1074 0.5805 6.0425
moda <- function(v) {
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
result <- moda(as.numeric(auto.arima$residuals))
print(result)
## [1] 0.04493601
mean(auto.arima$residuals, trim = 0.05)
## [1] -0.02697165
Estamos violando esse diagóstico, porém se efetuarmos o arrendondamento, \[média \approx mediana \approx moda \approx zero\]
fit
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jul 2020 130.0133 128.0729 131.9537 127.0457 132.9809
## Aug 2020 128.6284 125.4982 131.7587 123.8411 133.4157
## Sep 2020 127.7553 123.8652 131.6454 121.8059 133.7048
## Oct 2020 127.5125 123.1546 131.8704 120.8477 134.1774
## Nov 2020 127.2697 122.4682 132.0713 119.9265 134.6130
## Dec 2020 127.0270 121.7993 132.2546 119.0320 135.0219
## Jan 2021 126.7842 121.1435 132.4248 118.1575 135.4108
## Feb 2021 126.5414 120.4976 132.5852 117.2982 135.7845
## Mar 2021 126.2986 119.8594 132.7378 116.4507 136.1465
## Apr 2021 126.0558 119.2271 132.8845 115.6122 136.4994
## May 2021 125.8130 118.5995 133.0266 114.7808 136.8453
## Jun 2021 125.5703 117.9754 133.1652 113.9549 137.1857