# importacao ----
dados <- read.csv2("dados_Imputed.csv")
# manipulacao dos dados ----
dados_prev <- dados %>%
filter(str_detect(Data, c("2012" ,"2013"))) %>%
select(Data, E1_SO2)
dados_prev$Data <- as.Date(dados_prev$Data)
dados_prev <- data.frame(media_dia = tapply(dados_prev$E1_SO2, dados_prev$Data, mean))
dados_prev <- rownames_to_column(dados_prev, var = "data")
dados_prev$data <- as.Date(dados_prev$data)
dados_prev <- dados_prev %>%
filter(data >= "2012-07-01")
dados <- dados_prev %>%
filter(data < "2013-11-01")
# grafico ----
ggplot(dados, aes(x = data, y = media_dia)) +
geom_line(color = "aquamarine4") +
geom_hline(yintercept = mean(dados$media_dia))+
xlab("Data") +
ylab("SO2")

dados$media_boxcox <- BoxCox(dados$media_dia,
BoxCox.lambda(dados$media_dia))
dados_prev$media_boxcox <- BoxCox(dados_prev$media_dia,
BoxCox.lambda(dados_prev$media_dia))
ggplot(dados, aes(x = data, y = media_boxcox)) + geom_line(color = "aquamarine4") +
geom_hline(yintercept = mean(dados$media_boxcox)) +
xlab("Data") +
ylab("SO2 Transformada")

ggplot(dados) + geom_boxplot(aes(x = format(as.Date(dados$data), "%Y-%m"),
y = media_boxcox),
fill = "aquamarine4") +
xlab("Data") +
ylab("S02 Transformada")
## Warning: Use of `dados$data` is discouraged. Use `data` instead.

# Teste de normalidade ----
jarque.bera.test(dados$media_boxcox)
##
## Jarque Bera Test
##
## data: dados$media_boxcox
## X-squared = 16.19, df = 2, p-value = 0.000305
# Periodograma para boxcox ----
periodograma <- itsmr::periodogram(x = dados$media_boxcox)

# Periodo (sazonalidade) ----
freq <- which.max(periodograma)/nrow(dados)
per <- 1/freq
# regressao seno cosseno ajuste por periodo ----
dadoscos <- cos(2*pi*freq*(1:nrow(dados)))
dadossin <- sin(2*pi*freq*(1:nrow(dados)))
fit <- lm(dados$media_boxcox ~ dadoscos + dadossin)
summary(fit)
##
## Call:
## lm(formula = dados$media_boxcox ~ dadoscos + dadossin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59036 -0.11513 0.00252 0.13689 0.55412
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.816066 0.008881 204.50 <2e-16 ***
## dadoscos -0.129256 0.012559 -10.29 <2e-16 ***
## dadossin -0.006031 0.012559 -0.48 0.631
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1962 on 485 degrees of freedom
## Multiple R-squared: 0.1796, Adjusted R-squared: 0.1762
## F-statistic: 53.08 on 2 and 485 DF, p-value: < 2.2e-16
fit <- lm(dados$media_boxcox ~ dadoscos)
summary(fit)
##
## Call:
## lm(formula = dados$media_boxcox ~ dadoscos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59499 -0.11491 0.00195 0.14224 0.55875
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.816066 0.008874 204.7 <2e-16 ***
## dadoscos -0.129256 0.012549 -10.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.196 on 486 degrees of freedom
## Multiple R-squared: 0.1792, Adjusted R-squared: 0.1775
## F-statistic: 106.1 on 1 and 486 DF, p-value: < 2.2e-16
# plots do modelo ----
par(mfrow=c(1,1))
plot.ts(dados$media_boxcox,xlab="Tempo",ylab="SO2 Transformada")
lines(fit$fitted.values, col='red')

par(mfrow=c(1,2))
acf(fit$residuals)
pacf(fit$residuals)

par(mfrow=c(1,1))
plot.ts(x = fit$residuals, ylab="ResÃduos",xlab="Tempo")

par(mfrow=c(1,1))
itsmr::periodogram(x = fit$residuals)

# teste para estacionariedade e variabilidade ----
adf.test(dados$media_dia)
## Warning in adf.test(dados$media_dia): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: dados$media_dia
## Dickey-Fuller = -4.4778, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
adf.test(dados$media_boxcox)
## Warning in adf.test(dados$media_boxcox): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: dados$media_boxcox
## Dickey-Fuller = -4.0177, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
adf.test(fit$residuals)
## Warning in adf.test(fit$residuals): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: fit$residuals
## Dickey-Fuller = -5.298, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
# EACF ----
TSA::eacf(fit$residuals,ar.max=8, ma.max=8)
## AR/MA
## 0 1 2 3 4 5 6 7 8
## 0 x x x x x x x x x
## 1 x x x o o o o o o
## 2 x x o o o o o o o
## 3 x x x o o o o o o
## 4 x x x x o o o o o
## 5 o x x x o o o o o
## 6 o x x x x x o o o
## 7 x x x o o x x o o
## 8 x x x x o x x o o
# modelo completo ----
Xreg <- model.matrix(~ dadoscos)
fit_RegARMA <- arima(x = dados$media_boxcox, order = c(1,0,1),
xreg = Xreg, include.mean = FALSE)
summary(fit_RegARMA)
##
## Call:
## arima(x = dados$media_boxcox, order = c(1, 0, 1), xreg = Xreg, include.mean = FALSE)
##
## Coefficients:
## ar1 ma1 (Intercept) dadoscos
## 0.8787 -0.6641 1.8120 -0.1375
## s.e. 0.0490 0.0791 0.0223 0.0311
##
## sigma^2 estimated as 0.03202: log likelihood = 147.12, aic = -286.23
##
## Training set error measures:
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
## ME RMSE MAE MPE MAPE
## Training set NaN NaN NaN NaN NaN
lmtest::coeftest(fit_RegARMA)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.878734 0.048961 17.9477 < 2.2e-16 ***
## ma1 -0.664121 0.079076 -8.3985 < 2.2e-16 ***
## (Intercept) 1.811956 0.022263 81.3903 < 2.2e-16 ***
## dadoscos -0.137501 0.031102 -4.4209 9.829e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adjusted <- fit$fitted.values+(fit$residuals-fit_RegARMA$residuals)
par(mfrow=c(1,2));acf(fit_RegARMA$residuals);pacf(fit_RegARMA$residuals)

par(mfrow=c(1,1));plot.ts(dados$media_boxcox,ylab="S02 Tranformada",xlab="Tempo");lines(adjusted, col='red')

periodograma<-itsmr::periodogram(x = fit_RegARMA$residuals)

hist(fit_RegARMA$residuals)

jarque.bera.test(fit_RegARMA$residuals)
##
## Jarque Bera Test
##
## data: fit_RegARMA$residuals
## X-squared = 5.0695, df = 2, p-value = 0.07928
# modelo forecast para previsao ----
fit_RegARMA <- Arima(y = dados$media_boxcox, order = c(1,0,1),
xreg = Xreg, include.mean = FALSE)
# teste para explicacao de autocorrelacao ate o lag k ----
Box.test(x = fit_RegARMA$residuals, lag=25, fitdf = 4, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: fit_RegARMA$residuals
## X-squared = 23.932, df = 21, p-value = 0.2964
# Pontos de quebra ----
breaked <- breakpoints(fit_RegARMA$residuals ~ 1)
summary(breaked) # BIC mÃnimo = 0
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = fit_RegARMA$residuals ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 381
## m = 2 78 381
## m = 3 78 280 381
## m = 4 78 170 280 381
## m = 5 78 170 244 318 397
##
## Corresponding to breakdates:
##
## m = 1 381
## m = 2 78 381
## m = 3 78 280 381
## m = 4 78 170 280 381
## m = 5 78 170 244 318 397
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 15.62 15.49 15.41 15.32 15.30 15.29
## BIC -282.18 -274.06 -264.08 -254.65 -243.04 -230.74
plot(breaked)

# previsao ----
n <- nrow(dados)
h <- nrow(dados_prev) - nrow(dados)
dadoscos.new <- cos(2*pi*freq*(1:h+n))
dadossin.new <- sin(2*pi*freq*(1:h+n))
Xreg.new <- model.matrix( ~ dadoscos,
data = data.frame(dadoscos = dadoscos.new))
prev <- forecast(object = fit_RegARMA, h=h, xreg = Xreg.new)
prev.mean.x <- InvBoxCox(prev$mean, BoxCox.lambda(x = dados$media_dia))
prev.lower.x <- InvBoxCox(prev$lower[,2], BoxCox.lambda(x = dados$media_dia))
prev.upper.x <- InvBoxCox(prev$upper[,2], BoxCox.lambda(x = dados$media_dia))
m <- min(prev.lower.x, dados_prev$media_dia)
M <- max(prev.upper.x, dados_prev$media_dia)
plot.ts(dados_prev$media_dia[seq(n, n+h)], ylim=c(m, M),xlab="Tempo",ylab="S02 Tranformada")
lines(1:h, prev.mean.x, col='red', lwd=1.5)
lines(1:h, prev.lower.x, col='red', lwd=1.5, lty=2)
lines(1:h, prev.upper.x, col='red', lwd=1.5, lty=2)
