path <- "d:/Users/thomas.daher/Desktop/cimento/dados_parte2/"
# cimento
cimento <- read_excel(paste(path, "Obras hidreletricas.xlsx", sep = ""),sheet = "cimento_51") %>%
dplyr::select(data, cimento_51_barra)
#calcário
calcario <- read_excel(paste(path, "calcario.xlsx", sep = "")) %>% dplyr::select(data,calcario)
# diesel
diesel <- read_excel(paste(path, "Obras hidreletricas.xlsx", sep = ""), sheet = "Oleo diesel") %>% dplyr::select(data,diesel)
#coque de petroleo
#coque_antigo <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/coque de petroleo.xlsx", sheet = "dados antigos")
#coque_antigo <- coque_antigo %>% group_by(data) %>% summarise(valor = sum(valor), kg = sum(kg)) %>%mutate(valor_por_kg = valor / kg)
#coque_novo <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/coque de petroleo.xlsx", sheet = "dados novos")
#coque_novo <- coque_novo %>% group_by(data) %>% summarise(valor = sum(`Valor FOB (US$)`), kg = sum(`Quilograma Líquido`)) %>% mutate(valor_por_kg = valor / kg)
#coque <- rbind(coque_antigo,coque_novo)
#rm(coque_antigo,coque_novo)
#producao cimento
prod_cim <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/prod_cimento.xlsx")
prod_cim <- prod_cim %>%
pivot_longer(cols = -ano, names_to = "mes", values_to = "prod_cim") %>%
mutate(data = as.Date(paste(ano, mes, "01", sep = "-"), format = "%Y-%m-%d")) %>% dplyr::select(data, prod_cim)
# serie lubrificantes e combustiveis
combustiveis <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/combustiveis.xlsx") %>% dplyr::select(data, combustiveis)
IPCA <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/ipca.xlsx") %>% dplyr::select(data, IPCA_100)
## Criando dataframes e deflacionando
# Criando o data.frame
df <- merge(x = merge(x = merge(x = cimento, y = diesel, by = "data", all.x = TRUE), y = calcario, by = "data", all.x = TRUE), y = combustiveis, by = "data", all.x = TRUE)
df <- df %>%
inner_join(IPCA, by = "data")
# Defina uma função para deflação
deflacionar_serie <- function(valor_nominal, ipca) {
return((valor_nominal / ipca)*100)
}
# Aplique a função de deflação para cada série no DataFrame
df <- df %>%
mutate(
cimento_deflacionado = deflacionar_serie(cimento_51_barra, IPCA_100),
diesel_deflacionado = deflacionar_serie(diesel, IPCA_100),
calcario_deflacionado = deflacionar_serie(calcario, IPCA_100),
combustiveis_deflacionado = deflacionar_serie(combustiveis, IPCA_100)
)
df <- df %>% dplyr::select(data, cimento_deflacionado, diesel_deflacionado, calcario_deflacionado, combustiveis_deflacionado, IPCA_100)
#Teste de Raiz unitária
ts_cimento <- ts(df$cimento_deflacionado, frequency = 12)
ts_calcario <- ts(df$calcario_deflacionado, frequency = 12)
ts_diesel <- ts(df$diesel_deflacionado, frequency = 12)
ts_combustivel <- ts(df$combustiveis_deflacionado, frequency = 12)
ts_series <- ts(cbind(ts_cimento,ts_calcario,ts_diesel,ts_combustivel), start = c(1979, 12), frequency = 12)
### plotando series em nivel
ggplot(df) +
geom_line(aes(x = data, y = cimento_deflacionado, color = "cimento"), linetype = "solid", size = 1) +
geom_line(aes(x = data, y = calcario_deflacionado, color = "calcario"), linetype = "solid", size = 1) +
geom_line(aes(x = data, y = diesel_deflacionado, color = "diesel"), linetype = "solid", size = 1) +
geom_line(aes(x = data, y = combustiveis_deflacionado, color = "combustiveis"), linetype = "solid", size = 1) +
labs(title = "Séries deflacionadas de cimento, calcario, diesel e combustiveis",
x = "data",
y = "serie",
color = "Séries em nível") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#cimento
adf.test(ts_cimento)
##
## Augmented Dickey-Fuller Test
##
## data: ts_cimento
## Dickey-Fuller = -2.819, Lag order = 7, p-value = 0.2312
## alternative hypothesis: stationary
# diff_cimento
adf.test(diff(ts_cimento))
## Warning in adf.test(diff(ts_cimento)): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(ts_cimento)
## Dickey-Fuller = -6.4094, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#calcario
adf.test(ts_calcario)
##
## Augmented Dickey-Fuller Test
##
## data: ts_calcario
## Dickey-Fuller = -3.6511, Lag order = 7, p-value = 0.02836
## alternative hypothesis: stationary
# diff_calcario
#adf.test(diff(ts_calcario))
#diesel
adf.test(ts_diesel)
##
## Augmented Dickey-Fuller Test
##
## data: ts_diesel
## Dickey-Fuller = -1.3595, Lag order = 7, p-value = 0.847
## alternative hypothesis: stationary
# diff_diesel
adf.test(diff(ts_diesel))
## Warning in adf.test(diff(ts_diesel)): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(ts_diesel)
## Dickey-Fuller = -6.5866, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#combustiveis
adf.test(ts_combustivel)
##
## Augmented Dickey-Fuller Test
##
## data: ts_combustivel
## Dickey-Fuller = -1.1255, Lag order = 7, p-value = 0.9175
## alternative hypothesis: stationary
# diff_combustiveis
adf.test(diff(ts_combustivel))
## Warning in adf.test(diff(ts_combustivel)): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(ts_combustivel)
## Dickey-Fuller = -6.4255, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
Podemos notar que fora a série calcário que já é estacionária em nível as demais séries apresentam raiz unitária, ao calcular a primeira diferença todas se apresentam estacionárias.
#Cimento
# Decompose the time series
decomposition <- decompose(ts_cimento)
autoplot(decomposition) + labs(title = "Decomposition cimento")
monthplot(ts_cimento, col='darkblue', col.base='red',
main='cimento médio por mês',
ylab='cimento', lty.base=2)
#Calcario
# Decompose the time series
decomposition <- decompose(ts_calcario)
autoplot(decomposition) + labs(title = "Decomposition calcario")
monthplot(ts_calcario, col='darkblue', col.base='red',
main='calcario médio por mês',
ylab='calcario', lty.base=2)
#Diesel
# Decompose the time series
decomposition <- decompose(ts_diesel)
autoplot(decomposition) + labs(title = "Decomposition diesel")
monthplot(ts_diesel, col='darkblue', col.base='red',
main='diesel médio por mês',
ylab='diesel', lty.base=2)
#Combustiveis
# Decompose the time series
decomposition <- decompose(ts_combustivel)
autoplot(decomposition) + labs(title = "Decomposition calcario")
monthplot(ts_combustivel, col='darkblue', col.base='red',
main='combustivel médio por mês',
ylab='combustivel', lty.base=2)
A presença de tendência e sazonalidade pode ser observada na decomposição da série em todas as séries. Consequentemente, é aconselhável empregar o comando auto.sarima para verificar a determinação do modelo na explicação da série.
# cimento
cimento_arima <- auto.arima(ts_cimento, seasonal = TRUE)
summary(cimento_arima)
## Series: ts_cimento
## ARIMA(3,1,1)
##
## Coefficients:
## ar1 ar2 ar3 ma1
## -0.2886 0.0396 -0.3787 0.4370
## s.e. 0.0943 0.0520 0.0512 0.0969
##
## sigma^2 = 218: log likelihood = -1429.01
## AIC=2868.02 AICc=2868.19 BIC=2887.28
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.5641433 14.66025 5.699562 1.876182 5.134145 0.1892661
## ACF1
## Training set -0.009962158
adf.test(residuals(cimento_arima))
## Warning in adf.test(residuals(cimento_arima)): p-value smaller than printed
## p-value
##
## Augmented Dickey-Fuller Test
##
## data: residuals(cimento_arima)
## Dickey-Fuller = -5.6875, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
# calcario
calcario_arima <- auto.arima(ts_calcario, seasonal = TRUE)
summary(calcario_arima)
## Series: ts_calcario
## ARIMA(2,1,3)
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3
## -1.1610 -0.6201 1.0001 0.3863 -0.3790
## s.e. 0.0628 0.0727 0.0728 0.1024 0.0624
##
## sigma^2 = 30.14: log likelihood = -1084.89
## AIC=2181.78 AICc=2182.02 BIC=2204.89
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3527988 5.442902 2.750066 0.2025024 2.778048 0.288509
## ACF1
## Training set -0.006032054
adf.test(residuals(calcario_arima))
## Warning in adf.test(residuals(calcario_arima)): p-value smaller than printed
## p-value
##
## Augmented Dickey-Fuller Test
##
## data: residuals(calcario_arima)
## Dickey-Fuller = -7.6747, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
# diesel
diesel_arima <- auto.arima(ts_diesel, seasonal = TRUE)
summary(diesel_arima)
## Series: ts_diesel
## ARIMA(2,1,1)
##
## Coefficients:
## ar1 ar2 ma1
## 0.2319 0.1662 -0.6292
## s.e. 0.3783 0.1669 0.3687
##
## sigma^2 = 259.7: log likelihood = -1459.74
## AIC=2927.48 AICc=2927.6 BIC=2942.89
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.4599975 16.02332 8.97183 -0.8323906 5.121249 0.3613955
## ACF1
## Training set 0.003041679
adf.test(residuals(diesel_arima))
## Warning in adf.test(residuals(diesel_arima)): p-value smaller than printed
## p-value
##
## Augmented Dickey-Fuller Test
##
## data: residuals(diesel_arima)
## Dickey-Fuller = -6.2925, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
# combustivel
combustivel_arima <- auto.arima(ts_combustivel, seasonal = TRUE)
summary(combustivel_arima)
## Series: ts_combustivel
## ARIMA(1,1,1)(0,0,2)[12]
##
## Coefficients:
## ar1 ma1 sma1 sma2
## -0.8488 0.6913 -0.0885 -0.1182
## s.e. 0.0996 0.1388 0.0579 0.0531
##
## sigma^2 = 112: log likelihood = -1313.11
## AIC=2636.21 AICc=2636.39 BIC=2655.47
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.6714353 10.50734 6.685125 -0.01327149 4.022656 0.2634335
## ACF1
## Training set -0.03508005
adf.test(residuals(combustivel_arima))
## Warning in adf.test(residuals(combustivel_arima)): p-value smaller than printed
## p-value
##
## Augmented Dickey-Fuller Test
##
## data: residuals(combustivel_arima)
## Dickey-Fuller = -6.3955, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
Curiosamente o auto.arima determina primeira diferença para o calcario que já é estacionário em primeira ordem como observado pelo teste ADF. O metodo de Bai_perron será empregado para determinar a existência de quebra estrutural.
# Function to perform breakpoint analysis for each univariate time series
perform_breakpoint_analysis <- function(ts) {
mts_object <- ts(ts, start = c(1979, 12), frequency = 12)
bp_results <- breakpoints(mts_object ~ 1)
print(paste("Resumo de quebra da serie", deparse(substitute(ts)), sep = " "))
summary(bp_results)
}
#definindo os dados
ts_cimento <- df$cimento_deflacionado
ts_calcario <- df$calcario_deflacionado
ts_diesel <- df$diesel_deflacionado
ts_combustiveis <- df$combustiveis_deflacionado
# Perform breakpoint analysis for each univariate time series
perform_breakpoint_analysis(ts_cimento)
## [1] "Resumo de quebra da serie ts_cimento"
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 78
## m = 2 78 130
## m = 3 77 129 240
## m = 4 77 129 243 297
## m = 5 77 129 187 241 297
##
## Corresponding to breakdates:
##
## m = 1 1986(5)
## m = 2 1986(5) 1990(9)
## m = 3 1986(4) 1990(8) 1999(11)
## m = 4 1986(4) 1990(8) 2000(2) 2004(8)
## m = 5 1986(4) 1990(8) 1995(6) 1999(12) 2004(8)
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 1902725 672309 560909 471478 435966 422931
## BIC 4005 3653 3602 3553 3537 3539
perform_breakpoint_analysis(ts_calcario)
## [1] "Resumo de quebra da serie ts_calcario"
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 87
## m = 2 87 243
## m = 3 55 107 243
## m = 4 55 107 159 251
## m = 5 55 107 159 245 297
##
## Corresponding to breakdates:
##
## m = 1 1987(2)
## m = 2 1987(2) 2000(2)
## m = 3 1984(6) 1988(10) 2000(2)
## m = 4 1984(6) 1988(10) 1993(2) 2000(10)
## m = 5 1984(6) 1988(10) 1993(2) 2000(4) 2004(8)
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 192382 67889 33951 32632 31888 31909
## BIC 3205 2853 2623 2621 2625 2637
perform_breakpoint_analysis(ts_diesel)
## [1] "Resumo de quebra da serie ts_diesel"
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 83
## m = 2 83 274
## m = 3 83 186 274
## m = 4 83 135 187 274
## m = 5 83 135 187 245 297
##
## Corresponding to breakdates:
##
## m = 1 1986(10)
## m = 2 1986(10) 2002(9)
## m = 3 1986(10) 1995(5) 2002(9)
## m = 4 1986(10) 1991(2) 1995(6) 2002(9)
## m = 5 1986(10) 1991(2) 1995(6) 2000(4) 2004(8)
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 1570275 372469 285528 258728 257253 271707
## BIC 3938 3447 3366 3344 3353 3384
perform_breakpoint_analysis(ts_combustiveis)
## [1] "Resumo de quebra da serie ts_combustiveis"
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 269
## m = 2 104 249
## m = 3 104 245 297
## m = 4 104 160 245 297
## m = 5 52 104 160 245 297
##
## Corresponding to breakdates:
##
## m = 1 2002(4)
## m = 2 1988(7) 2000(8)
## m = 3 1988(7) 2000(4) 2004(8)
## m = 4 1988(7) 1993(3) 2000(4) 2004(8)
## m = 5 1984(3) 1988(7) 1993(3) 2000(4) 2004(8)
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 1889848 513187 328289 212397 210464 210187
## BIC 4002 3559 3415 3275 3283 3295
Utilizando os pontos de quebra anteriormente determinados, vamos agora regredir contra 1 e analisar a estacionariedade dos resíduos.
bp_results <- breakpoints(ts_cimento ~ 1)
lm_break <- lm(ts_cimento ~ breakfactor(bp_results, breaks = 4))
summary(lm_break)
##
## Call:
## lm(formula = ts_cimento ~ breakfactor(bp_results, breaks = 4))
##
## Residuals:
## Min 1Q Median 3Q Max
## -115.747 -17.543 -4.751 20.972 177.026
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 19.094 4.057 4.706
## breakfactor(bp_results, breaks = 4)segment2 184.796 6.390 28.920
## breakfactor(bp_results, breaks = 4)segment3 113.922 5.251 21.694
## breakfactor(bp_results, breaks = 4)segment4 172.414 6.319 27.286
## breakfactor(bp_results, breaks = 4)segment5 135.149 6.390 21.150
## Pr(>|t|)
## (Intercept) 3.66e-06 ***
## breakfactor(bp_results, breaks = 4)segment2 < 2e-16 ***
## breakfactor(bp_results, breaks = 4)segment3 < 2e-16 ***
## breakfactor(bp_results, breaks = 4)segment4 < 2e-16 ***
## breakfactor(bp_results, breaks = 4)segment5 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 35.6 on 344 degrees of freedom
## Multiple R-squared: 0.7709, Adjusted R-squared: 0.7682
## F-statistic: 289.3 on 4 and 344 DF, p-value: < 2.2e-16
residuals_cimento <- (residuals(lm_break))
adf.test(residuals_cimento)
## Warning in adf.test(residuals_cimento): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: residuals_cimento
## Dickey-Fuller = -5.6718, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
bp_results <- breakpoints(ts_calcario ~ 1)
lm_break <- lm(ts_calcario ~ breakfactor(bp_results, breaks = 3))
summary(lm_break)
##
## Call:
## lm(formula = ts_calcario ~ breakfactor(bp_results, breaks = 3))
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.987 -5.713 0.310 5.761 42.860
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 68.030 1.311 51.88
## breakfactor(bp_results, breaks = 3)segment2 27.568 1.881 14.65
## breakfactor(bp_results, breaks = 3)segment3 41.302 1.554 26.58
## breakfactor(bp_results, breaks = 3)segment4 64.476 1.616 39.89
## Pr(>|t|)
## (Intercept) <2e-16 ***
## breakfactor(bp_results, breaks = 3)segment2 <2e-16 ***
## breakfactor(bp_results, breaks = 3)segment3 <2e-16 ***
## breakfactor(bp_results, breaks = 3)segment4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.725 on 345 degrees of freedom
## Multiple R-squared: 0.8304, Adjusted R-squared: 0.8289
## F-statistic: 563 on 3 and 345 DF, p-value: < 2.2e-16
residuals_calcario <- (residuals(lm_break))
adf.test(residuals_calcario)
## Warning in adf.test(residuals_calcario): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: residuals_calcario
## Dickey-Fuller = -4.8386, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
bp_results <- breakpoints(ts_diesel ~ 1)
lm_break <- lm(ts_diesel ~ breakfactor(bp_results, breaks = 3))
summary(lm_break)
##
## Call:
## lm(formula = ts_diesel ~ breakfactor(bp_results, breaks = 3))
##
## Residuals:
## Min 1Q Median 3Q Max
## -71.190 -9.617 -0.494 9.720 94.383
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 272.042 3.006 90.50
## breakfactor(bp_results, breaks = 3)segment2 -137.982 4.039 -34.16
## breakfactor(bp_results, breaks = 3)segment3 -161.747 4.190 -38.60
## breakfactor(bp_results, breaks = 3)segment4 -108.752 4.363 -24.93
## Pr(>|t|)
## (Intercept) <2e-16 ***
## breakfactor(bp_results, breaks = 3)segment2 <2e-16 ***
## breakfactor(bp_results, breaks = 3)segment3 <2e-16 ***
## breakfactor(bp_results, breaks = 3)segment4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.38 on 345 degrees of freedom
## Multiple R-squared: 0.8352, Adjusted R-squared: 0.8338
## F-statistic: 583 on 3 and 345 DF, p-value: < 2.2e-16
residuals_diesel <- (residuals(lm_break))
adf.test(residuals_diesel)
## Warning in adf.test(residuals_diesel): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: residuals_diesel
## Dickey-Fuller = -4.1921, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
bp_results <- breakpoints(ts_combustiveis ~ 1)
lm_break <- lm(ts_combustiveis ~ breakfactor(bp_results, breaks = 3))
summary(lm_break)
##
## Call:
## lm(formula = ts_combustiveis ~ breakfactor(bp_results, breaks = 3))
##
## Residuals:
## Min 1Q Median 3Q Max
## -68.56 -15.77 -5.78 14.46 81.01
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 186.831 2.433 76.79
## breakfactor(bp_results, breaks = 3)segment2 -63.825 3.207 -19.90
## breakfactor(bp_results, breaks = 3)segment3 60.554 4.214 14.37
## breakfactor(bp_results, breaks = 3)segment4 132.691 4.214 31.49
## Pr(>|t|)
## (Intercept) <2e-16 ***
## breakfactor(bp_results, breaks = 3)segment2 <2e-16 ***
## breakfactor(bp_results, breaks = 3)segment3 <2e-16 ***
## breakfactor(bp_results, breaks = 3)segment4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.81 on 345 degrees of freedom
## Multiple R-squared: 0.8876, Adjusted R-squared: 0.8866
## F-statistic: 908.2 on 3 and 345 DF, p-value: < 2.2e-16
residuals_combustiveis <- (residuals(lm_break))
adf.test(residuals_combustiveis)
## Warning in adf.test(residuals_combustiveis): p-value smaller than printed
## p-value
##
## Augmented Dickey-Fuller Test
##
## data: residuals_combustiveis
## Dickey-Fuller = -4.277, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
Podemos notar que todos os resíduos são estacionários, seguir esta metodologia pode ser então um bom caminho.