path <- "d:/Users/thomas.daher/Desktop/cimento/dados_parte2/"
# cimento
cimento <- read_excel(paste(path, "Obras hidreletricas.xlsx", sep = ""),sheet = "cimento_51") %>%
dplyr::select(data, cimento_51_barra)
#calcário
calcario <- read_excel(paste(path, "calcario.xlsx", sep = "")) %>% dplyr::select(data,calcario)
# diesel
diesel <- read_excel(paste(path, "Obras hidreletricas.xlsx", sep = ""), sheet = "Oleo diesel") %>% dplyr::select(data,diesel)
#coque de petroleo
#coque_antigo <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/coque de petroleo.xlsx", sheet = "dados antigos")
#coque_antigo <- coque_antigo %>% group_by(data) %>% summarise(valor = sum(valor), kg = sum(kg)) %>%mutate(valor_por_kg = valor / kg)
#coque_novo <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/coque de petroleo.xlsx", sheet = "dados novos")
#coque_novo <- coque_novo %>% group_by(data) %>% summarise(valor = sum(`Valor FOB (US$)`), kg = sum(`Quilograma Líquido`)) %>% mutate(valor_por_kg = valor / kg)
#coque <- rbind(coque_antigo,coque_novo)
#rm(coque_antigo,coque_novo)
#producao cimento
prod_cim <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/prod_cimento.xlsx")
prod_cim <- prod_cim %>%
pivot_longer(cols = -ano, names_to = "mes", values_to = "prod_cim") %>%
mutate(data = as.Date(paste(ano, mes, "01", sep = "-"), format = "%Y-%m-%d")) %>% dplyr::select(data, prod_cim)
# serie lubrificantes e combustiveis
combustiveis <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/combustiveis.xlsx") %>% dplyr::select(data, combustiveis)
IPCA <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/ipca.xlsx") %>% dplyr::select(data, IPCA_100)
## Criando dataframes e deflacionando
# Criando o data.frame
df <- merge(x = merge(x = merge(x = cimento, y = diesel, by = "data", all.x = TRUE), y = calcario, by = "data", all.x = TRUE), y = combustiveis, by = "data", all.x = TRUE)
df <- df %>%
inner_join(IPCA, by = "data")
# Defina uma função para deflação
deflacionar_serie <- function(valor_nominal, ipca) {
return((valor_nominal / ipca)*100)
}
# Aplique a função de deflação para cada série no DataFrame
df <- df %>%
mutate(
cimento_deflacionado = deflacionar_serie(cimento_51_barra, IPCA_100),
diesel_deflacionado = deflacionar_serie(diesel, IPCA_100),
calcario_deflacionado = deflacionar_serie(calcario, IPCA_100),
combustiveis_deflacionado = deflacionar_serie(combustiveis, IPCA_100)
)
df <- df %>% dplyr::select(data, cimento_deflacionado, diesel_deflacionado, calcario_deflacionado, combustiveis_deflacionado, IPCA_100)
#Teste de Raiz unitária
ts_cimento <- ts(df$cimento_deflacionado, frequency = 12)
ts_calcario <- ts(df$calcario_deflacionado, frequency = 12)
ts_diesel <- ts(df$diesel_deflacionado, frequency = 12)
ts_combustivel <- ts(df$combustiveis_deflacionado, frequency = 12)
ts_series <- ts(cbind(ts_cimento,ts_calcario,ts_diesel,ts_combustivel), start = c(1980, 01), frequency = 12)
### plotando series em nivel
ggplot(df) +
geom_line(aes(x = data, y = cimento_deflacionado, color = "cimento"), linetype = "solid", size = 1) +
geom_line(aes(x = data, y = calcario_deflacionado, color = "calcario"), linetype = "solid", size = 1) +
geom_line(aes(x = data, y = diesel_deflacionado, color = "diesel"), linetype = "solid", size = 1) +
geom_line(aes(x = data, y = combustiveis_deflacionado, color = "combustiveis"), linetype = "solid", size = 1) +
labs(title = "Séries deflacionadas de cimento, calcario, diesel e combustiveis",
x = "data",
y = "serie",
color = "Séries em nível") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#cimento
adf.test(ts_cimento)
##
## Augmented Dickey-Fuller Test
##
## data: ts_cimento
## Dickey-Fuller = -2.819, Lag order = 7, p-value = 0.2312
## alternative hypothesis: stationary
# diff_cimento
adf.test(diff(ts_cimento))
## Warning in adf.test(diff(ts_cimento)): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(ts_cimento)
## Dickey-Fuller = -6.4094, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#calcario
adf.test(ts_calcario)
##
## Augmented Dickey-Fuller Test
##
## data: ts_calcario
## Dickey-Fuller = -3.6511, Lag order = 7, p-value = 0.02836
## alternative hypothesis: stationary
# diff_calcario
adf.test(diff(ts_calcario))
## Warning in adf.test(diff(ts_calcario)): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(ts_calcario)
## Dickey-Fuller = -8.6684, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#diesel
adf.test(ts_diesel)
##
## Augmented Dickey-Fuller Test
##
## data: ts_diesel
## Dickey-Fuller = -1.3595, Lag order = 7, p-value = 0.847
## alternative hypothesis: stationary
# diff_diesel
adf.test(diff(ts_diesel))
## Warning in adf.test(diff(ts_diesel)): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(ts_diesel)
## Dickey-Fuller = -6.5866, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#combustiveis
adf.test(ts_combustivel)
##
## Augmented Dickey-Fuller Test
##
## data: ts_combustivel
## Dickey-Fuller = -1.1255, Lag order = 7, p-value = 0.9175
## alternative hypothesis: stationary
# diff_combustiveis
adf.test(diff(ts_combustivel))
## Warning in adf.test(diff(ts_combustivel)): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(ts_combustivel)
## Dickey-Fuller = -6.4255, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
Podemos notar que as séries apresentam raiz unitária mesmo após estarem deflacionadas, ao calcular a primeira diferença todas se apresentam estacionárias. estretanto não significa que o problema seja apenas não estarem em primeira diferença, mas pode ser sazonalidade ou uma tendência.
#Cimento
# Plot the time series
autoplot(ts_cimento) + labs(title = "Cimento")
# Decompose the time series
decomposition <- decompose(ts_cimento)
autoplot(decomposition) + labs(title = "Decomposition cimento")
#Calcario
# Plot the time series
autoplot(ts_calcario) + labs(title = "Calcario")
# Decompose the time series
decomposition <- decompose(ts_calcario)
autoplot(decomposition) + labs(title = "Decomposition calcario")
#Diesel
# Plot the time series
autoplot(ts_diesel) + labs(title = "Diesel")
# Decompose the time series
decomposition <- decompose(ts_diesel)
autoplot(decomposition) + labs(title = "Decomposition diesel")
#Combustiveis
# Plot the time series
autoplot(ts_combustivel) + labs(title = "Combustiveis")
# Decompose the time series
decomposition <- decompose(ts_combustivel)
autoplot(decomposition) + labs(title = "Decomposition calcario")
A decomposição das séries mostra indicios de temdência e sazonalidade para todas, vamos então utilizar o comando auto.sarima para ver o que ele decide com modelo para explicar as séries.
### Arima
library(forecast)
# cimento
cimento_arima <- auto.arima(ts_cimento, seasonal = TRUE)
summary(cimento_arima)
## Series: ts_cimento
## ARIMA(3,1,1)
##
## Coefficients:
## ar1 ar2 ar3 ma1
## -0.2886 0.0396 -0.3787 0.4370
## s.e. 0.0943 0.0520 0.0512 0.0969
##
## sigma^2 = 218: log likelihood = -1429.01
## AIC=2868.02 AICc=2868.19 BIC=2887.28
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.5641433 14.66025 5.699562 1.876182 5.134145 0.1892661
## ACF1
## Training set -0.009962158
adf_test <- ur.df(residuals(cimento_arima), type = "trend")
summary(adf_test)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -127.614 -1.500 -0.204 2.290 78.958
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.502000 1.598528 0.940 0.348
## z.lag.1 -1.047707 0.076732 -13.654 <2e-16 ***
## tt -0.005187 0.007918 -0.655 0.513
## z.diff.lag 0.036187 0.053959 0.671 0.503
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.76 on 343 degrees of freedom
## Multiple R-squared: 0.5062, Adjusted R-squared: 0.5019
## F-statistic: 117.2 on 3 and 343 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -13.6541 62.1449 93.2173
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.98 -3.42 -3.13
## phi2 6.15 4.71 4.05
## phi3 8.34 6.30 5.36
# calcario
calcario_arima <- auto.arima(ts_calcario, seasonal = TRUE)
summary(calcario_arima)
## Series: ts_calcario
## ARIMA(2,1,3)
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3
## -1.1610 -0.6201 1.0001 0.3863 -0.3790
## s.e. 0.0628 0.0727 0.0728 0.1024 0.0624
##
## sigma^2 = 30.14: log likelihood = -1084.89
## AIC=2181.78 AICc=2182.02 BIC=2204.89
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3527988 5.442902 2.750066 0.2025024 2.778048 0.288509
## ACF1
## Training set -0.006032054
adf_test <- ur.df(residuals(calcario_arima), type = "trend")
summary(adf_test)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -48.863 -1.500 0.025 1.362 28.810
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.607387 0.593237 1.024 0.307
## z.lag.1 -0.979399 0.076572 -12.791 <2e-16 ***
## tt -0.001456 0.002935 -0.496 0.620
## z.diff.lag -0.027218 0.053964 -0.504 0.614
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.473 on 343 degrees of freedom
## Multiple R-squared: 0.5039, Adjusted R-squared: 0.4996
## F-statistic: 116.1 on 3 and 343 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -12.7906 54.534 81.8006
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.98 -3.42 -3.13
## phi2 6.15 4.71 4.05
## phi3 8.34 6.30 5.36
# diesel
diesel_arima <- auto.arima(ts_diesel, seasonal = TRUE)
summary(diesel_arima)
## Series: ts_diesel
## ARIMA(2,1,1)
##
## Coefficients:
## ar1 ar2 ma1
## 0.2319 0.1662 -0.6292
## s.e. 0.3783 0.1669 0.3687
##
## sigma^2 = 259.7: log likelihood = -1459.74
## AIC=2927.48 AICc=2927.6 BIC=2942.89
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.4599975 16.02332 8.97183 -0.8323906 5.121249 0.3613955
## ACF1
## Training set 0.003041679
adf_test <- ur.df(residuals(diesel_arima), type = "trend")
summary(adf_test)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -98.573 -3.644 -0.805 3.015 72.700
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.965728 1.748473 -1.124 0.262
## z.lag.1 -1.004535 0.076297 -13.166 <2e-16 ***
## tt 0.008835 0.008664 1.020 0.309
## z.diff.lag 0.004427 0.053936 0.082 0.935
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.11 on 343 degrees of freedom
## Multiple R-squared: 0.5006, Adjusted R-squared: 0.4962
## F-statistic: 114.6 on 3 and 343 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -13.1662 57.786 86.6777
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.98 -3.42 -3.13
## phi2 6.15 4.71 4.05
## phi3 8.34 6.30 5.36
# combustivel
combustivel_arima <- auto.arima(ts_combustivel, seasonal = TRUE)
summary(combustivel_arima)
## Series: ts_combustivel
## ARIMA(1,1,1)(0,0,2)[12]
##
## Coefficients:
## ar1 ma1 sma1 sma2
## -0.8488 0.6913 -0.0885 -0.1182
## s.e. 0.0996 0.1388 0.0579 0.0531
##
## sigma^2 = 112: log likelihood = -1313.11
## AIC=2636.21 AICc=2636.39 BIC=2655.47
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.6714353 10.50734 6.685125 -0.01327149 4.022656 0.2634335
## ACF1
## Training set -0.03508005
adf_test <- ur.df(residuals(combustivel_arima), type = "trend")
summary(adf_test)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -67.161 -3.845 -1.007 3.658 33.189
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.884375 1.138901 -0.777 0.438
## z.lag.1 -1.090759 0.077976 -13.988 <2e-16 ***
## tt 0.009322 0.005682 1.641 0.102
## z.diff.lag 0.046439 0.053997 0.860 0.390
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.52 on 343 degrees of freedom
## Multiple R-squared: 0.522, Adjusted R-squared: 0.5179
## F-statistic: 124.9 on 3 and 343 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -13.9884 65.2286 97.8428
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.98 -3.42 -3.13
## phi2 6.15 4.71 4.05
## phi3 8.34 6.30 5.36
Todos aplicaram primeira diferença o que não responde bem minha pergunta, pois o auto.arima tenta modelar as vezes acima do mais parcimonioso, portanto vou eu testar alguns modelos.
ts_list <- list(ts_cimento, ts_calcario, ts_diesel, ts_combustivel)
for (i in seq_along(ts_list)) {
ts_name <- c("cimento", "calcario", "diesel", "combustivel")[i]
acf(ts_list[[i]], lag.max = 48, main = paste("ACF for", ts_name))
Pacf(ts_list[[i]], lag.max = 48, main = paste("PACF for", ts_name))
}
# Function to fit and compare ARIMA(1,0,0) and SARIMA(0,0,0)(1,0,0)
fit_and_compare <- function(ts, name) {
# ARMA(1,0,0)
arima_model <- Arima(ts, order = c(1, 0, 0))
aic_arima <- AIC(arima_model)
adf_test_arima <- adf.test(residuals(arima_model))
# SARMA(0,0,0)(1,0,0)
sarima_model <- Arima(ts, order = c(0, 0, 0), seasonal = list(order = c(1, 0, 0)))
aic_sarima <- AIC(sarima_model)
cat("Results for", name, "\n")
cat("ARIMA(1,0,0) AIC:", aic_arima, "\n")
cat("ADF Test for ARIMA(1,0,0) p-value:", adf_test_arima$p.value, "\n")
cat("SARIMA(0,0,0)(1,0,0) AIC:", aic_sarima, "\n\n")
return(c("ARIMA(1,0,0)" = aic_arima, "SARIMA(0,0,0)(1,0,0)" = aic_sarima))
}
# Fit and compare for each time series
results_cimento <- fit_and_compare(ts_cimento, "Cimento")
## Warning in adf.test(residuals(arima_model)): p-value smaller than printed
## p-value
## Results for Cimento
## ARIMA(1,0,0) AIC: 2937.178
## ADF Test for ARIMA(1,0,0) p-value: 0.01
## SARIMA(0,0,0)(1,0,0) AIC: 3679.743
results_calcario <- fit_and_compare(ts_calcario, "Calcario")
## Warning in adf.test(residuals(arima_model)): p-value smaller than printed
## p-value
## Results for Calcario
## ARIMA(1,0,0) AIC: 2253.616
## ADF Test for ARIMA(1,0,0) p-value: 0.01
## SARIMA(0,0,0)(1,0,0) AIC: 2812.553
results_diesel <- fit_and_compare(ts_diesel, "Diesel")
## Warning in adf.test(residuals(arima_model)): p-value smaller than printed
## p-value
## Results for Diesel
## ARIMA(1,0,0) AIC: 2984.373
## ADF Test for ARIMA(1,0,0) p-value: 0.01
## SARIMA(0,0,0)(1,0,0) AIC: 3502.832
results_combustivel <- fit_and_compare(ts_combustivel, "Combustivel")
## Warning in adf.test(residuals(arima_model)): p-value smaller than printed
## p-value
## Results for Combustivel
## ARIMA(1,0,0) AIC: 2676.365
## ADF Test for ARIMA(1,0,0) p-value: 0.01
## SARIMA(0,0,0)(1,0,0) AIC: 3439.423
# Compare AIC values
aic_values <- data.frame(
"Cimento" = results_cimento,
"Calcario" = results_calcario,
"Diesel" = results_diesel,
"Combustivel" = results_combustivel
)
cat("AIC Values:\n")
## AIC Values:
print(aic_values)
## Cimento Calcario Diesel Combustivel
## ARIMA(1,0,0) 2937.178 2253.616 2984.373 2676.365
## SARIMA(0,0,0)(1,0,0) 3679.743 2812.553 3502.832 3439.423
# Choose the best model for each time series
best_models <- apply(aic_values, 2, function(col) names(col)[which.min(col)])
cat("\nBest Models:\n")
##
## Best Models:
print(best_models)
## Cimento Calcario Diesel Combustivel
## "ARIMA(1,0,0)" "ARIMA(1,0,0)" "ARIMA(1,0,0)" "ARIMA(1,0,0)"
Ao aplicar o AR(1) e o MA(1) em todos, ambos ficam estacionários, entretanto o AR(1) apresenta AIC mais baixos para todas as séries, desta formatenho um fit bem parcimonioso e estacionário, vou utilizar ele para analisar as quebras estruturais e cointegração.
library(strucchange)
# Function to perform breakpoint analysis for each univariate time series
perform_breakpoint_analysis <- function(ts) {
mts_object <- ts(ts, start = c(1980, 01), frequency = 12)
bp_results <- breakpoints(mts_object ~ 1)
print(paste("Resumo de quebra da serie", deparse(substitute(ts)), sep = " "))
summary(bp_results)
}
#definindo os dados
ts_cimento <- df[-1, ]$cimento_deflacionado
ts_calcario <- df[-1, ]$calcario_deflacionado
ts_diesel <- df[-1, ]$diesel_deflacionado
ts_combustiveis <- df[-1, ]$combustiveis_deflacionado
# Perform breakpoint analysis for each univariate time series
perform_breakpoint_analysis(ts_cimento)
## [1] "Resumo de quebra da serie ts_cimento"
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 77
## m = 2 77 129
## m = 3 76 128 239
## m = 4 76 128 242 296
## m = 5 76 128 186 240 296
##
## Corresponding to breakdates:
##
## m = 1 1986(5)
## m = 2 1986(5) 1990(9)
## m = 3 1986(4) 1990(8) 1999(11)
## m = 4 1986(4) 1990(8) 2000(2) 2004(8)
## m = 5 1986(4) 1990(8) 1995(6) 1999(12) 2004(8)
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 1885631 671909 560509 471112 435600 422565
## BIC 3991 3644 3592 3544 3528 3529
perform_breakpoint_analysis(ts_calcario)
## [1] "Resumo de quebra da serie ts_calcario"
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 86
## m = 2 86 242
## m = 3 86 145 250
## m = 4 54 106 158 250
## m = 5 54 106 158 244 296
##
## Corresponding to breakdates:
##
## m = 1 1987(2)
## m = 2 1987(2) 2000(2)
## m = 3 1987(2) 1992(1) 2000(10)
## m = 4 1984(6) 1988(10) 1993(2) 2000(10)
## m = 5 1984(6) 1988(10) 1993(2) 2000(4) 2004(8)
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 189415 67416 33478 32362 31671 31691
## BIC 3192 2844 2612 2612 2616 2628
perform_breakpoint_analysis(ts_diesel)
## [1] "Resumo de quebra da serie ts_diesel"
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 82
## m = 2 82 273
## m = 3 82 185 273
## m = 4 82 134 186 273
## m = 5 82 134 186 244 296
##
## Corresponding to breakdates:
##
## m = 1 1986(10)
## m = 2 1986(10) 2002(9)
## m = 3 1986(10) 1995(5) 2002(9)
## m = 4 1986(10) 1991(2) 1995(6) 2002(9)
## m = 5 1986(10) 1991(2) 1995(6) 2000(4) 2004(8)
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 1558055 372438 285497 258697 257222 271676
## BIC 3925 3439 3358 3335 3345 3376
perform_breakpoint_analysis(ts_combustiveis)
## [1] "Resumo de quebra da serie ts_combustiveis"
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 268
## m = 2 103 248
## m = 3 103 244 296
## m = 4 103 159 244 296
## m = 5 55 107 159 244 296
##
## Corresponding to breakdates:
##
## m = 1 2002(4)
## m = 2 1988(7) 2000(8)
## m = 3 1988(7) 2000(4) 2004(8)
## m = 4 1988(7) 1993(3) 2000(4) 2004(8)
## m = 5 1984(7) 1988(11) 1993(3) 2000(4) 2004(8)
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 1889240 513093 327817 211926 209992 216193
## BIC 3992 3550 3406 3266 3274 3296
Aplico a quebra estrutural nas séries em nível para ver on que irá diferenciar das séries fitadas.
# Function to perform breakpoint analysis for each univariate time series
perform_breakpoint_analysis <- function(ts) {
mts_object <- ts(ts, start = c(1980, 01), frequency = 12)
bp_results <- breakpoints(mts_object ~ 1)
print(paste("Resumo de quebra da serie", deparse(substitute(ts)), sep = " "))
summary(bp_results)
}
# Assuming 'df' is your data frame
ts_cimento <- df[-1, ]$cimento_deflacionado
ts_calcario <- df[-1, ]$calcario_deflacionado
ts_diesel <- df[-1, ]$diesel_deflacionado
ts_combustiveis <- df[-1, ]$combustiveis_deflacionado
# Perform breakpoint analysis for each univariate time series
perform_breakpoint_analysis(residuals(cimento_arima))
## [1] "Resumo de quebra da serie residuals(cimento_arima)"
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 108
## m = 2 56 108
## m = 3 56 108 198
## m = 4 56 108 200 279
## m = 5 56 108 160 212 279
##
## Corresponding to breakdates:
##
## m = 1 1988(12)
## m = 2 1984(8) 1988(12)
## m = 3 1984(8) 1988(12) 1996(6)
## m = 4 1984(8) 1988(12) 1996(8) 2003(3)
## m = 5 1984(8) 1988(12) 1993(4) 1997(8) 2003(3)
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 74897 73596 72590 71983 71772 71715
## BIC 2876 2881 2888 2897 2908 2919
perform_breakpoint_analysis(residuals(calcario_arima))
## [1] "Resumo de quebra da serie residuals(calcario_arima)"
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 108
## m = 2 54 108
## m = 3 54 108 199
## m = 4 54 108 199 278
## m = 5 54 108 160 212 278
##
## Corresponding to breakdates:
##
## m = 1 1988(12)
## m = 2 1984(6) 1988(12)
## m = 3 1984(6) 1988(12) 1996(7)
## m = 4 1984(6) 1988(12) 1996(7) 2003(2)
## m = 5 1984(6) 1988(12) 1993(4) 1997(8) 2003(2)
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 10296 10226 10172 10131 10110 10110
## BIC 2183 2193 2202 2213 2224 2235
perform_breakpoint_analysis(residuals(diesel_arima))
## [1] "Resumo de quebra da serie residuals(diesel_arima)"
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 122
## m = 2 65 122
## m = 3 65 122 177
## m = 4 65 122 177 246
## m = 5 65 122 176 228 280
##
## Corresponding to breakdates:
##
## m = 1 1990(2)
## m = 2 1985(5) 1990(2)
## m = 3 1985(5) 1990(2) 1994(9)
## m = 4 1985(5) 1990(2) 1994(9) 2000(6)
## m = 5 1985(5) 1990(2) 1994(8) 1998(12) 2003(4)
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 89531 88639 86128 85996 85793 85732
## BIC 2938 2946 2948 2959 2970 2982
perform_breakpoint_analysis(residuals(combustivel_arima))
## [1] "Resumo de quebra da serie residuals(combustivel_arima)"
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 228
## m = 2 228 280
## m = 3 64 228 280
## m = 4 64 130 228 280
## m = 5 64 116 169 228 280
##
## Corresponding to breakdates:
##
## m = 1 1998(12)
## m = 2 1998(12) 2003(4)
## m = 3 1985(4) 1998(12) 2003(4)
## m = 4 1985(4) 1990(10) 1998(12) 2003(4)
## m = 5 1985(4) 1989(8) 1994(1) 1998(12) 2003(4)
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 38374 37722 37161 36873 36580 36537
## BIC 2642 2648 2655 2664 2673 2684
Analisando pelo menor BIC todas as séries apontam que não há quebra estrutural.
# Fit VAR model with lag = 1 (ARMA(1,0,0))
var_model <- VAR(ts_series, p = 1, type = "none")
# Summary of the VAR model
summary(var_model)
##
## VAR Estimation Results:
## =========================
## Endogenous variables: ts_cimento, ts_calcario, ts_diesel, ts_combustivel
## Deterministic variables: none
## Sample size: 348
## Log Likelihood: -5135.401
## Roots of the characteristic polynomial:
## 1.002 0.9872 0.9627 0.9627
## Call:
## VAR(y = ts_series, p = 1, type = "none")
##
##
## Estimation results for equation ts_cimento:
## ===========================================
## ts_cimento = ts_cimento.l1 + ts_calcario.l1 + ts_diesel.l1 + ts_combustivel.l1
##
## Estimate Std. Error t value Pr(>|t|)
## ts_cimento.l1 0.981816 0.021284 46.130 <2e-16 ***
## ts_calcario.l1 0.008530 0.047280 0.180 0.857
## ts_diesel.l1 0.005920 0.013336 0.444 0.657
## ts_combustivel.l1 0.003198 0.015769 0.203 0.839
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 16.16 on 344 degrees of freedom
## Multiple R-Squared: 0.9886, Adjusted R-squared: 0.9884
## F-statistic: 7438 on 4 and 344 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation ts_calcario:
## ============================================
## ts_calcario = ts_cimento.l1 + ts_calcario.l1 + ts_diesel.l1 + ts_combustivel.l1
##
## Estimate Std. Error t value Pr(>|t|)
## ts_cimento.l1 0.0036507 0.0080199 0.455 0.649
## ts_calcario.l1 0.9864631 0.0178155 55.371 <2e-16 ***
## ts_diesel.l1 0.0008055 0.0050250 0.160 0.873
## ts_combustivel.l1 0.0047786 0.0059418 0.804 0.422
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 6.089 on 344 degrees of freedom
## Multiple R-Squared: 0.997, Adjusted R-squared: 0.997
## F-statistic: 2.854e+04 on 4 and 344 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation ts_diesel:
## ==========================================
## ts_diesel = ts_cimento.l1 + ts_calcario.l1 + ts_diesel.l1 + ts_combustivel.l1
##
## Estimate Std. Error t value Pr(>|t|)
## ts_cimento.l1 -0.07597 0.02237 -3.397 0.000762 ***
## ts_calcario.l1 0.14239 0.04968 2.866 0.004416 **
## ts_diesel.l1 0.94427 0.01401 67.381 < 2e-16 ***
## ts_combustivel.l1 0.01729 0.01657 1.043 0.297549
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 16.98 on 344 degrees of freedom
## Multiple R-Squared: 0.9912, Adjusted R-squared: 0.9911
## F-statistic: 9662 on 4 and 344 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation ts_combustivel:
## ===============================================
## ts_combustivel = ts_cimento.l1 + ts_calcario.l1 + ts_diesel.l1 + ts_combustivel.l1
##
## Estimate Std. Error t value Pr(>|t|)
## ts_cimento.l1 -0.022575 0.014546 -1.552 0.1216
## ts_calcario.l1 0.053819 0.032313 1.666 0.0967 .
## ts_diesel.l1 -0.017545 0.009114 -1.925 0.0550 .
## ts_combustivel.l1 1.001963 0.010777 92.973 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 11.04 on 344 degrees of freedom
## Multiple R-Squared: 0.9971, Adjusted R-squared: 0.9971
## F-statistic: 2.951e+04 on 4 and 344 DF, p-value: < 2.2e-16
##
##
##
## Covariance matrix of residuals:
## ts_cimento ts_calcario ts_diesel ts_combustivel
## ts_cimento 261.03 71.93 91.94 78.44
## ts_calcario 71.93 37.04 40.01 38.20
## ts_diesel 91.94 40.01 288.30 87.39
## ts_combustivel 78.44 38.20 87.39 121.94
##
## Correlation matrix of residuals:
## ts_cimento ts_calcario ts_diesel ts_combustivel
## ts_cimento 1.0000 0.7315 0.3351 0.4397
## ts_calcario 0.7315 1.0000 0.3872 0.5683
## ts_diesel 0.3351 0.3872 1.0000 0.4661
## ts_combustivel 0.4397 0.5683 0.4661 1.0000
# Get residuals from the VAR model
var_residuals <- residuals(var_model)
# Perform structural change analysis on residuals
bp_results <- breakpoints(var_residuals ~ 1)
summary(bp_results)
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = var_residuals ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 107
## m = 2 55 107
## m = 3 55 107 165
## m = 4 55 107 197 277
## m = 5 55 107 159 214 277
##
## Corresponding to breakdates:
##
## m = 1 0.307471264367816
## m = 2 0.158045977011494 0.307471264367816
## m = 3 0.158045977011494 0.307471264367816 0.474137931034483
## m = 4 0.158045977011494 0.307471264367816 0.566091954022989
## m = 5 0.158045977011494 0.307471264367816 0.456896551724138 0.614942528735632
##
## m = 1
## m = 2
## m = 3
## m = 4 0.795977011494253
## m = 5 0.795977011494253
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 89794 89359 87333 86822 86574 86475
## BIC 2932 2942 2945 2955 2966 2977
# Plot dos resíduos
df_residuals <- as.data.frame(var_residuals)
df_residuals$date <- seq(as.Date("1980-01-01"), by = "month", length.out = nrow(df_residuals))
ggplot(df_residuals, aes(x = date)) +
geom_line(aes(y = ts_cimento), color = "blue", size = 0.5) +
geom_line(aes(y = ts_calcario), color = "red", size = 0.5) +
geom_line(aes(y = ts_diesel), color = "green", size = 0.5) +
geom_line(aes(y = ts_combustivel), color = "purple", size = 0.5) +
labs(title = "Residuos do VAR",
x = "Data",
y = "Valores") +
scale_color_manual(values = c("blue", "red", "green", "purple"))
Analisando a partir de um VAR(1,0,0) e escolhendo pelo BIC não há tembaem quebra estrututral quando olhando para as séries fitadas todas juntas. Entretanto o plot parece demonstrar que são cointegradas.
#series deflacionadas em nivel
K <- VARselect(ts_series, lag.max = 12, type = "both") # optimal lag selected by AIC
summary(K)
## Length Class Mode
## selection 4 -none- numeric
## criteria 48 -none- numeric
K <- K$selection[1]
K
## AIC(n)
## 12
test=ca.jo(ts_series, type="trace", K=K, ecdet= "none", spec= "longrun")
summary(test)
##
## ######################
## # Johansen-Procedure #
## ######################
##
## Test type: trace statistic , with linear trend
##
## Eigenvalues (lambda):
## [1] 8.474321e-02 2.049241e-02 1.543370e-02 1.566389e-05
##
## Values of teststatistic and critical values of test:
##
## test 10pct 5pct 1pct
## r <= 3 | 0.01 6.50 8.18 11.65
## r <= 2 | 5.25 15.66 17.95 23.52
## r <= 1 | 12.22 28.71 31.52 37.22
## r = 0 | 42.07 45.23 48.28 55.43
##
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
##
## ts_cimento.l12 ts_calcario.l12 ts_diesel.l12
## ts_cimento.l12 1.00000 1.0000000 1.0000000
## ts_calcario.l12 -49.33614 -2.8536094 -2.2822919
## ts_diesel.l12 -13.39716 0.4097156 -0.6908061
## ts_combustivel.l12 10.15470 0.1983826 0.2614674
## ts_combustivel.l12
## ts_cimento.l12 1.0000000
## ts_calcario.l12 0.3767496
## ts_diesel.l12 4.8183652
## ts_combustivel.l12 -18.9689119
##
## Weights W:
## (This is the loading matrix)
##
## ts_cimento.l12 ts_calcario.l12 ts_diesel.l12
## ts_cimento.d 0.006302643 -0.002650518 -0.009403368
## ts_calcario.d 0.003296388 0.002307099 0.001231048
## ts_diesel.d 0.008337551 -0.020193954 -0.004677955
## ts_combustivel.d 0.003672845 -0.029432178 0.004441159
## ts_combustivel.l12
## ts_cimento.d 2.245526e-05
## ts_calcario.d 7.918305e-06
## ts_diesel.d -1.704012e-05
## ts_combustivel.d 1.069891e-05
#series fitadas
K <- VARselect(var_residuals, lag.max = 12, type = "none")
K <- K$selection[1]
K
## AIC(n)
## 12
test=ca.jo(var_residuals, type="trace", K=K, ecdet= "none", spec= "longrun")
summary(test)
##
## ######################
## # Johansen-Procedure #
## ######################
##
## Test type: trace statistic , with linear trend
##
## Eigenvalues (lambda):
## [1] 0.14079156 0.11320243 0.07794479 0.04295246
##
## Values of teststatistic and critical values of test:
##
## test 10pct 5pct 1pct
## r <= 3 | 14.75 6.50 8.18 11.65
## r <= 2 | 42.02 15.66 17.95 23.52
## r <= 1 | 82.38 28.71 31.52 37.22
## r = 0 | 133.37 45.23 48.28 55.43
##
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
##
## ts_cimento.l12 ts_calcario.l12 ts_diesel.l12
## ts_cimento.l12 1.00000000 1.0000000 1.00000000
## ts_calcario.l12 -2.29790808 -6.2125268 6.73280022
## ts_diesel.l12 0.01585749 0.8375065 -0.03327864
## ts_combustivel.l12 0.79375837 -2.1485414 -2.75302489
## ts_combustivel.l12
## ts_cimento.l12 1.000000
## ts_calcario.l12 -5.191413
## ts_diesel.l12 -5.160946
## ts_combustivel.l12 2.276985
##
## Weights W:
## (This is the loading matrix)
##
## ts_cimento.l12 ts_calcario.l12 ts_diesel.l12
## ts_cimento.d -0.3892732 0.2616798 -0.16028197
## ts_calcario.d 0.1329149 0.1271011 -0.03771426
## ts_diesel.d -0.2028894 -0.2298026 0.08884892
## ts_combustivel.d -0.2594548 0.2471513 0.16675939
## ts_combustivel.l12
## ts_cimento.d 0.09885113
## ts_calcario.d 0.04552410
## ts_diesel.d 0.14198547
## ts_combustivel.d 0.06743987
Escolhendo o lag pelo comando VARselect e obtendo um lag de 12, faço a cointegração pelo método proposto por Johansen, o resultado é de que as variáveis não cointegram quando em nivel. O que torna uma regressão entre elas possivelmente espúria ja que não apresentam relação de longo prazo.Para as fitadas no VAR(1) apresentam 4 vetores de cointegração, embora um resultado oposto, também não é muito conclusivo.
# Escolha a ordem do modelo VAR usando VARselect
var_select <- VARselect(ts_series, lag.max = 12, type = "both")
lag_order_aic <- var_select$selection[1]
# Ajuste o modelo VAR com a ordem escolhida
var_model <- VAR(ts_series, p = lag_order_aic, type = "both")
# Extraia os resíduos do modelo VAR
residuos <- residuals(var_model)
# Realize o Teste de Johansen nos resíduos
johansen_test <- ca.jo(residuos, type = "trace", K = lag_order_aic)
# Imprima os resultados
summary(johansen_test)
##
## ######################
## # Johansen-Procedure #
## ######################
##
## Test type: trace statistic , with linear trend
##
## Eigenvalues (lambda):
## [1] 0.13293646 0.09104065 0.06740728 0.05211971
##
## Values of teststatistic and critical values of test:
##
## test 10pct 5pct 1pct
## r <= 3 | 17.40 6.50 8.18 11.65
## r <= 2 | 40.08 15.66 17.95 23.52
## r <= 1 | 71.10 28.71 31.52 37.22
## r = 0 | 117.46 45.23 48.28 55.43
##
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
##
## ts_cimento.l12 ts_calcario.l12 ts_diesel.l12
## ts_cimento.l12 1.0000000 1.000000 1.0000000
## ts_calcario.l12 -2.7778644 -1.919730 -0.0819438
## ts_diesel.l12 0.4739209 0.421803 -0.7437453
## ts_combustivel.l12 0.6014599 -1.434339 0.1556454
## ts_combustivel.l12
## ts_cimento.l12 1.000000
## ts_calcario.l12 -6.349379
## ts_diesel.l12 -1.461604
## ts_combustivel.l12 1.488601
##
## Weights W:
## (This is the loading matrix)
##
## ts_cimento.l12 ts_calcario.l12 ts_diesel.l12
## ts_cimento.d -0.50008795 0.02770996 -0.5640706
## ts_calcario.d 0.03111033 0.11926110 -0.1697195
## ts_diesel.d -0.67273837 0.19009357 0.4089993
## ts_combustivel.d -0.49808199 0.60502349 -0.1286672
## ts_combustivel.l12
## ts_cimento.d 0.2400720
## ts_calcario.d 0.1226344
## ts_diesel.d 0.2663606
## ts_combustivel.d 0.0747433
Aqui permiti que o comando decidisse o VAR, o resultado não foi muito diferente do modelo VAR(1,0,0)