Instalando os pacotes

Importando dados e deflacionando

path <- "d:/Users/thomas.daher/Desktop/cimento/dados_parte2/"

# cimento
cimento <- read_excel(paste(path, "Obras hidreletricas.xlsx", sep = ""),sheet = "cimento_51") %>% 
  dplyr::select(data, cimento_51_barra)


#calcário
calcario <- read_excel(paste(path, "calcario.xlsx", sep = "")) %>% dplyr::select(data,calcario)

# diesel
diesel <- read_excel(paste(path, "Obras hidreletricas.xlsx", sep = ""), sheet = "Oleo diesel") %>% dplyr::select(data,diesel)

#coque de petroleo

#coque_antigo <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/coque de petroleo.xlsx", sheet = "dados antigos")

#coque_antigo <- coque_antigo %>% group_by(data) %>% summarise(valor = sum(valor), kg = sum(kg)) %>%mutate(valor_por_kg = valor / kg)

#coque_novo <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/coque de petroleo.xlsx", sheet = "dados novos")

#coque_novo <- coque_novo %>% group_by(data) %>% summarise(valor = sum(`Valor FOB (US$)`), kg = sum(`Quilograma Líquido`)) %>% mutate(valor_por_kg = valor / kg)

#coque <- rbind(coque_antigo,coque_novo)
#rm(coque_antigo,coque_novo)

#producao cimento

prod_cim <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/prod_cimento.xlsx")

prod_cim <- prod_cim %>%
  pivot_longer(cols = -ano, names_to = "mes", values_to = "prod_cim") %>% 
  mutate(data = as.Date(paste(ano, mes, "01", sep = "-"), format = "%Y-%m-%d")) %>% dplyr::select(data, prod_cim)


# serie lubrificantes e combustiveis

combustiveis <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/combustiveis.xlsx") %>% dplyr::select(data, combustiveis)

IPCA <- read_excel("d:/Users/thomas.daher/Desktop/cimento/dados_parte2/ipca.xlsx") %>%  dplyr::select(data, IPCA_100)

## Criando dataframes e deflacionando

# Criando o data.frame

df <- merge(x = merge(x = merge(x = cimento, y = diesel, by = "data", all.x = TRUE), y = calcario, by = "data", all.x = TRUE), y = combustiveis, by = "data", all.x = TRUE)
df <- df %>%
  inner_join(IPCA, by = "data")

# Defina uma função para deflação
deflacionar_serie <- function(valor_nominal, ipca) {
  return((valor_nominal / ipca)*100)
}

# Aplique a função de deflação para cada série no DataFrame
df <- df %>%
  mutate(
    cimento_deflacionado = deflacionar_serie(cimento_51_barra, IPCA_100),
    diesel_deflacionado = deflacionar_serie(diesel, IPCA_100),
    calcario_deflacionado = deflacionar_serie(calcario, IPCA_100),
    combustiveis_deflacionado = deflacionar_serie(combustiveis, IPCA_100)
  )

df <- df %>% dplyr::select(data, cimento_deflacionado, diesel_deflacionado, calcario_deflacionado, combustiveis_deflacionado, IPCA_100)

Análise de raiz unitária para series em nível e em primeira diferença

#Teste de Raiz unitária

ts_cimento <- ts(df$cimento_deflacionado, frequency = 12)
ts_calcario <- ts(df$calcario_deflacionado, frequency = 12)
ts_diesel <- ts(df$diesel_deflacionado, frequency = 12)
ts_combustivel <- ts(df$combustiveis_deflacionado, frequency = 12)

ts_series <- ts(cbind(ts_cimento,ts_calcario,ts_diesel,ts_combustivel), start = c(1980, 01), frequency = 12)

### plotando series em nivel

ggplot(df) +
  geom_line(aes(x = data, y = cimento_deflacionado, color = "cimento"), linetype = "solid", size = 1) +
  geom_line(aes(x = data, y = calcario_deflacionado, color = "calcario"), linetype = "solid", size = 1) +
  geom_line(aes(x = data, y = diesel_deflacionado, color = "diesel"), linetype = "solid", size = 1) +
  geom_line(aes(x = data, y = combustiveis_deflacionado, color = "combustiveis"), linetype = "solid", size = 1) +
  labs(title = "Séries deflacionadas de cimento, calcario, diesel e combustiveis",
       x = "data",
       y = "serie",
       color = "Séries em nível") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#cimento
adf.test(ts_cimento)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_cimento
## Dickey-Fuller = -2.819, Lag order = 7, p-value = 0.2312
## alternative hypothesis: stationary
# diff_cimento
adf.test(diff(ts_cimento))
## Warning in adf.test(diff(ts_cimento)): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(ts_cimento)
## Dickey-Fuller = -6.4094, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#calcario
adf.test(ts_calcario)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_calcario
## Dickey-Fuller = -3.6511, Lag order = 7, p-value = 0.02836
## alternative hypothesis: stationary
# diff_calcario
adf.test(diff(ts_calcario))
## Warning in adf.test(diff(ts_calcario)): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(ts_calcario)
## Dickey-Fuller = -8.6684, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#diesel
adf.test(ts_diesel)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_diesel
## Dickey-Fuller = -1.3595, Lag order = 7, p-value = 0.847
## alternative hypothesis: stationary
# diff_diesel
adf.test(diff(ts_diesel))
## Warning in adf.test(diff(ts_diesel)): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(ts_diesel)
## Dickey-Fuller = -6.5866, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#combustiveis
adf.test(ts_combustivel)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_combustivel
## Dickey-Fuller = -1.1255, Lag order = 7, p-value = 0.9175
## alternative hypothesis: stationary
# diff_combustiveis
adf.test(diff(ts_combustivel))
## Warning in adf.test(diff(ts_combustivel)): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(ts_combustivel)
## Dickey-Fuller = -6.4255, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary

Podemos notar que as séries apresentam raiz unitária mesmo após estarem deflacionadas, ao calcular a primeira diferença todas se apresentam estacionárias. estretanto não significa que o problema seja apenas não estarem em primeira diferença, mas pode ser sazonalidade ou uma tendência.

Sazonalidade das séries

#Cimento
# Plot the time series
autoplot(ts_cimento) + labs(title = "Cimento")

# Decompose the time series
decomposition <- decompose(ts_cimento)
autoplot(decomposition) + labs(title = "Decomposition cimento")

#Calcario
# Plot the time series
autoplot(ts_calcario) + labs(title = "Calcario")

# Decompose the time series
decomposition <- decompose(ts_calcario)
autoplot(decomposition) + labs(title = "Decomposition calcario")

#Diesel
# Plot the time series
autoplot(ts_diesel) + labs(title = "Diesel")

# Decompose the time series
decomposition <- decompose(ts_diesel)
autoplot(decomposition) + labs(title = "Decomposition diesel")

#Combustiveis
# Plot the time series
autoplot(ts_combustivel) + labs(title = "Combustiveis")

# Decompose the time series
decomposition <- decompose(ts_combustivel)
autoplot(decomposition) + labs(title = "Decomposition calcario")

A decomposição das séries mostra indicios de temdência e sazonalidade para todas, vamos então utilizar o comando auto.sarima para ver o que ele decide com modelo para explicar as séries.

SARIMA

### Arima
library(forecast)
# cimento
cimento_arima <- auto.arima(ts_cimento, seasonal = TRUE)
summary(cimento_arima)
## Series: ts_cimento 
## ARIMA(3,1,1) 
## 
## Coefficients:
##           ar1     ar2      ar3     ma1
##       -0.2886  0.0396  -0.3787  0.4370
## s.e.   0.0943  0.0520   0.0512  0.0969
## 
## sigma^2 = 218:  log likelihood = -1429.01
## AIC=2868.02   AICc=2868.19   BIC=2887.28
## 
## Training set error measures:
##                     ME     RMSE      MAE      MPE     MAPE      MASE
## Training set 0.5641433 14.66025 5.699562 1.876182 5.134145 0.1892661
##                      ACF1
## Training set -0.009962158
adf_test <- ur.df(residuals(cimento_arima), type = "trend")
summary(adf_test)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -127.614   -1.500   -0.204    2.290   78.958 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.502000   1.598528   0.940    0.348    
## z.lag.1     -1.047707   0.076732 -13.654   <2e-16 ***
## tt          -0.005187   0.007918  -0.655    0.513    
## z.diff.lag   0.036187   0.053959   0.671    0.503    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.76 on 343 degrees of freedom
## Multiple R-squared:  0.5062, Adjusted R-squared:  0.5019 
## F-statistic: 117.2 on 3 and 343 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -13.6541 62.1449 93.2173 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -3.98 -3.42 -3.13
## phi2  6.15  4.71  4.05
## phi3  8.34  6.30  5.36
# calcario
calcario_arima <- auto.arima(ts_calcario, seasonal = TRUE)
summary(calcario_arima)
## Series: ts_calcario 
## ARIMA(2,1,3) 
## 
## Coefficients:
##           ar1      ar2     ma1     ma2      ma3
##       -1.1610  -0.6201  1.0001  0.3863  -0.3790
## s.e.   0.0628   0.0727  0.0728  0.1024   0.0624
## 
## sigma^2 = 30.14:  log likelihood = -1084.89
## AIC=2181.78   AICc=2182.02   BIC=2204.89
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE     MASE
## Training set 0.3527988 5.442902 2.750066 0.2025024 2.778048 0.288509
##                      ACF1
## Training set -0.006032054
adf_test <- ur.df(residuals(calcario_arima), type = "trend")
summary(adf_test)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -48.863  -1.500   0.025   1.362  28.810 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.607387   0.593237   1.024    0.307    
## z.lag.1     -0.979399   0.076572 -12.791   <2e-16 ***
## tt          -0.001456   0.002935  -0.496    0.620    
## z.diff.lag  -0.027218   0.053964  -0.504    0.614    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.473 on 343 degrees of freedom
## Multiple R-squared:  0.5039, Adjusted R-squared:  0.4996 
## F-statistic: 116.1 on 3 and 343 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -12.7906 54.534 81.8006 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -3.98 -3.42 -3.13
## phi2  6.15  4.71  4.05
## phi3  8.34  6.30  5.36
# diesel
diesel_arima <-  auto.arima(ts_diesel, seasonal = TRUE)
summary(diesel_arima)
## Series: ts_diesel 
## ARIMA(2,1,1) 
## 
## Coefficients:
##          ar1     ar2      ma1
##       0.2319  0.1662  -0.6292
## s.e.  0.3783  0.1669   0.3687
## 
## sigma^2 = 259.7:  log likelihood = -1459.74
## AIC=2927.48   AICc=2927.6   BIC=2942.89
## 
## Training set error measures:
##                      ME     RMSE     MAE        MPE     MAPE      MASE
## Training set -0.4599975 16.02332 8.97183 -0.8323906 5.121249 0.3613955
##                     ACF1
## Training set 0.003041679
adf_test <- ur.df(residuals(diesel_arima), type = "trend")
summary(adf_test)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -98.573  -3.644  -0.805   3.015  72.700 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.965728   1.748473  -1.124    0.262    
## z.lag.1     -1.004535   0.076297 -13.166   <2e-16 ***
## tt           0.008835   0.008664   1.020    0.309    
## z.diff.lag   0.004427   0.053936   0.082    0.935    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.11 on 343 degrees of freedom
## Multiple R-squared:  0.5006, Adjusted R-squared:  0.4962 
## F-statistic: 114.6 on 3 and 343 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -13.1662 57.786 86.6777 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -3.98 -3.42 -3.13
## phi2  6.15  4.71  4.05
## phi3  8.34  6.30  5.36
# combustivel
combustivel_arima <- auto.arima(ts_combustivel, seasonal = TRUE)
summary(combustivel_arima)
## Series: ts_combustivel 
## ARIMA(1,1,1)(0,0,2)[12] 
## 
## Coefficients:
##           ar1     ma1     sma1     sma2
##       -0.8488  0.6913  -0.0885  -0.1182
## s.e.   0.0996  0.1388   0.0579   0.0531
## 
## sigma^2 = 112:  log likelihood = -1313.11
## AIC=2636.21   AICc=2636.39   BIC=2655.47
## 
## Training set error measures:
##                     ME     RMSE      MAE         MPE     MAPE      MASE
## Training set 0.6714353 10.50734 6.685125 -0.01327149 4.022656 0.2634335
##                     ACF1
## Training set -0.03508005
adf_test <- ur.df(residuals(combustivel_arima), type = "trend")
summary(adf_test)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -67.161  -3.845  -1.007   3.658  33.189 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.884375   1.138901  -0.777    0.438    
## z.lag.1     -1.090759   0.077976 -13.988   <2e-16 ***
## tt           0.009322   0.005682   1.641    0.102    
## z.diff.lag   0.046439   0.053997   0.860    0.390    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.52 on 343 degrees of freedom
## Multiple R-squared:  0.522,  Adjusted R-squared:  0.5179 
## F-statistic: 124.9 on 3 and 343 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -13.9884 65.2286 97.8428 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -3.98 -3.42 -3.13
## phi2  6.15  4.71  4.05
## phi3  8.34  6.30  5.36

Todos aplicaram primeira diferença o que não responde bem minha pergunta, pois o auto.arima tenta modelar as vezes acima do mais parcimonioso, portanto vou eu testar alguns modelos.

Sarma model

ts_list <- list(ts_cimento, ts_calcario, ts_diesel, ts_combustivel)
for (i in seq_along(ts_list)) {
  ts_name <- c("cimento", "calcario", "diesel", "combustivel")[i]
  
  acf(ts_list[[i]], lag.max = 48, main = paste("ACF for", ts_name))
  Pacf(ts_list[[i]], lag.max = 48, main = paste("PACF for", ts_name))
}

# Function to fit and compare ARIMA(1,0,0) and SARIMA(0,0,0)(1,0,0)
fit_and_compare <- function(ts, name) {
  # ARMA(1,0,0)
  arima_model <- Arima(ts, order = c(1, 0, 0))
  aic_arima <- AIC(arima_model)
  adf_test_arima <- adf.test(residuals(arima_model))
  
  # SARMA(0,0,0)(1,0,0)
  sarima_model <- Arima(ts, order = c(0, 0, 0), seasonal = list(order = c(1, 0, 0)))
  aic_sarima <- AIC(sarima_model)
  
  cat("Results for", name, "\n")
  cat("ARIMA(1,0,0) AIC:", aic_arima, "\n")
  cat("ADF Test for ARIMA(1,0,0) p-value:", adf_test_arima$p.value, "\n")
  cat("SARIMA(0,0,0)(1,0,0) AIC:", aic_sarima, "\n\n")
  
  return(c("ARIMA(1,0,0)" = aic_arima, "SARIMA(0,0,0)(1,0,0)" = aic_sarima))
}

# Fit and compare for each time series
results_cimento <- fit_and_compare(ts_cimento, "Cimento")
## Warning in adf.test(residuals(arima_model)): p-value smaller than printed
## p-value
## Results for Cimento 
## ARIMA(1,0,0) AIC: 2937.178 
## ADF Test for ARIMA(1,0,0) p-value: 0.01 
## SARIMA(0,0,0)(1,0,0) AIC: 3679.743
results_calcario <- fit_and_compare(ts_calcario, "Calcario")
## Warning in adf.test(residuals(arima_model)): p-value smaller than printed
## p-value
## Results for Calcario 
## ARIMA(1,0,0) AIC: 2253.616 
## ADF Test for ARIMA(1,0,0) p-value: 0.01 
## SARIMA(0,0,0)(1,0,0) AIC: 2812.553
results_diesel <- fit_and_compare(ts_diesel, "Diesel")
## Warning in adf.test(residuals(arima_model)): p-value smaller than printed
## p-value
## Results for Diesel 
## ARIMA(1,0,0) AIC: 2984.373 
## ADF Test for ARIMA(1,0,0) p-value: 0.01 
## SARIMA(0,0,0)(1,0,0) AIC: 3502.832
results_combustivel <- fit_and_compare(ts_combustivel, "Combustivel")
## Warning in adf.test(residuals(arima_model)): p-value smaller than printed
## p-value
## Results for Combustivel 
## ARIMA(1,0,0) AIC: 2676.365 
## ADF Test for ARIMA(1,0,0) p-value: 0.01 
## SARIMA(0,0,0)(1,0,0) AIC: 3439.423
# Compare AIC values
aic_values <- data.frame(
  "Cimento" = results_cimento,
  "Calcario" = results_calcario,
  "Diesel" = results_diesel,
  "Combustivel" = results_combustivel
)

cat("AIC Values:\n")
## AIC Values:
print(aic_values)
##                       Cimento Calcario   Diesel Combustivel
## ARIMA(1,0,0)         2937.178 2253.616 2984.373    2676.365
## SARIMA(0,0,0)(1,0,0) 3679.743 2812.553 3502.832    3439.423
# Choose the best model for each time series
best_models <- apply(aic_values, 2, function(col) names(col)[which.min(col)])
cat("\nBest Models:\n")
## 
## Best Models:
print(best_models)
##        Cimento       Calcario         Diesel    Combustivel 
## "ARIMA(1,0,0)" "ARIMA(1,0,0)" "ARIMA(1,0,0)" "ARIMA(1,0,0)"

Ao aplicar o AR(1) e o MA(1) em todos, ambos ficam estacionários, entretanto o AR(1) apresenta AIC mais baixos para todas as séries, desta formatenho um fit bem parcimonioso e estacionário, vou utilizar ele para analisar as quebras estruturais e cointegração.

Individual Bai-Perron

library(strucchange)

# Function to perform breakpoint analysis for each univariate time series
perform_breakpoint_analysis <- function(ts) {
  mts_object <- ts(ts, start = c(1980, 01), frequency = 12)
  
  bp_results <- breakpoints(mts_object ~ 1)
  
  print(paste("Resumo de quebra da serie", deparse(substitute(ts)), sep = " "))

  summary(bp_results)
}

#definindo os dados
ts_cimento <- df[-1, ]$cimento_deflacionado
ts_calcario <- df[-1, ]$calcario_deflacionado
ts_diesel <- df[-1, ]$diesel_deflacionado
ts_combustiveis <- df[-1, ]$combustiveis_deflacionado

# Perform breakpoint analysis for each univariate time series
perform_breakpoint_analysis(ts_cimento)
## [1] "Resumo de quebra da serie ts_cimento"
## 
##   Optimal (m+1)-segment partition: 
## 
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
## 
## Breakpoints at observation number:
##                           
## m = 1   77                
## m = 2   77 129            
## m = 3   76 128     239    
## m = 4   76 128     242 296
## m = 5   76 128 186 240 296
## 
## Corresponding to breakdates:
##                                                 
## m = 1   1986(5)                                 
## m = 2   1986(5) 1990(9)                         
## m = 3   1986(4) 1990(8)         1999(11)        
## m = 4   1986(4) 1990(8)         2000(2)  2004(8)
## m = 5   1986(4) 1990(8) 1995(6) 1999(12) 2004(8)
## 
## Fit:
##                                                    
## m   0       1       2       3       4       5      
## RSS 1885631  671909  560509  471112  435600  422565
## BIC    3991    3644    3592    3544    3528    3529
perform_breakpoint_analysis(ts_calcario)
## [1] "Resumo de quebra da serie ts_calcario"
## 
##   Optimal (m+1)-segment partition: 
## 
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
## 
## Breakpoints at observation number:
##                           
## m = 1      86             
## m = 2      86      242    
## m = 3      86  145 250    
## m = 4   54 106 158 250    
## m = 5   54 106 158 244 296
## 
## Corresponding to breakdates:
##                                                  
## m = 1           1987(2)                          
## m = 2           1987(2)          2000(2)         
## m = 3           1987(2)  1992(1) 2000(10)        
## m = 4   1984(6) 1988(10) 1993(2) 2000(10)        
## m = 5   1984(6) 1988(10) 1993(2) 2000(4)  2004(8)
## 
## Fit:
##                                              
## m   0      1      2      3      4      5     
## RSS 189415  67416  33478  32362  31671  31691
## BIC   3192   2844   2612   2612   2616   2628
perform_breakpoint_analysis(ts_diesel)
## [1] "Resumo de quebra da serie ts_diesel"
## 
##   Optimal (m+1)-segment partition: 
## 
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
## 
## Breakpoints at observation number:
##                           
## m = 1   82                
## m = 2   82             273
## m = 3   82     185     273
## m = 4   82 134 186     273
## m = 5   82 134 186 244 296
## 
## Corresponding to breakdates:
##                                                 
## m = 1   1986(10)                                
## m = 2   1986(10)                         2002(9)
## m = 3   1986(10)         1995(5)         2002(9)
## m = 4   1986(10) 1991(2) 1995(6)         2002(9)
## m = 5   1986(10) 1991(2) 1995(6) 2000(4) 2004(8)
## 
## Fit:
##                                                    
## m   0       1       2       3       4       5      
## RSS 1558055  372438  285497  258697  257222  271676
## BIC    3925    3439    3358    3335    3345    3376
perform_breakpoint_analysis(ts_combustiveis)
## [1] "Resumo de quebra da serie ts_combustiveis"
## 
##   Optimal (m+1)-segment partition: 
## 
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
## 
## Breakpoints at observation number:
##                           
## m = 1              268    
## m = 2      103     248    
## m = 3      103     244 296
## m = 4      103 159 244 296
## m = 5   55 107 159 244 296
## 
## Corresponding to breakdates:
##                                                 
## m = 1                            2002(4)        
## m = 2           1988(7)          2000(8)        
## m = 3           1988(7)          2000(4) 2004(8)
## m = 4           1988(7)  1993(3) 2000(4) 2004(8)
## m = 5   1984(7) 1988(11) 1993(3) 2000(4) 2004(8)
## 
## Fit:
##                                                    
## m   0       1       2       3       4       5      
## RSS 1889240  513093  327817  211926  209992  216193
## BIC    3992    3550    3406    3266    3274    3296

Aplico a quebra estrutural nas séries em nível para ver on que irá diferenciar das séries fitadas.

Bai-Perron on ARMA(1,0,0) series

# Function to perform breakpoint analysis for each univariate time series
perform_breakpoint_analysis <- function(ts) {
  mts_object <- ts(ts, start = c(1980, 01), frequency = 12)
  
  bp_results <- breakpoints(mts_object ~ 1)
  
  print(paste("Resumo de quebra da serie", deparse(substitute(ts)), sep = " "))

  summary(bp_results)
}

# Assuming 'df' is your data frame
ts_cimento <- df[-1, ]$cimento_deflacionado
ts_calcario <- df[-1, ]$calcario_deflacionado
ts_diesel <- df[-1, ]$diesel_deflacionado
ts_combustiveis <- df[-1, ]$combustiveis_deflacionado

# Perform breakpoint analysis for each univariate time series
perform_breakpoint_analysis(residuals(cimento_arima))
## [1] "Resumo de quebra da serie residuals(cimento_arima)"
## 
##   Optimal (m+1)-segment partition: 
## 
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
## 
## Breakpoints at observation number:
##                           
## m = 1      108            
## m = 2   56 108            
## m = 3   56 108     198    
## m = 4   56 108     200 279
## m = 5   56 108 160 212 279
## 
## Corresponding to breakdates:
##                                                 
## m = 1           1988(12)                        
## m = 2   1984(8) 1988(12)                        
## m = 3   1984(8) 1988(12)         1996(6)        
## m = 4   1984(8) 1988(12)         1996(8) 2003(3)
## m = 5   1984(8) 1988(12) 1993(4) 1997(8) 2003(3)
## 
## Fit:
##                                        
## m   0     1     2     3     4     5    
## RSS 74897 73596 72590 71983 71772 71715
## BIC  2876  2881  2888  2897  2908  2919
perform_breakpoint_analysis(residuals(calcario_arima))
## [1] "Resumo de quebra da serie residuals(calcario_arima)"
## 
##   Optimal (m+1)-segment partition: 
## 
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
## 
## Breakpoints at observation number:
##                           
## m = 1      108            
## m = 2   54 108            
## m = 3   54 108     199    
## m = 4   54 108     199 278
## m = 5   54 108 160 212 278
## 
## Corresponding to breakdates:
##                                                 
## m = 1           1988(12)                        
## m = 2   1984(6) 1988(12)                        
## m = 3   1984(6) 1988(12)         1996(7)        
## m = 4   1984(6) 1988(12)         1996(7) 2003(2)
## m = 5   1984(6) 1988(12) 1993(4) 1997(8) 2003(2)
## 
## Fit:
##                                        
## m   0     1     2     3     4     5    
## RSS 10296 10226 10172 10131 10110 10110
## BIC  2183  2193  2202  2213  2224  2235
perform_breakpoint_analysis(residuals(diesel_arima))
## [1] "Resumo de quebra da serie residuals(diesel_arima)"
## 
##   Optimal (m+1)-segment partition: 
## 
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
## 
## Breakpoints at observation number:
##                           
## m = 1      122            
## m = 2   65 122            
## m = 3   65 122 177        
## m = 4   65 122 177 246    
## m = 5   65 122 176 228 280
## 
## Corresponding to breakdates:
##                                                 
## m = 1           1990(2)                         
## m = 2   1985(5) 1990(2)                         
## m = 3   1985(5) 1990(2) 1994(9)                 
## m = 4   1985(5) 1990(2) 1994(9) 2000(6)         
## m = 5   1985(5) 1990(2) 1994(8) 1998(12) 2003(4)
## 
## Fit:
##                                        
## m   0     1     2     3     4     5    
## RSS 89531 88639 86128 85996 85793 85732
## BIC  2938  2946  2948  2959  2970  2982
perform_breakpoint_analysis(residuals(combustivel_arima))
## [1] "Resumo de quebra da serie residuals(combustivel_arima)"
## 
##   Optimal (m+1)-segment partition: 
## 
## Call:
## breakpoints.formula(formula = mts_object ~ 1)
## 
## Breakpoints at observation number:
##                           
## m = 1              228    
## m = 2              228 280
## m = 3   64         228 280
## m = 4   64 130     228 280
## m = 5   64 116 169 228 280
## 
## Corresponding to breakdates:
##                                                  
## m = 1                            1998(12)        
## m = 2                            1998(12) 2003(4)
## m = 3   1985(4)                  1998(12) 2003(4)
## m = 4   1985(4) 1990(10)         1998(12) 2003(4)
## m = 5   1985(4) 1989(8)  1994(1) 1998(12) 2003(4)
## 
## Fit:
##                                        
## m   0     1     2     3     4     5    
## RSS 38374 37722 37161 36873 36580 36537
## BIC  2642  2648  2655  2664  2673  2684

Analisando pelo menor BIC todas as séries apontam que não há quebra estrutural.

Multivariate Bai-Perron

# Fit VAR model with lag = 1 (ARMA(1,0,0))
var_model <- VAR(ts_series, p = 1, type = "none")

# Summary of the VAR model
summary(var_model)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: ts_cimento, ts_calcario, ts_diesel, ts_combustivel 
## Deterministic variables: none 
## Sample size: 348 
## Log Likelihood: -5135.401 
## Roots of the characteristic polynomial:
## 1.002 0.9872 0.9627 0.9627
## Call:
## VAR(y = ts_series, p = 1, type = "none")
## 
## 
## Estimation results for equation ts_cimento: 
## =========================================== 
## ts_cimento = ts_cimento.l1 + ts_calcario.l1 + ts_diesel.l1 + ts_combustivel.l1 
## 
##                   Estimate Std. Error t value Pr(>|t|)    
## ts_cimento.l1     0.981816   0.021284  46.130   <2e-16 ***
## ts_calcario.l1    0.008530   0.047280   0.180    0.857    
## ts_diesel.l1      0.005920   0.013336   0.444    0.657    
## ts_combustivel.l1 0.003198   0.015769   0.203    0.839    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 16.16 on 344 degrees of freedom
## Multiple R-Squared: 0.9886,  Adjusted R-squared: 0.9884 
## F-statistic:  7438 on 4 and 344 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation ts_calcario: 
## ============================================ 
## ts_calcario = ts_cimento.l1 + ts_calcario.l1 + ts_diesel.l1 + ts_combustivel.l1 
## 
##                    Estimate Std. Error t value Pr(>|t|)    
## ts_cimento.l1     0.0036507  0.0080199   0.455    0.649    
## ts_calcario.l1    0.9864631  0.0178155  55.371   <2e-16 ***
## ts_diesel.l1      0.0008055  0.0050250   0.160    0.873    
## ts_combustivel.l1 0.0047786  0.0059418   0.804    0.422    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 6.089 on 344 degrees of freedom
## Multiple R-Squared: 0.997,   Adjusted R-squared: 0.997 
## F-statistic: 2.854e+04 on 4 and 344 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation ts_diesel: 
## ========================================== 
## ts_diesel = ts_cimento.l1 + ts_calcario.l1 + ts_diesel.l1 + ts_combustivel.l1 
## 
##                   Estimate Std. Error t value Pr(>|t|)    
## ts_cimento.l1     -0.07597    0.02237  -3.397 0.000762 ***
## ts_calcario.l1     0.14239    0.04968   2.866 0.004416 ** 
## ts_diesel.l1       0.94427    0.01401  67.381  < 2e-16 ***
## ts_combustivel.l1  0.01729    0.01657   1.043 0.297549    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 16.98 on 344 degrees of freedom
## Multiple R-Squared: 0.9912,  Adjusted R-squared: 0.9911 
## F-statistic:  9662 on 4 and 344 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation ts_combustivel: 
## =============================================== 
## ts_combustivel = ts_cimento.l1 + ts_calcario.l1 + ts_diesel.l1 + ts_combustivel.l1 
## 
##                    Estimate Std. Error t value Pr(>|t|)    
## ts_cimento.l1     -0.022575   0.014546  -1.552   0.1216    
## ts_calcario.l1     0.053819   0.032313   1.666   0.0967 .  
## ts_diesel.l1      -0.017545   0.009114  -1.925   0.0550 .  
## ts_combustivel.l1  1.001963   0.010777  92.973   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 11.04 on 344 degrees of freedom
## Multiple R-Squared: 0.9971,  Adjusted R-squared: 0.9971 
## F-statistic: 2.951e+04 on 4 and 344 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##                ts_cimento ts_calcario ts_diesel ts_combustivel
## ts_cimento         261.03       71.93     91.94          78.44
## ts_calcario         71.93       37.04     40.01          38.20
## ts_diesel           91.94       40.01    288.30          87.39
## ts_combustivel      78.44       38.20     87.39         121.94
## 
## Correlation matrix of residuals:
##                ts_cimento ts_calcario ts_diesel ts_combustivel
## ts_cimento         1.0000      0.7315    0.3351         0.4397
## ts_calcario        0.7315      1.0000    0.3872         0.5683
## ts_diesel          0.3351      0.3872    1.0000         0.4661
## ts_combustivel     0.4397      0.5683    0.4661         1.0000
# Get residuals from the VAR model
var_residuals <- residuals(var_model)

# Perform structural change analysis on residuals
bp_results <- breakpoints(var_residuals ~ 1)
summary(bp_results)
## 
##   Optimal (m+1)-segment partition: 
## 
## Call:
## breakpoints.formula(formula = var_residuals ~ 1)
## 
## Breakpoints at observation number:
##                           
## m = 1      107            
## m = 2   55 107            
## m = 3   55 107 165        
## m = 4   55 107     197 277
## m = 5   55 107 159 214 277
## 
## Corresponding to breakdates:
##                                                                                
## m = 1                     0.307471264367816                                    
## m = 2   0.158045977011494 0.307471264367816                                    
## m = 3   0.158045977011494 0.307471264367816 0.474137931034483                  
## m = 4   0.158045977011494 0.307471264367816                   0.566091954022989
## m = 5   0.158045977011494 0.307471264367816 0.456896551724138 0.614942528735632
##                          
## m = 1                    
## m = 2                    
## m = 3                    
## m = 4   0.795977011494253
## m = 5   0.795977011494253
## 
## Fit:
##                                        
## m   0     1     2     3     4     5    
## RSS 89794 89359 87333 86822 86574 86475
## BIC  2932  2942  2945  2955  2966  2977
# Plot dos resíduos
df_residuals <- as.data.frame(var_residuals)
df_residuals$date <- seq(as.Date("1980-01-01"), by = "month", length.out = nrow(df_residuals))
ggplot(df_residuals, aes(x = date)) +
  geom_line(aes(y = ts_cimento), color = "blue", size = 0.5) +
  geom_line(aes(y = ts_calcario), color = "red", size = 0.5) +
  geom_line(aes(y = ts_diesel), color = "green", size = 0.5) +
  geom_line(aes(y = ts_combustivel), color = "purple", size = 0.5) +
  labs(title = "Residuos do VAR",
       x = "Data",
       y = "Valores") +
  scale_color_manual(values = c("blue", "red", "green", "purple"))

Analisando a partir de um VAR(1,0,0) e escolhendo pelo BIC não há tembaem quebra estrututral quando olhando para as séries fitadas todas juntas. Entretanto o plot parece demonstrar que são cointegradas.

Johansen test

#series deflacionadas em nivel

K <- VARselect(ts_series, lag.max = 12, type = "both")  # optimal lag selected by AIC
summary(K)
##           Length Class  Mode   
## selection  4     -none- numeric
## criteria  48     -none- numeric
K <- K$selection[1]
K
## AIC(n) 
##     12
test=ca.jo(ts_series, type="trace", K=K, ecdet= "none", spec= "longrun")
summary(test)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , with linear trend 
## 
## Eigenvalues (lambda):
## [1] 8.474321e-02 2.049241e-02 1.543370e-02 1.566389e-05
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 3 |  0.01  6.50  8.18 11.65
## r <= 2 |  5.25 15.66 17.95 23.52
## r <= 1 | 12.22 28.71 31.52 37.22
## r = 0  | 42.07 45.23 48.28 55.43
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##                    ts_cimento.l12 ts_calcario.l12 ts_diesel.l12
## ts_cimento.l12            1.00000       1.0000000     1.0000000
## ts_calcario.l12         -49.33614      -2.8536094    -2.2822919
## ts_diesel.l12           -13.39716       0.4097156    -0.6908061
## ts_combustivel.l12       10.15470       0.1983826     0.2614674
##                    ts_combustivel.l12
## ts_cimento.l12              1.0000000
## ts_calcario.l12             0.3767496
## ts_diesel.l12               4.8183652
## ts_combustivel.l12        -18.9689119
## 
## Weights W:
## (This is the loading matrix)
## 
##                  ts_cimento.l12 ts_calcario.l12 ts_diesel.l12
## ts_cimento.d        0.006302643    -0.002650518  -0.009403368
## ts_calcario.d       0.003296388     0.002307099   0.001231048
## ts_diesel.d         0.008337551    -0.020193954  -0.004677955
## ts_combustivel.d    0.003672845    -0.029432178   0.004441159
##                  ts_combustivel.l12
## ts_cimento.d           2.245526e-05
## ts_calcario.d          7.918305e-06
## ts_diesel.d           -1.704012e-05
## ts_combustivel.d       1.069891e-05
#series fitadas

K <- VARselect(var_residuals, lag.max = 12, type = "none")
K <- K$selection[1]
K
## AIC(n) 
##     12
test=ca.jo(var_residuals, type="trace", K=K, ecdet= "none", spec= "longrun")
summary(test)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , with linear trend 
## 
## Eigenvalues (lambda):
## [1] 0.14079156 0.11320243 0.07794479 0.04295246
## 
## Values of teststatistic and critical values of test:
## 
##            test 10pct  5pct  1pct
## r <= 3 |  14.75  6.50  8.18 11.65
## r <= 2 |  42.02 15.66 17.95 23.52
## r <= 1 |  82.38 28.71 31.52 37.22
## r = 0  | 133.37 45.23 48.28 55.43
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##                    ts_cimento.l12 ts_calcario.l12 ts_diesel.l12
## ts_cimento.l12         1.00000000       1.0000000    1.00000000
## ts_calcario.l12       -2.29790808      -6.2125268    6.73280022
## ts_diesel.l12          0.01585749       0.8375065   -0.03327864
## ts_combustivel.l12     0.79375837      -2.1485414   -2.75302489
##                    ts_combustivel.l12
## ts_cimento.l12               1.000000
## ts_calcario.l12             -5.191413
## ts_diesel.l12               -5.160946
## ts_combustivel.l12           2.276985
## 
## Weights W:
## (This is the loading matrix)
## 
##                  ts_cimento.l12 ts_calcario.l12 ts_diesel.l12
## ts_cimento.d         -0.3892732       0.2616798   -0.16028197
## ts_calcario.d         0.1329149       0.1271011   -0.03771426
## ts_diesel.d          -0.2028894      -0.2298026    0.08884892
## ts_combustivel.d     -0.2594548       0.2471513    0.16675939
##                  ts_combustivel.l12
## ts_cimento.d             0.09885113
## ts_calcario.d            0.04552410
## ts_diesel.d              0.14198547
## ts_combustivel.d         0.06743987

Escolhendo o lag pelo comando VARselect e obtendo um lag de 12, faço a cointegração pelo método proposto por Johansen, o resultado é de que as variáveis não cointegram quando em nivel. O que torna uma regressão entre elas possivelmente espúria ja que não apresentam relação de longo prazo.Para as fitadas no VAR(1) apresentam 4 vetores de cointegração, embora um resultado oposto, também não é muito conclusivo.

Johasen VAR auto.fited

# Escolha a ordem do modelo VAR usando VARselect
var_select <- VARselect(ts_series, lag.max = 12, type = "both")
lag_order_aic <- var_select$selection[1]

# Ajuste o modelo VAR com a ordem escolhida
var_model <- VAR(ts_series, p = lag_order_aic, type = "both")

# Extraia os resíduos do modelo VAR
residuos <- residuals(var_model)

# Realize o Teste de Johansen nos resíduos
johansen_test <- ca.jo(residuos, type = "trace", K = lag_order_aic)

# Imprima os resultados
summary(johansen_test)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , with linear trend 
## 
## Eigenvalues (lambda):
## [1] 0.13293646 0.09104065 0.06740728 0.05211971
## 
## Values of teststatistic and critical values of test:
## 
##            test 10pct  5pct  1pct
## r <= 3 |  17.40  6.50  8.18 11.65
## r <= 2 |  40.08 15.66 17.95 23.52
## r <= 1 |  71.10 28.71 31.52 37.22
## r = 0  | 117.46 45.23 48.28 55.43
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##                    ts_cimento.l12 ts_calcario.l12 ts_diesel.l12
## ts_cimento.l12          1.0000000        1.000000     1.0000000
## ts_calcario.l12        -2.7778644       -1.919730    -0.0819438
## ts_diesel.l12           0.4739209        0.421803    -0.7437453
## ts_combustivel.l12      0.6014599       -1.434339     0.1556454
##                    ts_combustivel.l12
## ts_cimento.l12               1.000000
## ts_calcario.l12             -6.349379
## ts_diesel.l12               -1.461604
## ts_combustivel.l12           1.488601
## 
## Weights W:
## (This is the loading matrix)
## 
##                  ts_cimento.l12 ts_calcario.l12 ts_diesel.l12
## ts_cimento.d        -0.50008795      0.02770996    -0.5640706
## ts_calcario.d        0.03111033      0.11926110    -0.1697195
## ts_diesel.d         -0.67273837      0.19009357     0.4089993
## ts_combustivel.d    -0.49808199      0.60502349    -0.1286672
##                  ts_combustivel.l12
## ts_cimento.d              0.2400720
## ts_calcario.d             0.1226344
## ts_diesel.d               0.2663606
## ts_combustivel.d          0.0747433

Aqui permiti que o comando decidisse o VAR, o resultado não foi muito diferente do modelo VAR(1,0,0)