Instalando os pacotes

Regressão linear

#Importando dados e formatando a data

path <- "d:/Users/thomas.daher/Desktop/cimento/dados_parte1/"
file_name <- "Obras hidreletricas.xlsx"
sheet_name <- "serie_curta"
file_path <- paste(path, file_name, sep = "")
cimento <- read_excel(file_path, sheet = sheet_name)

cimento$Data <- as.Date(cimento$Data, format = "%Y/%m/%d")
cimento <- filter(cimento, Data >= as.Date("1990-09-01"))

# regredindo a serie 51 em primeira diferenca contra a 17 em primeira diferenca e ambas series em (t-1)
formula <- "diff(cimento51) ~ 0 + `cimento51_t-1`[-1] + `cimento17_t-1`[-1] + diff(cimento17)"
summary(lm(data = cimento, formula = formula))
## 
## Call:
## lm(formula = formula, data = cimento)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -432333  -11846      18   14545  282935 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## `cimento51_t-1`[-1] -0.06419    0.04623  -1.389    0.168    
## `cimento17_t-1`[-1]  0.06271    0.04320   1.452    0.150    
## diff(cimento17)      0.75181    0.03773  19.928   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 71160 on 99 degrees of freedom
## Multiple R-squared:  0.8153, Adjusted R-squared:  0.8097 
## F-statistic: 145.7 on 3 and 99 DF,  p-value: < 2.2e-16
# Plot das variacoes
ggplot(cimento, aes(x = Data)) +
  geom_line(aes(y = delta_51t, color = "Delta 51"), linetype = "solid") +
  geom_line(aes(y = delta_17, color = "Delta 17"), linetype = "solid") +
  labs(
    x = "Data",
    y = "Variação",
    title = "Variação de cada série"
  ) +
  scale_color_manual(
    values = c("Delta 51" = "blue", "Delta 17" = "red"),
    labels = c("Delta 51", "Delta 17")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, hjust = 0.5),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    legend.title = element_blank(),
    legend.text = element_text(size = 10)
  )

# Regressao de uma variacao contra a outra

formula <- "delta_51t ~ delta_17"

OLS <- (lm(data = cimento, formula = formula))
summary(OLS)
## 
## Call:
## lm(formula = formula, data = cimento)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.15759 -0.00765  0.00351  0.02041  0.18851 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.12599    0.08913   1.414    0.161    
## delta_17     0.87671    0.07855  11.161   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1254 on 101 degrees of freedom
## Multiple R-squared:  0.5523, Adjusted R-squared:  0.5478 
## F-statistic: 124.6 on 1 and 101 DF,  p-value: < 2.2e-16

Teste de raiz unitária para os dois tipos de serie de cimentos

# Transformando em serie temporal

cimento_17 <- ts(cimento$cimento17)
cimento_51 <- ts(cimento$cimento51)

# Realizando  teste de Dickey-Fuller aumentado

adf.test(diff(cimento_17))
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(cimento_17)
## Dickey-Fuller = -3.3697, Lag order = 4, p-value = 0.06337
## alternative hypothesis: stationary
adf.test(diff(cimento_51))
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(cimento_51)
## Dickey-Fuller = -3.1684, Lag order = 4, p-value = 0.09691
## alternative hypothesis: stationary
# Plotando as series e suas primeiras diferenças

plot(cimento_17)

plot(diff(cimento_17))

plot(cimento_51)

plot(diff(cimento_51))

# Criando a regressao de uma serie contra a outra

formula <- "diff(cimento51) ~ diff(cimento17)"
linear_model <- lm(data = cimento, formula = formula)
summary(linear_model)
## 
## Call:
## lm(formula = formula, data = cimento)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -422787  -13347  -12493   12064  286771 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.335e+04  7.315e+03   1.824   0.0711 .  
## diff(cimento17) 7.294e-01  3.732e-02  19.543   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 71030 on 100 degrees of freedom
## Multiple R-squared:  0.7925, Adjusted R-squared:  0.7904 
## F-statistic: 381.9 on 1 and 100 DF,  p-value: < 2.2e-16
plot(linear_model)

# Teste adf dos resíduos da série

adf.test(residuals(linear_model))
## Warning in adf.test(residuals(linear_model)): p-value smaller than printed
## p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  residuals(linear_model)
## Dickey-Fuller = -4.1612, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
plot(residuals(linear_model))

# Plot da diferenca entre as series e da diferenca das diferençcs

plot((cimento$cimento17) - (cimento$cimento51))

plot(diff(cimento$cimento17) - diff(cimento$cimento51))

### Cointegracao

# Transformando em series temporais, tirando a primeira diferença e invertendo para poder projetar o passado.

ts_data17 <- rev(diff(ts(cimento$cimento17, start = 1990-10-01, frequency = 12)))
ts_data51 <- rev(diff(ts(cimento$cimento51, start = 1990-10-01, frequency = 12)))

# Teste de estacionariedade usando Augmented Dickey-Fuller (ADF) test

adf_test17 <- ur.df(ts_data17, type = "trend", lags = 0)
adf_test51 <- ur.df(ts_data51, type = "trend", lags = 0)

summary(adf_test17)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -148792  -24970  -14213    -304 1232234 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.701e+04  2.706e+04   0.629 0.530974    
## z.lag.1     -2.829e-01  7.041e-02  -4.018 0.000115 ***
## tt          -3.018e+01  4.572e+02  -0.066 0.947498    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 133900 on 98 degrees of freedom
## Multiple R-squared:  0.1416, Adjusted R-squared:  0.1241 
## F-statistic: 8.084 on 2 and 98 DF,  p-value: 0.000563
## 
## 
## Value of test-statistic is: -4.0181 5.3891 8.0836 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -3.99 -3.43 -3.13
## phi2  6.22  4.75  4.07
## phi3  8.43  6.49  5.47
summary(adf_test51)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -155633  -24057   -6849     753  596794 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  1.073e+04  1.750e+04   0.613  0.54098   
## z.lag.1     -1.619e-01  5.519e-02  -2.934  0.00416 **
## tt          -4.725e+01  2.935e+02  -0.161  0.87245   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 86000 on 98 degrees of freedom
## Multiple R-squared:  0.08097,    Adjusted R-squared:  0.06222 
## F-statistic: 4.317 on 2 and 98 DF,  p-value: 0.01596
## 
## 
## Value of test-statistic is: -2.9344 2.8786 4.3173 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -3.99 -3.43 -3.13
## phi2  6.22  4.75  4.07
## phi3  8.43  6.49  5.47
# Criando um dataframe para poder rodar o teste Johansen

df <- data.frame(cimento17 = ts_data17, cimento51 = ts_data51)

#Esta função é usada para estimar modelos de vetor de correção de erros (VEC) para séries temporais cointegradas. Ela é usada após a confirmação de cointegração usando o teste ADF.

cointegration_test1 <- ca.jo(df, ecdet = "const", type="trace", K=2, spec="longrun",
season= NULL)
summary(cointegration_test1)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1] 4.055524e-01 1.332928e-01 5.551115e-16
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 1 | 14.31  7.52  9.24 12.97
## r = 0  | 66.32 17.85 19.96 24.60
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##              cimento17.l2  cimento51.l2      constant
## cimento17.l2     1.000000      1.000000  1.000000e+00
## cimento51.l2    -1.089209     -3.931628 -1.511185e+00
## constant      2401.706044 142049.377851  1.874997e+06
## 
## Weights W:
## (This is the loading matrix)
## 
##             cimento17.l2 cimento51.l2      constant
## cimento17.d  -1.37697591   0.09742083 -1.896898e-17
## cimento51.d   0.04394564   0.05556889 -1.042128e-17
cointegration_test2 <- ca.jo(df, ecdet = "const", type="eigen", K=2, spec="longrun",
season= NULL)
summary(cointegration_test2)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: maximal eigenvalue statistic (lambda max) , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1] 4.055524e-01 1.332928e-01 5.551115e-16
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 1 | 14.31  7.52  9.24 12.97
## r = 0  | 52.01 13.75 15.67 20.20
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##              cimento17.l2  cimento51.l2      constant
## cimento17.l2     1.000000      1.000000  1.000000e+00
## cimento51.l2    -1.089209     -3.931628 -1.511185e+00
## constant      2401.706044 142049.377851  1.874997e+06
## 
## Weights W:
## (This is the loading matrix)
## 
##             cimento17.l2 cimento51.l2      constant
## cimento17.d  -1.37697591   0.09742083 -1.896898e-17
## cimento51.d   0.04394564   0.05556889 -1.042128e-17
# cajools,Essa função retorna as regressões OLS de um VECM irrestrito, ou seja, ela retorna um objeto da classe lm. O usuário pode especificar o número exato de qual equação no VECM deve ser estimada e relatada, ou, se "reg.number=NULL", cada equação no VECM será estimada e os resultados correspondentes serão relatados. Neste caso a segunda serie, "R = 2" é o cimento 51.

cointegrating_vectors <- cajools(cointegration_test1, r = NULL)
summary(cointegrating_vectors)
## Response cimento17.d :
## 
## Call:
## lm(formula = cimento17.d ~ cimento17.dl1 + cimento51.dl1 + cimento17.l2 + 
##     cimento51.l2 + constant - 1, data = data.mat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -169590  -26962  -10223    8519 1115183 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## cimento17.dl1    -0.5882     0.1698  -3.463 0.000803 ***
## cimento51.dl1     1.0480     0.3476   3.015 0.003293 ** 
## cimento17.l2     -1.2796     0.3049  -4.196  6.1e-05 ***
## cimento51.l2      1.1168     0.3521   3.172 0.002039 ** 
## constant      10531.4765 13646.7388   0.772 0.442195    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 127300 on 95 degrees of freedom
## Multiple R-squared:  0.2474, Adjusted R-squared:  0.2078 
## F-statistic: 6.245 on 5 and 95 DF,  p-value: 4.739e-05
## 
## 
## Response cimento51.d :
## 
## Call:
## lm(formula = cimento51.d ~ cimento17.dl1 + cimento51.dl1 + cimento17.l2 + 
##     cimento51.l2 + constant - 1, data = data.mat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -113654  -11998   -7976    8112  418389 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## cimento17.dl1    0.46857    0.08006   5.853 6.88e-08 ***
## cimento51.dl1   -0.23931    0.16383  -1.461    0.147    
## cimento17.l2     0.09951    0.14374   0.692    0.490    
## cimento51.l2    -0.26634    0.16596  -1.605    0.112    
## constant      7999.07040 6432.82105   1.243    0.217    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 60030 on 95 degrees of freedom
## Multiple R-squared:  0.565,  Adjusted R-squared:  0.5421 
## F-statistic: 24.67 on 5 and 95 DF,  p-value: 7.534e-16
cointegrating_vectors <- cajools(cointegration_test2, r  = 2)
summary(cointegrating_vectors)
## 
## Call:
## lm(formula = substitute(form1), data = data.mat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -113654  -11998   -7976    8112  418389 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## cimento17.dl1    0.46857    0.08006   5.853 6.88e-08 ***
## cimento51.dl1   -0.23931    0.16383  -1.461    0.147    
## cimento17.l2     0.09951    0.14374   0.692    0.490    
## cimento51.l2    -0.26634    0.16596  -1.605    0.112    
## constant      7999.07040 6432.82105   1.243    0.217    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 60030 on 95 degrees of freedom
## Multiple R-squared:  0.565,  Adjusted R-squared:  0.5421 
## F-statistic: 24.67 on 5 and 95 DF,  p-value: 7.534e-16
plot(cointegrating_vectors$residuals)

as.character(formula(cointegrating_vectors))
## [1] "~"                                                                         
## [2] "z@Z0[, reg.number]"                                                        
## [3] "cimento17.dl1 + cimento51.dl1 + cimento17.l2 + cimento51.l2 + constant - 1"
cointegrating_vectors$coefficients
## cimento17.dl1 cimento51.dl1  cimento17.l2  cimento51.l2      constant 
##    0.46857410   -0.23930919    0.09951453   -0.26634214 7999.07039842

Funcoes

### My lag function
myLag <- function(data, lag) {
  data.frame(unclass(data[c(rep(NA, lag), 1:(nrow(data) - lag)), ]))
}

### Predict VECM funcition
predict.vecm <- function(object, new_data, predicted_var){
  if (inherits(object, "VECM")) {
    # Just valid for VECM models
    # Get endogenus and exogenus variables
    summary_vecm <- summary(object)
    model_vars <- colnames(object$model)
    endovars <- sub("Equation ", "", rownames(summary_vecm$bigcoefficients))
    if (!(predicted_var %in% endovars)) {
      stop("You must provide a valid endogenus variable.")
    }
    exovars <- NULL
    if (object$exogen) {
      ind_endovars <-
        unlist(sapply(endovars, function(x) grep(x, model_vars), simplify = FALSE))
      exovars <- model_vars[-ind_endovars]
      exovars <- exovars[exovars != "ECT"]
    }
    
    # First step: join new_data and (lags + 1) last values from the calibration data
    new_data <- data.frame(new_data)
    if (!all(colnames(new_data) %in% c(endovars, exovars))) {
      stop("new_data must have valid endogenus or exogenus column names.")
    }
    # Endovars but the one desired to be predicted
    endovars2 <- endovars[endovars != predicted_var]
    # if (!all(colnames(new_data) %in% c(endovars2, exovars))) {
    #   stop("new_data must have valid endogenus (all but the one desired to predict) or exogenus column names.")
    # }
    new_data <- new_data[, c(endovars2, exovars), drop = FALSE]
    new_data <- cbind(NA, new_data)
    colnames(new_data) <- c(predicted_var, endovars2, exovars)
    # Previous values to obtain lag values and first differences (lags + 1)
    dt_tail <- data.frame(tail(object$model[, c(endovars, exovars), drop = FALSE], object$lag + 1))
    new_data <- rbind(dt_tail, new_data)
    
    # Second step: get long rung relationship forecast (ECT term)
    ect_vars <- rownames(object$model.specific$beta)
    if ("const" %in% ect_vars) {
      new_data$const <- 1
    }
    ect_coeff <- object$model.specific$beta[, 1]
    new_data$ECT_0 <-
      apply(sweep(new_data[, ect_vars], MARGIN = 2, ect_coeff, `*`), MARGIN = 1, sum)
    # Get ECT-1 (Lag 1)
    new_data$ECT <- as.numeric(quantmod::Lag(new_data$ECT_0, 1))
    
    # Third step: get differences of the endogenus and exogenus variables provided in new_data
    diff_data <- apply(new_data[, c(endovars, exovars)], MARGIN = 2, diff)
    colnames(diff_data) <- paste0("DIFF_", c(endovars, exovars))
    diff_data <- rbind(NA, diff_data)
    new_data <- cbind(new_data, diff_data)
    
    # Fourth step: get x lags of the endogenus and exogenus variables
    for (k in 1:object$lag) {
      iter <- myLag(new_data[, paste0("DIFF_", endovars)], k)
      colnames(iter) <- paste0("DIFF_", endovars, " -", k)
      new_data <- cbind(new_data, iter)
    }
    
    # Fifth step: recursive calculatioon
    vecm_vars <- colnames(summary_vecm$bigcoefficients)
    if ("Intercept" %in% vecm_vars) {
      new_data$Intercept <- 1
    }
    vecm_vars[!(vecm_vars %in% c("ECT", "Intercept"))] <-
      paste0("DIFF_", vecm_vars[!(vecm_vars %in% c("ECT", "Intercept"))])
    equation <- paste("Equation", predicted_var)
    equation_coeff <- summary_vecm$coefficients[equation, ]
    predicted_var2 <- paste0("DIFF_", predicted_var)
    for (k in (object$lag + 2):nrow(new_data)) {
      # Estimate y_diff
      new_data[k, predicted_var2] <-
        sum(sweep(new_data[k, vecm_vars], MARGIN = 2, equation_coeff, `*`))
      # Estimate y_diff lags
      for (j in 1:object$lag) {
        new_data[, paste0(predicted_var2, " -", j)] <-
          as.numeric(quantmod::Lag(new_data[, predicted_var2], j))
      }
      # Estimate y
      new_data[k, predicted_var] <-
        new_data[(k - 1), predicted_var] + new_data[k, predicted_var2]
      # Estimate ECT
      new_data[k, "ECT_0"] <- sum(sweep(new_data[k, ect_vars], MARGIN = 2, ect_coeff, `*`))
      if (k < nrow(new_data)) {
        new_data[k + 1, "ECT"] <- new_data[k, "ECT_0"]
      }
    }
    predicted_values <- new_data[(object$lag + 2):nrow(new_data), predicted_var]
  } else {
    stop("You must provide a valid VECM model.")
  }
}

Projecao cimento_51 com X exogeno

# importing data
### short data
path <- "d:/Users/thomas.daher/Desktop/cimento/dados_parte1/"
file_name <- "Obras hidreletricas.xlsx"
sheet_name <- "serie_curta"
file_path <- paste(path, file_name, sep = "")
cimento <- read_excel(file_path, sheet = sheet_name)

cimento$Data <- as.Date(cimento$Data, format = "%Y/%m/%d")
cimento <- filter(cimento, Data >= as.Date("1990-09-01"))

###Creating dataset for regression and cointegration test
cimento_17 <- rev(diff(ts(log(cimento$cimento17), start = c(1990, 9), frequency = 12)))
cimento_51 <- rev(diff(ts(log(cimento$cimento51), start = c(1990, 9), frequency = 12)))
dset <- cbind(cimento_17,cimento_51)

### long data
sheet_name <- "serie_longa"
file_path <- paste(path, file_name, sep = "")
cimento <- read_excel(file_path, sheet = sheet_name)

cimento_long <- read_excel(file_path, sheet = sheet_name)
cimento_long$Data <- as.Date(cimento_long$Data, format = "%Y/%m/%d")

cimento_17_long <- rev(diff(ts(log(cimento_long$cimento17[-1]), start = c(1979, 5), frequency = 12)))
cimento_51_long <- rev(diff(ts(log(cimento_long$cimento51[-1]), start = c(1979, 5), frequency = 12)))
dset2 <- cbind(cimento_17 = cimento_17_long,cimento_51 = cimento_51_long)


### Optimal number number of lags
lagselect <- VARselect(dset, lag.max = 12, type = "const")
lagselect$selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      4      1      1      4
lagselect$criteria
##                    1             2             3             4             5
## AIC(n) -1.292406e+01 -1.292366e+01 -1.293839e+01 -1.298170e+01 -1.297742e+01
## HQ(n)  -1.285686e+01 -1.281165e+01 -1.278158e+01 -1.278009e+01 -1.273100e+01
## SC(n)  -1.275741e+01 -1.264590e+01 -1.254953e+01 -1.248174e+01 -1.236635e+01
## FPE(n)  2.438777e-06  2.440202e-06  2.405484e-06  2.305158e-06  2.317639e-06
##                    6             7             8             9            10
## AIC(n) -1.294280e+01 -1.297595e+01 -1.296270e+01 -1.290029e+01 -1.292475e+01
## HQ(n)  -1.265158e+01 -1.263992e+01 -1.258187e+01 -1.247466e+01 -1.245431e+01
## SC(n)  -1.222063e+01 -1.214268e+01 -1.201833e+01 -1.184481e+01 -1.175817e+01
## FPE(n)  2.403158e-06  2.329938e-06  2.367882e-06  2.529731e-06  2.480048e-06
##                   11            12
## AIC(n) -1.288170e+01 -1.282395e+01
## HQ(n)  -1.236647e+01 -1.226391e+01
## SC(n)  -1.160402e+01 -1.143517e+01
## FPE(n)  2.603809e-06  2.777469e-06
ctest1t <- ca.jo(dset, type = "trace", ecdet = "const", K = 4)
summary(ctest1t)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1] 3.486510e-01 4.356517e-02 4.582043e-19
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 1 |  4.37  7.52  9.24 12.97
## r = 0  | 46.38 17.85 19.96 24.60
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##               cimento_17.l4 cimento_51.l4   constant
## cimento_17.l4   1.000000000     1.0000000  1.0000000
## cimento_51.l4  -1.027973518    -0.4145711 -0.6987698
## constant        0.002829715    -0.0763558  0.1746788
## 
## Weights W:
## (This is the loading matrix)
## 
##              cimento_17.l4 cimento_51.l4      constant
## cimento_17.d    -0.9441770   -0.15655702 -1.011588e-16
## cimento_51.d     0.5966696   -0.09515987  8.276229e-17
ctest1e <- ca.jo(dset, type = "eigen", ecdet = "const", K = 2)
summary(ctest1e)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: maximal eigenvalue statistic (lambda max) , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1]  3.998295e-01  4.303795e-02 -2.166081e-19
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 1 |  4.40  7.52  9.24 12.97
## r = 0  | 51.05 13.75 15.67 20.20
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##               cimento_17.l2 cimento_51.l2 constant
## cimento_17.l2  1.0000000000      1.000000 1.000000
## cimento_51.l2 -1.0083809013      9.443679 1.597378
## constant       0.0008298347     -1.290361 2.192852
## 
## Weights W:
## (This is the loading matrix)
## 
##              cimento_17.l2 cimento_51.l2      constant
## cimento_17.d     -1.069497  -0.007963347 -1.800252e-18
## cimento_51.d      0.203797  -0.005561011 -5.114677e-19
### VECM

Model1 <- VECM(dset, 1, r = 1, estim = "2OLS", include = "const")
summary(Model1)
## #############
## ###Model VECM 
## #############
## Full sample size: 102    End sample size: 100
## Number of variables: 2   Number of estimated slope parameters 8
## AIC -1310.296    BIC -1286.85    SSR 0.4759895
## Cointegrating vector (estimated by 2OLS):
##    cimento_17 cimento_51
## r1          1  -1.001177
## 
## 
##                     ECT                Intercept          cimento_17 -1     
## Equation cimento_17 -1.0884(0.2747)*** 0.0013(0.0060)     0.1284(0.1738)    
## Equation cimento_51 0.1896(0.1703)     0.0010(0.0037)     0.0920(0.1078)    
##                     cimento_51 -1     
## Equation cimento_17 0.1222(0.1883)    
## Equation cimento_51 0.1596(0.1168)
coef(Model1)
##                            ECT   Intercept cimento_17 -1 cimento_51 -1
## Equation cimento_17 -1.0883615 0.001349639    0.12844130     0.1222050
## Equation cimento_51  0.1896342 0.001001921    0.09202481     0.1596238
#Diagnostic Tests

#Need to Transform VECM to VAR

Model1VAR <- vec2var(ctest1t, r = 1)

#Serial Correlation

Serial1 <- serial.test(Model1VAR, lags.pt = 2, type = "PT.asymptotic")
## Warning in pchisq(STATISTIC, df = PARAMETER): NaNs produzidos

## Warning in pchisq(STATISTIC, df = PARAMETER): NaNs produzidos
Serial1
## 
##  Portmanteau Test (asymptotic)
## 
## data:  Residuals of VAR object Model1VAR
## Chi-squared = 2.3317, df = -6, p-value = NA
#ARCH Effects

Arch1 <- arch.test(Model1VAR, lags.multi = 2, multivariate.only = TRUE)
Arch1
## 
##  ARCH (multivariate)
## 
## data:  Residuals of VAR object Model1VAR
## Chi-squared = 54.181, df = 18, p-value = 1.72e-05
#Normality of Residuals

Norm1 <- normality.test(Model1VAR, multivariate.only = TRUE)
Norm1
## $JB
## 
##  JB-Test (multivariate)
## 
## data:  Residuals of VAR object Model1VAR
## Chi-squared = 401.72, df = 4, p-value < 2.2e-16
## 
## 
## $Skewness
## 
##  Skewness only (multivariate)
## 
## data:  Residuals of VAR object Model1VAR
## Chi-squared = 77.294, df = 2, p-value < 2.2e-16
## 
## 
## $Kurtosis
## 
##  Kurtosis only (multivariate)
## 
## data:  Residuals of VAR object Model1VAR
## Chi-squared = 324.43, df = 2, p-value < 2.2e-16
#Impulse Response Functions

cimento51irf <- irf(Model1VAR, impulse = "cimento_17", response = "cimento_51", n.ahead = 20, boot = TRUE)
plot(cimento51irf, ylab = "cimento51", main = "cimento_17 shock to cimento_51")

cimento17irf <- irf(Model1VAR, impulse = "cimento_51", response = "cimento_17", n.ahead = 20, boot = TRUE)
plot(cimento17irf, ylab = "cimento17", main = "cimento_51 shock to cimento_17")

#Variance Decomposition

FEVD1 <- fevd(Model1VAR, n.ahead = 10)
plot(FEVD1)

### VECM forecast

cimento51_barra_diff <- rev(predict.vecm(Model1, new_data = dset2 , predicted_var = "cimento_51"))

Analise cimento_51_barra

# desrevertendo a série 51

plot(cimento51_barra_diff)

cimento51_barra_diff <- exp(cimento51_barra_diff)
cimento_51_barra <- cumsum(cimento51_barra_diff)
plot(cimento_51_barra)

#cimento_51_barra <- exp(cimento_51_barra)
  
df <- as.data.frame(cbind( cimento17 = (rev(cimento_17_long)),cimento51bar = cimento51_barra_diff))
  
  ggplot(df, aes(x = 1:238)) +
  geom_line(aes(y = cimento17, color = "cimento_17"), size = 1) +
  geom_line(aes(y = cimento51bar, color = "cimento_51bar"), size = 1)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# desrevertendo a série 17

cimento_17_long2 <- rev(cimento_17_long)
cimento_17_long2 <- exp(cimento_17_long2)
cimento_17_long2 <- cumsum(cimento_17_long2)

#Plot

df <- as.data.frame(cbind(cimento_51_barra,cimento_17_long2))

### plots comparativos
ggplot(df, aes(x = 1:238)) +
  geom_line(aes(y = cimento_17_long2, color = "cimento_17"), size = 1) +
  geom_line(aes(y = cimento_51_barra, color = "cimento_51_barra"), size = 1) +
  labs(title = "série 17 contra a 51 com realizado e estimado", y = "valor") +
  scale_color_manual(values = c("cimento_17" = "blue", "cimento_51_barra" = "red")) +
  theme_minimal()

### Somente o projetado

df <- df[1:(nrow(df) - 102), ]

ggplot(df, aes(x = 1:(136))) +
  geom_line(aes(y = cimento_17_long2, color = "cimento_17"), size = 1) +
  geom_line(aes(y = cimento_51_barra, color = "cimento_51_barra"), size = 1) +
  labs(title = "séries plotando somente o projetado versus realizado cimento 17", y = "valor") +
  scale_color_manual(values = c("cimento_17" = "blue", "cimento_51_barra" = "red"))

adf.test((cimento_51_barra))
## 
##  Augmented Dickey-Fuller Test
## 
## data:  (cimento_51_barra)
## Dickey-Fuller = -1.8826, Lag order = 6, p-value = 0.6252
## alternative hypothesis: stationary
adf.test(diff(cimento_51_barra))
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(cimento_51_barra)
## Dickey-Fuller = -2.4977, Lag order = 6, p-value = 0.3666
## alternative hypothesis: stationary
plot(c(cimento_51_barra - cimento_17_long2))

# Parte2

Nova projeção do VECM

# Antes de realizar a projeção do VECM devemos nos certificar de alguns fatores:
## Todas as séries são estacionárias em primeira diferença
## Determinar o lag ótimo (p) para o modelo
## Realizar o teste de Johansen para lag (p)
## Estimar o VAR irrestrito
## Com cointegração especificar o VECM com (p - l) Lags

### Apagando os dados
rm(list = ls(all.names = TRUE))
gc()
##           used  (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 3010107 160.8    4741034 253.2  4741034 253.2
## Vcells 5158963  39.4   10146329  77.5  8388608  64.0
## importando dados

path <- "d:/Users/thomas.daher/Desktop/cimento/dados_parte1/"
file_name <- "Obras hidreletricas.xlsx"
sheet_name <- "serie_curta"
file_path <- paste(path, file_name, sep = "")
cimento <- read_excel(file_path, sheet = sheet_name)

cimento$Data <- as.Date(cimento$Data, format = "%Y/%m/%d")
cimento <- filter(cimento, Data >= as.Date("1990-09-01"))

# Análise de estacionariedade das séries em primeira diferença

## Primeiro o teste o sem tirar a diferença, somente a invertendo e transformando em série temporal

cimento_17 <- rev((ts((cimento$cimento17), start = c(1990, 9), frequency = 12)))
cimento_51 <- rev((ts((cimento$cimento51), start = c(1990, 9), frequency = 12)))
plot(cimento_17)

plot(cimento_51)

adf.test(cimento_17)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  cimento_17
## Dickey-Fuller = -2.165, Lag order = 4, p-value = 0.5083
## alternative hypothesis: stationary
adf.test(cimento_51)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  cimento_51
## Dickey-Fuller = -2.065, Lag order = 4, p-value = 0.5497
## alternative hypothesis: stationary
## Vemos que não são estacionárias, portanto vamos agora tirar a primeira diferença.

cimento_17 <- rev(diff(ts((cimento$cimento17), start = c(1990, 9), frequency = 12)))
cimento_51 <- rev(diff(ts((cimento$cimento51), start = c(1990, 9), frequency = 12)))
plot(cimento_17)

plot(cimento_51)

adf.test(cimento_17)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  cimento_17
## Dickey-Fuller = -3.3675, Lag order = 4, p-value = 0.06372
## alternative hypothesis: stationary
adf.test(cimento_51)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  cimento_51
## Dickey-Fuller = -3.1483, Lag order = 4, p-value = 0.1007
## alternative hypothesis: stationary
## P_value série 17 = 0.0637 e da série 51 = 0.1007, considero que a 10% de confiança as séries são estacionárias em primeira diferença.

# Devemos agora determinar o lag ótimo para o modelo

dset <- cbind(cimento_17,cimento_51)

lagselect <- VARselect(dset, lag.max = 12, type = "const")
lagselect$selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      2      2      2      2
lagselect$criteria
##                   1            2            3            4            5
## AIC(n) 4.472896e+01 4.451279e+01 4.452653e+01 4.459971e+01 4.463361e+01
## HQ(n)  4.479616e+01 4.462480e+01 4.468334e+01 4.480133e+01 4.488002e+01
## SC(n)  4.489561e+01 4.479055e+01 4.491539e+01 4.509968e+01 4.524467e+01
## FPE(n) 2.664168e+19 2.146642e+19 2.177212e+19 2.344185e+19 2.427702e+19
##                   6            7            8            9           10
## AIC(n) 4.468770e+01 4.466925e+01 4.472872e+01 4.475248e+01 4.481572e+01
## HQ(n)  4.497892e+01 4.500527e+01 4.510955e+01 4.517810e+01 4.528615e+01
## SC(n)  4.540987e+01 4.550252e+01 4.567309e+01 4.580795e+01 4.598229e+01
## FPE(n) 2.566777e+19 2.525421e+19 2.687967e+19 2.762815e+19 2.956815e+19
##                  11           12
## AIC(n) 4.462365e+01 4.464467e+01
## HQ(n)  4.513888e+01 4.520471e+01
## SC(n)  4.590133e+01 4.603346e+01
## FPE(n) 2.453953e+19 2.523222e+19
## Para todos os testes o Lag de 2 é o mais adequando a estas duas séries

# Realizar o teste de Johansen para lag (p), que no nosso caso p = 2.

ctest1e <-ca.jo(dset, type = c("eigen"), ecdet = c("none"), K = 2,
spec=c("longrun"), season = NULL, dumvar = NULL)
summary(ctest1e)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: maximal eigenvalue statistic (lambda max) , with linear trend 
## 
## Eigenvalues (lambda):
## [1] 0.4054626 0.1331663
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 1 | 14.29  6.50  8.18 11.65
## r = 0  | 52.00 12.91 14.90 19.19
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##               cimento_17.l2 cimento_51.l2
## cimento_17.l2      1.000000       1.00000
## cimento_51.l2     -1.089155      -3.90878
## 
## Weights W:
## (This is the loading matrix)
## 
##              cimento_17.l2 cimento_51.l2
## cimento_17.d   -1.37773904    0.09818396
## cimento_51.d    0.04349453    0.05601999
# Estimar o VAR irrestrito

modelo_var_irrestrito <- VAR(dset, p = 2, type = "none")
summary(modelo_var_irrestrito)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: cimento_17, cimento_51 
## Deterministic variables: none 
## Sample size: 100 
## Log Likelihood: -2503.352 
## Roots of the characteristic polynomial:
## 0.6565 0.4386 0.3843 0.3843
## Call:
## VAR(y = dset, p = 2, type = "none")
## 
## 
## Estimation results for equation cimento_17: 
## =========================================== 
## cimento_17 = cimento_17.l1 + cimento_51.l1 + cimento_17.l2 + cimento_51.l2 
## 
##               Estimate Std. Error t value Pr(>|t|)    
## cimento_17.l1  0.40729    0.16939   2.404 0.018116 *  
## cimento_51.l1  1.07871    0.34453   3.131 0.002309 ** 
## cimento_17.l2 -0.71201    0.20193  -3.526 0.000649 ***
## cimento_51.l2  0.08936    0.18254   0.490 0.625569    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 127100 on 96 degrees of freedom
## Multiple R-Squared: 0.6043,  Adjusted R-squared: 0.5878 
## F-statistic: 36.65 on 4 and 96 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation cimento_51: 
## =========================================== 
## cimento_51 = cimento_17.l1 + cimento_51.l1 + cimento_17.l2 + cimento_51.l2 
## 
##               Estimate Std. Error t value Pr(>|t|)    
## cimento_17.l1  0.46514    0.08024   5.797 8.61e-08 ***
## cimento_51.l1  0.78405    0.16321   4.804 5.73e-06 ***
## cimento_17.l2 -0.38474    0.09566  -4.022 0.000115 ***
## cimento_51.l2 -0.01144    0.08647  -0.132 0.894991    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 60200 on 96 degrees of freedom
## Multiple R-Squared: 0.8715,  Adjusted R-squared: 0.8662 
## F-statistic: 162.8 on 4 and 96 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##            cimento_17 cimento_51
## cimento_17  1.606e+10  6.089e+09
## cimento_51  6.089e+09  3.573e+09
## 
## Correlation matrix of residuals:
##            cimento_17 cimento_51
## cimento_17     1.0000     0.8038
## cimento_51     0.8038     1.0000
# Camo há cointegração visto no teste ca.jo(), devemos especificar o VECM com (p - l) Lags

vecm_model <- VECM(
  dset,
  2,
  r = 1,
  include = c("none"),
  beta = NULL,
  estim = c("ML"),
  LRinclude = c("none"),
  exogen = NULL
)
summary(vecm_model)
## #############
## ###Model VECM 
## #############
## Full sample size: 102    End sample size: 99
## Number of variables: 2   Number of estimated slope parameters 10
## AIC 4423.837     BIC 4452.383    SSR 2.070318e+12
## Cointegrating vector (estimated by ML):
##    cimento_17 cimento_51
## r1          1  -1.116972
## 
## 
##                     ECT                cimento_17 -1     cimento_51 -1     
## Equation cimento_17 -1.1077(0.4116)**  0.5536(0.3293).   0.0798(0.4047)    
## Equation cimento_51 0.1153(0.1957)     0.3695(0.1565)*   0.0130(0.1924)    
##                     cimento_17 -2       cimento_51 -2      
## Equation cimento_17 -0.1637(0.2300)     -0.1965(0.1853)    
## Equation cimento_51 -0.0503(0.1093)     -0.0254(0.0881)
predicted <- predict(vecm_model, n.ahead= 1)


### long data import
sheet_name <- "serie_longa"
file_path <- paste(path, file_name, sep = "")
cimento_long <- read_excel(file_path, sheet = sheet_name)
cimento_long$Data <- as.Date(cimento_long$Data, format = "%Y/%m/%d")
cimento_17_long <- rev(diff(ts((cimento_long$cimento17[-1]), start = c(1979, 5), frequency = 12)))

### projeção dos vlores para cimento 51

num_predictions <- (length(cimento_17_long) - length(cimento_51))

Projeção

#Variável X de ajuste

exogenous <- cimento_17_long
exogenous <- tail(exogenous, n = num_predictions)

df <- dset

# Loop to iteratively predict and update data
for (i in 1:num_predictions) {
  
  vecm_model <- VECM(
    df,
    2,
    r = 1,
    include = c("none"),
    beta = NULL,
    estim = c("2OLS"),
    LRinclude = c("none"),
    exogen = NULL
  )

  predicted <- as.data.frame(predict(vecm_model, n.ahead = 1))
  
  # Update the original data with the predicted value
  df <- rbind(df, predicted)
  
  # changing the last row value to 1
  num_rows <- nrow(df)
  
  # Check if 'cimento_17' is a column in df
  if ("cimento_17" %in% colnames(df)) {
    df$cimento_17[num_rows] <- head(exogenous, 1)
  } else {
    print("coluna cimento_17 nao encontrada.")
  }
  
  exogenous <- exogenous[-1]
}

Analisando a nova série de cimento

plot(rev(df$cimento_51) %>% cumsum())

plot(rev(df$cimento_17) %>% cumsum())

# desrevertendo e transformando em série novamente

cimento_17 <- rev(df$cimento_17) %>% cumsum()
cimento_51 <- rev(df$cimento_51) %>% cumsum()

df <- as.data.frame(cbind(cimento_17,cimento_51))

df2 <- df[1:(nrow(df) - 102), ]

ggplot(df2, aes(x = 1:(136))) +
  geom_line(aes(y = cimento_17, color = "cimento_17"), size = 1) +
  geom_line(aes(y = cimento_51, color = "cimento_51_barra"), size = 1) +
  labs(title = "Séries plotando somente o projetado versus realizado cimento 17", y = "valor") +
  scale_color_manual(values = c("cimento_17" = "blue", "cimento_51_barra" = "red"))

ggplot(df, aes(x = 1:238)) +
  geom_line(aes(y = cimento_17, color = "cimento_17"), size = 1) +
  geom_line(aes(y = cimento_51, color = "cimento_51_barra"), size = 1) +
  labs(title = "série 17 contra a 51 com realizado e estimado", y = "valor") +
  scale_color_manual(values = c("cimento_17" = "blue", "cimento_51_barra" = "red")) +
  theme_minimal()