#Importando dados e formatando a data
path <- "d:/Users/thomas.daher/Desktop/cimento/dados_parte1/"
file_name <- "Obras hidreletricas.xlsx"
sheet_name <- "serie_curta"
file_path <- paste(path, file_name, sep = "")
cimento <- read_excel(file_path, sheet = sheet_name)
cimento$Data <- as.Date(cimento$Data, format = "%Y/%m/%d")
cimento <- filter(cimento, Data >= as.Date("1990-09-01"))
# regredindo a serie 51 em primeira diferenca contra a 17 em primeira diferenca e ambas series em (t-1)
formula <- "diff(cimento51) ~ 0 + `cimento51_t-1`[-1] + `cimento17_t-1`[-1] + diff(cimento17)"
summary(lm(data = cimento, formula = formula))
##
## Call:
## lm(formula = formula, data = cimento)
##
## Residuals:
## Min 1Q Median 3Q Max
## -432333 -11846 18 14545 282935
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## `cimento51_t-1`[-1] -0.06419 0.04623 -1.389 0.168
## `cimento17_t-1`[-1] 0.06271 0.04320 1.452 0.150
## diff(cimento17) 0.75181 0.03773 19.928 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 71160 on 99 degrees of freedom
## Multiple R-squared: 0.8153, Adjusted R-squared: 0.8097
## F-statistic: 145.7 on 3 and 99 DF, p-value: < 2.2e-16
# Plot das variacoes
ggplot(cimento, aes(x = Data)) +
geom_line(aes(y = delta_51t, color = "Delta 51"), linetype = "solid") +
geom_line(aes(y = delta_17, color = "Delta 17"), linetype = "solid") +
labs(
x = "Data",
y = "Variação",
title = "Variação de cada série"
) +
scale_color_manual(
values = c("Delta 51" = "blue", "Delta 17" = "red"),
labels = c("Delta 51", "Delta 17")
) +
theme_minimal() +
theme(
plot.title = element_text(size = 18, hjust = 0.5),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.title = element_blank(),
legend.text = element_text(size = 10)
)
# Regressao de uma variacao contra a outra
formula <- "delta_51t ~ delta_17"
OLS <- (lm(data = cimento, formula = formula))
summary(OLS)
##
## Call:
## lm(formula = formula, data = cimento)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.15759 -0.00765 0.00351 0.02041 0.18851
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.12599 0.08913 1.414 0.161
## delta_17 0.87671 0.07855 11.161 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1254 on 101 degrees of freedom
## Multiple R-squared: 0.5523, Adjusted R-squared: 0.5478
## F-statistic: 124.6 on 1 and 101 DF, p-value: < 2.2e-16
# Transformando em serie temporal
cimento_17 <- ts(cimento$cimento17)
cimento_51 <- ts(cimento$cimento51)
# Realizando teste de Dickey-Fuller aumentado
adf.test(diff(cimento_17))
##
## Augmented Dickey-Fuller Test
##
## data: diff(cimento_17)
## Dickey-Fuller = -3.3697, Lag order = 4, p-value = 0.06337
## alternative hypothesis: stationary
adf.test(diff(cimento_51))
##
## Augmented Dickey-Fuller Test
##
## data: diff(cimento_51)
## Dickey-Fuller = -3.1684, Lag order = 4, p-value = 0.09691
## alternative hypothesis: stationary
# Plotando as series e suas primeiras diferenças
plot(cimento_17)
plot(diff(cimento_17))
plot(cimento_51)
plot(diff(cimento_51))
# Criando a regressao de uma serie contra a outra
formula <- "diff(cimento51) ~ diff(cimento17)"
linear_model <- lm(data = cimento, formula = formula)
summary(linear_model)
##
## Call:
## lm(formula = formula, data = cimento)
##
## Residuals:
## Min 1Q Median 3Q Max
## -422787 -13347 -12493 12064 286771
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.335e+04 7.315e+03 1.824 0.0711 .
## diff(cimento17) 7.294e-01 3.732e-02 19.543 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 71030 on 100 degrees of freedom
## Multiple R-squared: 0.7925, Adjusted R-squared: 0.7904
## F-statistic: 381.9 on 1 and 100 DF, p-value: < 2.2e-16
plot(linear_model)
# Teste adf dos resíduos da série
adf.test(residuals(linear_model))
## Warning in adf.test(residuals(linear_model)): p-value smaller than printed
## p-value
##
## Augmented Dickey-Fuller Test
##
## data: residuals(linear_model)
## Dickey-Fuller = -4.1612, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
plot(residuals(linear_model))
# Plot da diferenca entre as series e da diferenca das diferençcs
plot((cimento$cimento17) - (cimento$cimento51))
plot(diff(cimento$cimento17) - diff(cimento$cimento51))
### Cointegracao
# Transformando em series temporais, tirando a primeira diferença e invertendo para poder projetar o passado.
ts_data17 <- rev(diff(ts(cimento$cimento17, start = 1990-10-01, frequency = 12)))
ts_data51 <- rev(diff(ts(cimento$cimento51, start = 1990-10-01, frequency = 12)))
# Teste de estacionariedade usando Augmented Dickey-Fuller (ADF) test
adf_test17 <- ur.df(ts_data17, type = "trend", lags = 0)
adf_test51 <- ur.df(ts_data51, type = "trend", lags = 0)
summary(adf_test17)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -148792 -24970 -14213 -304 1232234
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.701e+04 2.706e+04 0.629 0.530974
## z.lag.1 -2.829e-01 7.041e-02 -4.018 0.000115 ***
## tt -3.018e+01 4.572e+02 -0.066 0.947498
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 133900 on 98 degrees of freedom
## Multiple R-squared: 0.1416, Adjusted R-squared: 0.1241
## F-statistic: 8.084 on 2 and 98 DF, p-value: 0.000563
##
##
## Value of test-statistic is: -4.0181 5.3891 8.0836
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.99 -3.43 -3.13
## phi2 6.22 4.75 4.07
## phi3 8.43 6.49 5.47
summary(adf_test51)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -155633 -24057 -6849 753 596794
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.073e+04 1.750e+04 0.613 0.54098
## z.lag.1 -1.619e-01 5.519e-02 -2.934 0.00416 **
## tt -4.725e+01 2.935e+02 -0.161 0.87245
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 86000 on 98 degrees of freedom
## Multiple R-squared: 0.08097, Adjusted R-squared: 0.06222
## F-statistic: 4.317 on 2 and 98 DF, p-value: 0.01596
##
##
## Value of test-statistic is: -2.9344 2.8786 4.3173
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.99 -3.43 -3.13
## phi2 6.22 4.75 4.07
## phi3 8.43 6.49 5.47
# Criando um dataframe para poder rodar o teste Johansen
df <- data.frame(cimento17 = ts_data17, cimento51 = ts_data51)
#Esta função é usada para estimar modelos de vetor de correção de erros (VEC) para séries temporais cointegradas. Ela é usada após a confirmação de cointegração usando o teste ADF.
cointegration_test1 <- ca.jo(df, ecdet = "const", type="trace", K=2, spec="longrun",
season= NULL)
summary(cointegration_test1)
##
## ######################
## # Johansen-Procedure #
## ######################
##
## Test type: trace statistic , without linear trend and constant in cointegration
##
## Eigenvalues (lambda):
## [1] 4.055524e-01 1.332928e-01 5.551115e-16
##
## Values of teststatistic and critical values of test:
##
## test 10pct 5pct 1pct
## r <= 1 | 14.31 7.52 9.24 12.97
## r = 0 | 66.32 17.85 19.96 24.60
##
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
##
## cimento17.l2 cimento51.l2 constant
## cimento17.l2 1.000000 1.000000 1.000000e+00
## cimento51.l2 -1.089209 -3.931628 -1.511185e+00
## constant 2401.706044 142049.377851 1.874997e+06
##
## Weights W:
## (This is the loading matrix)
##
## cimento17.l2 cimento51.l2 constant
## cimento17.d -1.37697591 0.09742083 -1.896898e-17
## cimento51.d 0.04394564 0.05556889 -1.042128e-17
cointegration_test2 <- ca.jo(df, ecdet = "const", type="eigen", K=2, spec="longrun",
season= NULL)
summary(cointegration_test2)
##
## ######################
## # Johansen-Procedure #
## ######################
##
## Test type: maximal eigenvalue statistic (lambda max) , without linear trend and constant in cointegration
##
## Eigenvalues (lambda):
## [1] 4.055524e-01 1.332928e-01 5.551115e-16
##
## Values of teststatistic and critical values of test:
##
## test 10pct 5pct 1pct
## r <= 1 | 14.31 7.52 9.24 12.97
## r = 0 | 52.01 13.75 15.67 20.20
##
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
##
## cimento17.l2 cimento51.l2 constant
## cimento17.l2 1.000000 1.000000 1.000000e+00
## cimento51.l2 -1.089209 -3.931628 -1.511185e+00
## constant 2401.706044 142049.377851 1.874997e+06
##
## Weights W:
## (This is the loading matrix)
##
## cimento17.l2 cimento51.l2 constant
## cimento17.d -1.37697591 0.09742083 -1.896898e-17
## cimento51.d 0.04394564 0.05556889 -1.042128e-17
# cajools,Essa função retorna as regressões OLS de um VECM irrestrito, ou seja, ela retorna um objeto da classe lm. O usuário pode especificar o número exato de qual equação no VECM deve ser estimada e relatada, ou, se "reg.number=NULL", cada equação no VECM será estimada e os resultados correspondentes serão relatados. Neste caso a segunda serie, "R = 2" é o cimento 51.
cointegrating_vectors <- cajools(cointegration_test1, r = NULL)
summary(cointegrating_vectors)
## Response cimento17.d :
##
## Call:
## lm(formula = cimento17.d ~ cimento17.dl1 + cimento51.dl1 + cimento17.l2 +
## cimento51.l2 + constant - 1, data = data.mat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -169590 -26962 -10223 8519 1115183
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## cimento17.dl1 -0.5882 0.1698 -3.463 0.000803 ***
## cimento51.dl1 1.0480 0.3476 3.015 0.003293 **
## cimento17.l2 -1.2796 0.3049 -4.196 6.1e-05 ***
## cimento51.l2 1.1168 0.3521 3.172 0.002039 **
## constant 10531.4765 13646.7388 0.772 0.442195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 127300 on 95 degrees of freedom
## Multiple R-squared: 0.2474, Adjusted R-squared: 0.2078
## F-statistic: 6.245 on 5 and 95 DF, p-value: 4.739e-05
##
##
## Response cimento51.d :
##
## Call:
## lm(formula = cimento51.d ~ cimento17.dl1 + cimento51.dl1 + cimento17.l2 +
## cimento51.l2 + constant - 1, data = data.mat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -113654 -11998 -7976 8112 418389
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## cimento17.dl1 0.46857 0.08006 5.853 6.88e-08 ***
## cimento51.dl1 -0.23931 0.16383 -1.461 0.147
## cimento17.l2 0.09951 0.14374 0.692 0.490
## cimento51.l2 -0.26634 0.16596 -1.605 0.112
## constant 7999.07040 6432.82105 1.243 0.217
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 60030 on 95 degrees of freedom
## Multiple R-squared: 0.565, Adjusted R-squared: 0.5421
## F-statistic: 24.67 on 5 and 95 DF, p-value: 7.534e-16
cointegrating_vectors <- cajools(cointegration_test2, r = 2)
summary(cointegrating_vectors)
##
## Call:
## lm(formula = substitute(form1), data = data.mat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -113654 -11998 -7976 8112 418389
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## cimento17.dl1 0.46857 0.08006 5.853 6.88e-08 ***
## cimento51.dl1 -0.23931 0.16383 -1.461 0.147
## cimento17.l2 0.09951 0.14374 0.692 0.490
## cimento51.l2 -0.26634 0.16596 -1.605 0.112
## constant 7999.07040 6432.82105 1.243 0.217
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 60030 on 95 degrees of freedom
## Multiple R-squared: 0.565, Adjusted R-squared: 0.5421
## F-statistic: 24.67 on 5 and 95 DF, p-value: 7.534e-16
plot(cointegrating_vectors$residuals)
as.character(formula(cointegrating_vectors))
## [1] "~"
## [2] "z@Z0[, reg.number]"
## [3] "cimento17.dl1 + cimento51.dl1 + cimento17.l2 + cimento51.l2 + constant - 1"
cointegrating_vectors$coefficients
## cimento17.dl1 cimento51.dl1 cimento17.l2 cimento51.l2 constant
## 0.46857410 -0.23930919 0.09951453 -0.26634214 7999.07039842
### My lag function
myLag <- function(data, lag) {
data.frame(unclass(data[c(rep(NA, lag), 1:(nrow(data) - lag)), ]))
}
### Predict VECM funcition
predict.vecm <- function(object, new_data, predicted_var){
if (inherits(object, "VECM")) {
# Just valid for VECM models
# Get endogenus and exogenus variables
summary_vecm <- summary(object)
model_vars <- colnames(object$model)
endovars <- sub("Equation ", "", rownames(summary_vecm$bigcoefficients))
if (!(predicted_var %in% endovars)) {
stop("You must provide a valid endogenus variable.")
}
exovars <- NULL
if (object$exogen) {
ind_endovars <-
unlist(sapply(endovars, function(x) grep(x, model_vars), simplify = FALSE))
exovars <- model_vars[-ind_endovars]
exovars <- exovars[exovars != "ECT"]
}
# First step: join new_data and (lags + 1) last values from the calibration data
new_data <- data.frame(new_data)
if (!all(colnames(new_data) %in% c(endovars, exovars))) {
stop("new_data must have valid endogenus or exogenus column names.")
}
# Endovars but the one desired to be predicted
endovars2 <- endovars[endovars != predicted_var]
# if (!all(colnames(new_data) %in% c(endovars2, exovars))) {
# stop("new_data must have valid endogenus (all but the one desired to predict) or exogenus column names.")
# }
new_data <- new_data[, c(endovars2, exovars), drop = FALSE]
new_data <- cbind(NA, new_data)
colnames(new_data) <- c(predicted_var, endovars2, exovars)
# Previous values to obtain lag values and first differences (lags + 1)
dt_tail <- data.frame(tail(object$model[, c(endovars, exovars), drop = FALSE], object$lag + 1))
new_data <- rbind(dt_tail, new_data)
# Second step: get long rung relationship forecast (ECT term)
ect_vars <- rownames(object$model.specific$beta)
if ("const" %in% ect_vars) {
new_data$const <- 1
}
ect_coeff <- object$model.specific$beta[, 1]
new_data$ECT_0 <-
apply(sweep(new_data[, ect_vars], MARGIN = 2, ect_coeff, `*`), MARGIN = 1, sum)
# Get ECT-1 (Lag 1)
new_data$ECT <- as.numeric(quantmod::Lag(new_data$ECT_0, 1))
# Third step: get differences of the endogenus and exogenus variables provided in new_data
diff_data <- apply(new_data[, c(endovars, exovars)], MARGIN = 2, diff)
colnames(diff_data) <- paste0("DIFF_", c(endovars, exovars))
diff_data <- rbind(NA, diff_data)
new_data <- cbind(new_data, diff_data)
# Fourth step: get x lags of the endogenus and exogenus variables
for (k in 1:object$lag) {
iter <- myLag(new_data[, paste0("DIFF_", endovars)], k)
colnames(iter) <- paste0("DIFF_", endovars, " -", k)
new_data <- cbind(new_data, iter)
}
# Fifth step: recursive calculatioon
vecm_vars <- colnames(summary_vecm$bigcoefficients)
if ("Intercept" %in% vecm_vars) {
new_data$Intercept <- 1
}
vecm_vars[!(vecm_vars %in% c("ECT", "Intercept"))] <-
paste0("DIFF_", vecm_vars[!(vecm_vars %in% c("ECT", "Intercept"))])
equation <- paste("Equation", predicted_var)
equation_coeff <- summary_vecm$coefficients[equation, ]
predicted_var2 <- paste0("DIFF_", predicted_var)
for (k in (object$lag + 2):nrow(new_data)) {
# Estimate y_diff
new_data[k, predicted_var2] <-
sum(sweep(new_data[k, vecm_vars], MARGIN = 2, equation_coeff, `*`))
# Estimate y_diff lags
for (j in 1:object$lag) {
new_data[, paste0(predicted_var2, " -", j)] <-
as.numeric(quantmod::Lag(new_data[, predicted_var2], j))
}
# Estimate y
new_data[k, predicted_var] <-
new_data[(k - 1), predicted_var] + new_data[k, predicted_var2]
# Estimate ECT
new_data[k, "ECT_0"] <- sum(sweep(new_data[k, ect_vars], MARGIN = 2, ect_coeff, `*`))
if (k < nrow(new_data)) {
new_data[k + 1, "ECT"] <- new_data[k, "ECT_0"]
}
}
predicted_values <- new_data[(object$lag + 2):nrow(new_data), predicted_var]
} else {
stop("You must provide a valid VECM model.")
}
}
# importing data
### short data
path <- "d:/Users/thomas.daher/Desktop/cimento/dados_parte1/"
file_name <- "Obras hidreletricas.xlsx"
sheet_name <- "serie_curta"
file_path <- paste(path, file_name, sep = "")
cimento <- read_excel(file_path, sheet = sheet_name)
cimento$Data <- as.Date(cimento$Data, format = "%Y/%m/%d")
cimento <- filter(cimento, Data >= as.Date("1990-09-01"))
###Creating dataset for regression and cointegration test
cimento_17 <- rev(diff(ts(log(cimento$cimento17), start = c(1990, 9), frequency = 12)))
cimento_51 <- rev(diff(ts(log(cimento$cimento51), start = c(1990, 9), frequency = 12)))
dset <- cbind(cimento_17,cimento_51)
### long data
sheet_name <- "serie_longa"
file_path <- paste(path, file_name, sep = "")
cimento <- read_excel(file_path, sheet = sheet_name)
cimento_long <- read_excel(file_path, sheet = sheet_name)
cimento_long$Data <- as.Date(cimento_long$Data, format = "%Y/%m/%d")
cimento_17_long <- rev(diff(ts(log(cimento_long$cimento17[-1]), start = c(1979, 5), frequency = 12)))
cimento_51_long <- rev(diff(ts(log(cimento_long$cimento51[-1]), start = c(1979, 5), frequency = 12)))
dset2 <- cbind(cimento_17 = cimento_17_long,cimento_51 = cimento_51_long)
### Optimal number number of lags
lagselect <- VARselect(dset, lag.max = 12, type = "const")
lagselect$selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 4 1 1 4
lagselect$criteria
## 1 2 3 4 5
## AIC(n) -1.292406e+01 -1.292366e+01 -1.293839e+01 -1.298170e+01 -1.297742e+01
## HQ(n) -1.285686e+01 -1.281165e+01 -1.278158e+01 -1.278009e+01 -1.273100e+01
## SC(n) -1.275741e+01 -1.264590e+01 -1.254953e+01 -1.248174e+01 -1.236635e+01
## FPE(n) 2.438777e-06 2.440202e-06 2.405484e-06 2.305158e-06 2.317639e-06
## 6 7 8 9 10
## AIC(n) -1.294280e+01 -1.297595e+01 -1.296270e+01 -1.290029e+01 -1.292475e+01
## HQ(n) -1.265158e+01 -1.263992e+01 -1.258187e+01 -1.247466e+01 -1.245431e+01
## SC(n) -1.222063e+01 -1.214268e+01 -1.201833e+01 -1.184481e+01 -1.175817e+01
## FPE(n) 2.403158e-06 2.329938e-06 2.367882e-06 2.529731e-06 2.480048e-06
## 11 12
## AIC(n) -1.288170e+01 -1.282395e+01
## HQ(n) -1.236647e+01 -1.226391e+01
## SC(n) -1.160402e+01 -1.143517e+01
## FPE(n) 2.603809e-06 2.777469e-06
ctest1t <- ca.jo(dset, type = "trace", ecdet = "const", K = 4)
summary(ctest1t)
##
## ######################
## # Johansen-Procedure #
## ######################
##
## Test type: trace statistic , without linear trend and constant in cointegration
##
## Eigenvalues (lambda):
## [1] 3.486510e-01 4.356517e-02 4.582043e-19
##
## Values of teststatistic and critical values of test:
##
## test 10pct 5pct 1pct
## r <= 1 | 4.37 7.52 9.24 12.97
## r = 0 | 46.38 17.85 19.96 24.60
##
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
##
## cimento_17.l4 cimento_51.l4 constant
## cimento_17.l4 1.000000000 1.0000000 1.0000000
## cimento_51.l4 -1.027973518 -0.4145711 -0.6987698
## constant 0.002829715 -0.0763558 0.1746788
##
## Weights W:
## (This is the loading matrix)
##
## cimento_17.l4 cimento_51.l4 constant
## cimento_17.d -0.9441770 -0.15655702 -1.011588e-16
## cimento_51.d 0.5966696 -0.09515987 8.276229e-17
ctest1e <- ca.jo(dset, type = "eigen", ecdet = "const", K = 2)
summary(ctest1e)
##
## ######################
## # Johansen-Procedure #
## ######################
##
## Test type: maximal eigenvalue statistic (lambda max) , without linear trend and constant in cointegration
##
## Eigenvalues (lambda):
## [1] 3.998295e-01 4.303795e-02 -2.166081e-19
##
## Values of teststatistic and critical values of test:
##
## test 10pct 5pct 1pct
## r <= 1 | 4.40 7.52 9.24 12.97
## r = 0 | 51.05 13.75 15.67 20.20
##
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
##
## cimento_17.l2 cimento_51.l2 constant
## cimento_17.l2 1.0000000000 1.000000 1.000000
## cimento_51.l2 -1.0083809013 9.443679 1.597378
## constant 0.0008298347 -1.290361 2.192852
##
## Weights W:
## (This is the loading matrix)
##
## cimento_17.l2 cimento_51.l2 constant
## cimento_17.d -1.069497 -0.007963347 -1.800252e-18
## cimento_51.d 0.203797 -0.005561011 -5.114677e-19
### VECM
Model1 <- VECM(dset, 1, r = 1, estim = "2OLS", include = "const")
summary(Model1)
## #############
## ###Model VECM
## #############
## Full sample size: 102 End sample size: 100
## Number of variables: 2 Number of estimated slope parameters 8
## AIC -1310.296 BIC -1286.85 SSR 0.4759895
## Cointegrating vector (estimated by 2OLS):
## cimento_17 cimento_51
## r1 1 -1.001177
##
##
## ECT Intercept cimento_17 -1
## Equation cimento_17 -1.0884(0.2747)*** 0.0013(0.0060) 0.1284(0.1738)
## Equation cimento_51 0.1896(0.1703) 0.0010(0.0037) 0.0920(0.1078)
## cimento_51 -1
## Equation cimento_17 0.1222(0.1883)
## Equation cimento_51 0.1596(0.1168)
coef(Model1)
## ECT Intercept cimento_17 -1 cimento_51 -1
## Equation cimento_17 -1.0883615 0.001349639 0.12844130 0.1222050
## Equation cimento_51 0.1896342 0.001001921 0.09202481 0.1596238
#Diagnostic Tests
#Need to Transform VECM to VAR
Model1VAR <- vec2var(ctest1t, r = 1)
#Serial Correlation
Serial1 <- serial.test(Model1VAR, lags.pt = 2, type = "PT.asymptotic")
## Warning in pchisq(STATISTIC, df = PARAMETER): NaNs produzidos
## Warning in pchisq(STATISTIC, df = PARAMETER): NaNs produzidos
Serial1
##
## Portmanteau Test (asymptotic)
##
## data: Residuals of VAR object Model1VAR
## Chi-squared = 2.3317, df = -6, p-value = NA
#ARCH Effects
Arch1 <- arch.test(Model1VAR, lags.multi = 2, multivariate.only = TRUE)
Arch1
##
## ARCH (multivariate)
##
## data: Residuals of VAR object Model1VAR
## Chi-squared = 54.181, df = 18, p-value = 1.72e-05
#Normality of Residuals
Norm1 <- normality.test(Model1VAR, multivariate.only = TRUE)
Norm1
## $JB
##
## JB-Test (multivariate)
##
## data: Residuals of VAR object Model1VAR
## Chi-squared = 401.72, df = 4, p-value < 2.2e-16
##
##
## $Skewness
##
## Skewness only (multivariate)
##
## data: Residuals of VAR object Model1VAR
## Chi-squared = 77.294, df = 2, p-value < 2.2e-16
##
##
## $Kurtosis
##
## Kurtosis only (multivariate)
##
## data: Residuals of VAR object Model1VAR
## Chi-squared = 324.43, df = 2, p-value < 2.2e-16
#Impulse Response Functions
cimento51irf <- irf(Model1VAR, impulse = "cimento_17", response = "cimento_51", n.ahead = 20, boot = TRUE)
plot(cimento51irf, ylab = "cimento51", main = "cimento_17 shock to cimento_51")
cimento17irf <- irf(Model1VAR, impulse = "cimento_51", response = "cimento_17", n.ahead = 20, boot = TRUE)
plot(cimento17irf, ylab = "cimento17", main = "cimento_51 shock to cimento_17")
#Variance Decomposition
FEVD1 <- fevd(Model1VAR, n.ahead = 10)
plot(FEVD1)
### VECM forecast
cimento51_barra_diff <- rev(predict.vecm(Model1, new_data = dset2 , predicted_var = "cimento_51"))
# desrevertendo a série 51
plot(cimento51_barra_diff)
cimento51_barra_diff <- exp(cimento51_barra_diff)
cimento_51_barra <- cumsum(cimento51_barra_diff)
plot(cimento_51_barra)
#cimento_51_barra <- exp(cimento_51_barra)
df <- as.data.frame(cbind( cimento17 = (rev(cimento_17_long)),cimento51bar = cimento51_barra_diff))
ggplot(df, aes(x = 1:238)) +
geom_line(aes(y = cimento17, color = "cimento_17"), size = 1) +
geom_line(aes(y = cimento51bar, color = "cimento_51bar"), size = 1)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# desrevertendo a série 17
cimento_17_long2 <- rev(cimento_17_long)
cimento_17_long2 <- exp(cimento_17_long2)
cimento_17_long2 <- cumsum(cimento_17_long2)
#Plot
df <- as.data.frame(cbind(cimento_51_barra,cimento_17_long2))
### plots comparativos
ggplot(df, aes(x = 1:238)) +
geom_line(aes(y = cimento_17_long2, color = "cimento_17"), size = 1) +
geom_line(aes(y = cimento_51_barra, color = "cimento_51_barra"), size = 1) +
labs(title = "série 17 contra a 51 com realizado e estimado", y = "valor") +
scale_color_manual(values = c("cimento_17" = "blue", "cimento_51_barra" = "red")) +
theme_minimal()
### Somente o projetado
df <- df[1:(nrow(df) - 102), ]
ggplot(df, aes(x = 1:(136))) +
geom_line(aes(y = cimento_17_long2, color = "cimento_17"), size = 1) +
geom_line(aes(y = cimento_51_barra, color = "cimento_51_barra"), size = 1) +
labs(title = "séries plotando somente o projetado versus realizado cimento 17", y = "valor") +
scale_color_manual(values = c("cimento_17" = "blue", "cimento_51_barra" = "red"))
adf.test((cimento_51_barra))
##
## Augmented Dickey-Fuller Test
##
## data: (cimento_51_barra)
## Dickey-Fuller = -1.8826, Lag order = 6, p-value = 0.6252
## alternative hypothesis: stationary
adf.test(diff(cimento_51_barra))
##
## Augmented Dickey-Fuller Test
##
## data: diff(cimento_51_barra)
## Dickey-Fuller = -2.4977, Lag order = 6, p-value = 0.3666
## alternative hypothesis: stationary
plot(c(cimento_51_barra - cimento_17_long2))
# Parte2
# Antes de realizar a projeção do VECM devemos nos certificar de alguns fatores:
## Todas as séries são estacionárias em primeira diferença
## Determinar o lag ótimo (p) para o modelo
## Realizar o teste de Johansen para lag (p)
## Estimar o VAR irrestrito
## Com cointegração especificar o VECM com (p - l) Lags
### Apagando os dados
rm(list = ls(all.names = TRUE))
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3010107 160.8 4741034 253.2 4741034 253.2
## Vcells 5158963 39.4 10146329 77.5 8388608 64.0
## importando dados
path <- "d:/Users/thomas.daher/Desktop/cimento/dados_parte1/"
file_name <- "Obras hidreletricas.xlsx"
sheet_name <- "serie_curta"
file_path <- paste(path, file_name, sep = "")
cimento <- read_excel(file_path, sheet = sheet_name)
cimento$Data <- as.Date(cimento$Data, format = "%Y/%m/%d")
cimento <- filter(cimento, Data >= as.Date("1990-09-01"))
# Análise de estacionariedade das séries em primeira diferença
## Primeiro o teste o sem tirar a diferença, somente a invertendo e transformando em série temporal
cimento_17 <- rev((ts((cimento$cimento17), start = c(1990, 9), frequency = 12)))
cimento_51 <- rev((ts((cimento$cimento51), start = c(1990, 9), frequency = 12)))
plot(cimento_17)
plot(cimento_51)
adf.test(cimento_17)
##
## Augmented Dickey-Fuller Test
##
## data: cimento_17
## Dickey-Fuller = -2.165, Lag order = 4, p-value = 0.5083
## alternative hypothesis: stationary
adf.test(cimento_51)
##
## Augmented Dickey-Fuller Test
##
## data: cimento_51
## Dickey-Fuller = -2.065, Lag order = 4, p-value = 0.5497
## alternative hypothesis: stationary
## Vemos que não são estacionárias, portanto vamos agora tirar a primeira diferença.
cimento_17 <- rev(diff(ts((cimento$cimento17), start = c(1990, 9), frequency = 12)))
cimento_51 <- rev(diff(ts((cimento$cimento51), start = c(1990, 9), frequency = 12)))
plot(cimento_17)
plot(cimento_51)
adf.test(cimento_17)
##
## Augmented Dickey-Fuller Test
##
## data: cimento_17
## Dickey-Fuller = -3.3675, Lag order = 4, p-value = 0.06372
## alternative hypothesis: stationary
adf.test(cimento_51)
##
## Augmented Dickey-Fuller Test
##
## data: cimento_51
## Dickey-Fuller = -3.1483, Lag order = 4, p-value = 0.1007
## alternative hypothesis: stationary
## P_value série 17 = 0.0637 e da série 51 = 0.1007, considero que a 10% de confiança as séries são estacionárias em primeira diferença.
# Devemos agora determinar o lag ótimo para o modelo
dset <- cbind(cimento_17,cimento_51)
lagselect <- VARselect(dset, lag.max = 12, type = "const")
lagselect$selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 2 2 2 2
lagselect$criteria
## 1 2 3 4 5
## AIC(n) 4.472896e+01 4.451279e+01 4.452653e+01 4.459971e+01 4.463361e+01
## HQ(n) 4.479616e+01 4.462480e+01 4.468334e+01 4.480133e+01 4.488002e+01
## SC(n) 4.489561e+01 4.479055e+01 4.491539e+01 4.509968e+01 4.524467e+01
## FPE(n) 2.664168e+19 2.146642e+19 2.177212e+19 2.344185e+19 2.427702e+19
## 6 7 8 9 10
## AIC(n) 4.468770e+01 4.466925e+01 4.472872e+01 4.475248e+01 4.481572e+01
## HQ(n) 4.497892e+01 4.500527e+01 4.510955e+01 4.517810e+01 4.528615e+01
## SC(n) 4.540987e+01 4.550252e+01 4.567309e+01 4.580795e+01 4.598229e+01
## FPE(n) 2.566777e+19 2.525421e+19 2.687967e+19 2.762815e+19 2.956815e+19
## 11 12
## AIC(n) 4.462365e+01 4.464467e+01
## HQ(n) 4.513888e+01 4.520471e+01
## SC(n) 4.590133e+01 4.603346e+01
## FPE(n) 2.453953e+19 2.523222e+19
## Para todos os testes o Lag de 2 é o mais adequando a estas duas séries
# Realizar o teste de Johansen para lag (p), que no nosso caso p = 2.
ctest1e <-ca.jo(dset, type = c("eigen"), ecdet = c("none"), K = 2,
spec=c("longrun"), season = NULL, dumvar = NULL)
summary(ctest1e)
##
## ######################
## # Johansen-Procedure #
## ######################
##
## Test type: maximal eigenvalue statistic (lambda max) , with linear trend
##
## Eigenvalues (lambda):
## [1] 0.4054626 0.1331663
##
## Values of teststatistic and critical values of test:
##
## test 10pct 5pct 1pct
## r <= 1 | 14.29 6.50 8.18 11.65
## r = 0 | 52.00 12.91 14.90 19.19
##
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
##
## cimento_17.l2 cimento_51.l2
## cimento_17.l2 1.000000 1.00000
## cimento_51.l2 -1.089155 -3.90878
##
## Weights W:
## (This is the loading matrix)
##
## cimento_17.l2 cimento_51.l2
## cimento_17.d -1.37773904 0.09818396
## cimento_51.d 0.04349453 0.05601999
# Estimar o VAR irrestrito
modelo_var_irrestrito <- VAR(dset, p = 2, type = "none")
summary(modelo_var_irrestrito)
##
## VAR Estimation Results:
## =========================
## Endogenous variables: cimento_17, cimento_51
## Deterministic variables: none
## Sample size: 100
## Log Likelihood: -2503.352
## Roots of the characteristic polynomial:
## 0.6565 0.4386 0.3843 0.3843
## Call:
## VAR(y = dset, p = 2, type = "none")
##
##
## Estimation results for equation cimento_17:
## ===========================================
## cimento_17 = cimento_17.l1 + cimento_51.l1 + cimento_17.l2 + cimento_51.l2
##
## Estimate Std. Error t value Pr(>|t|)
## cimento_17.l1 0.40729 0.16939 2.404 0.018116 *
## cimento_51.l1 1.07871 0.34453 3.131 0.002309 **
## cimento_17.l2 -0.71201 0.20193 -3.526 0.000649 ***
## cimento_51.l2 0.08936 0.18254 0.490 0.625569
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 127100 on 96 degrees of freedom
## Multiple R-Squared: 0.6043, Adjusted R-squared: 0.5878
## F-statistic: 36.65 on 4 and 96 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation cimento_51:
## ===========================================
## cimento_51 = cimento_17.l1 + cimento_51.l1 + cimento_17.l2 + cimento_51.l2
##
## Estimate Std. Error t value Pr(>|t|)
## cimento_17.l1 0.46514 0.08024 5.797 8.61e-08 ***
## cimento_51.l1 0.78405 0.16321 4.804 5.73e-06 ***
## cimento_17.l2 -0.38474 0.09566 -4.022 0.000115 ***
## cimento_51.l2 -0.01144 0.08647 -0.132 0.894991
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 60200 on 96 degrees of freedom
## Multiple R-Squared: 0.8715, Adjusted R-squared: 0.8662
## F-statistic: 162.8 on 4 and 96 DF, p-value: < 2.2e-16
##
##
##
## Covariance matrix of residuals:
## cimento_17 cimento_51
## cimento_17 1.606e+10 6.089e+09
## cimento_51 6.089e+09 3.573e+09
##
## Correlation matrix of residuals:
## cimento_17 cimento_51
## cimento_17 1.0000 0.8038
## cimento_51 0.8038 1.0000
# Camo há cointegração visto no teste ca.jo(), devemos especificar o VECM com (p - l) Lags
vecm_model <- VECM(
dset,
2,
r = 1,
include = c("none"),
beta = NULL,
estim = c("ML"),
LRinclude = c("none"),
exogen = NULL
)
summary(vecm_model)
## #############
## ###Model VECM
## #############
## Full sample size: 102 End sample size: 99
## Number of variables: 2 Number of estimated slope parameters 10
## AIC 4423.837 BIC 4452.383 SSR 2.070318e+12
## Cointegrating vector (estimated by ML):
## cimento_17 cimento_51
## r1 1 -1.116972
##
##
## ECT cimento_17 -1 cimento_51 -1
## Equation cimento_17 -1.1077(0.4116)** 0.5536(0.3293). 0.0798(0.4047)
## Equation cimento_51 0.1153(0.1957) 0.3695(0.1565)* 0.0130(0.1924)
## cimento_17 -2 cimento_51 -2
## Equation cimento_17 -0.1637(0.2300) -0.1965(0.1853)
## Equation cimento_51 -0.0503(0.1093) -0.0254(0.0881)
predicted <- predict(vecm_model, n.ahead= 1)
### long data import
sheet_name <- "serie_longa"
file_path <- paste(path, file_name, sep = "")
cimento_long <- read_excel(file_path, sheet = sheet_name)
cimento_long$Data <- as.Date(cimento_long$Data, format = "%Y/%m/%d")
cimento_17_long <- rev(diff(ts((cimento_long$cimento17[-1]), start = c(1979, 5), frequency = 12)))
### projeção dos vlores para cimento 51
num_predictions <- (length(cimento_17_long) - length(cimento_51))
#Variável X de ajuste
exogenous <- cimento_17_long
exogenous <- tail(exogenous, n = num_predictions)
df <- dset
# Loop to iteratively predict and update data
for (i in 1:num_predictions) {
vecm_model <- VECM(
df,
2,
r = 1,
include = c("none"),
beta = NULL,
estim = c("2OLS"),
LRinclude = c("none"),
exogen = NULL
)
predicted <- as.data.frame(predict(vecm_model, n.ahead = 1))
# Update the original data with the predicted value
df <- rbind(df, predicted)
# changing the last row value to 1
num_rows <- nrow(df)
# Check if 'cimento_17' is a column in df
if ("cimento_17" %in% colnames(df)) {
df$cimento_17[num_rows] <- head(exogenous, 1)
} else {
print("coluna cimento_17 nao encontrada.")
}
exogenous <- exogenous[-1]
}
plot(rev(df$cimento_51) %>% cumsum())
plot(rev(df$cimento_17) %>% cumsum())
# desrevertendo e transformando em série novamente
cimento_17 <- rev(df$cimento_17) %>% cumsum()
cimento_51 <- rev(df$cimento_51) %>% cumsum()
df <- as.data.frame(cbind(cimento_17,cimento_51))
df2 <- df[1:(nrow(df) - 102), ]
ggplot(df2, aes(x = 1:(136))) +
geom_line(aes(y = cimento_17, color = "cimento_17"), size = 1) +
geom_line(aes(y = cimento_51, color = "cimento_51_barra"), size = 1) +
labs(title = "Séries plotando somente o projetado versus realizado cimento 17", y = "valor") +
scale_color_manual(values = c("cimento_17" = "blue", "cimento_51_barra" = "red"))
ggplot(df, aes(x = 1:238)) +
geom_line(aes(y = cimento_17, color = "cimento_17"), size = 1) +
geom_line(aes(y = cimento_51, color = "cimento_51_barra"), size = 1) +
labs(title = "série 17 contra a 51 com realizado e estimado", y = "valor") +
scale_color_manual(values = c("cimento_17" = "blue", "cimento_51_barra" = "red")) +
theme_minimal()