# install.packages(c("vars", "urca", "tseries", "ggplot2", "gridExtra"))

library(vars)
## Cargando paquete requerido: MASS
## Cargando paquete requerido: strucchange
## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Cargando paquete requerido: sandwich
## Cargando paquete requerido: urca
## Cargando paquete requerido: lmtest
library(urca)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(ggplot2)
library(gridExtra)

############################################################
# 1. SIMULACIÓN DE DATOS
############################################################

set.seed(123)

n <- 200

# Generamos 3 series con cierta relación entre ellas
e1 <- rnorm(n)
e2 <- rnorm(n)
e3 <- rnorm(n)

y1 <- cumsum(e1)
y2 <- 0.5 * y1 + cumsum(e2)
y3 <- 0.3 * y1 + 0.2 * y2 + cumsum(e3)

data <- data.frame(y1, y2, y3)

############################################################
# 2. GRÁFICAS DE LAS SERIES
############################################################

g1 <- ggplot(data, aes(x = 1:n, y = y1)) +
  geom_line(color = "blue") +
  ggtitle("Serie y1")

g2 <- ggplot(data, aes(x = 1:n, y = y2)) +
  geom_line(color = "red") +
  ggtitle("Serie y2")

g3 <- ggplot(data, aes(x = 1:n, y = y3)) +
  geom_line(color = "green") +
  ggtitle("Serie y3")

grid.arrange(g1, g2, g3, ncol = 1)

############################################################
# 3. PRUEBAS DE ESTACIONARIEDAD (ADF)
############################################################

adf.test(y1)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y1
## Dickey-Fuller = -2.2954, Lag order = 5, p-value = 0.4524
## alternative hypothesis: stationary
adf.test(y2)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y2
## Dickey-Fuller = -1.8185, Lag order = 5, p-value = 0.6521
## alternative hypothesis: stationary
adf.test(y3)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y3
## Dickey-Fuller = -0.69396, Lag order = 5, p-value = 0.9696
## alternative hypothesis: stationary
# Normalmente serán no estacionarias → diferenciar
dy1 <- diff(y1)
dy2 <- diff(y2)
dy3 <- diff(y3)

############################################################
# 4. SELECCIÓN DE REZAGOS PARA VAR
############################################################

data_diff <- na.omit(data.frame(dy1, dy2, dy3))

lagselect <- VARselect(data_diff, lag.max = 10, type = "const")
lagselect$criteria
##                   1           2          3          4          5          6
## AIC(n) -0.089412744 -0.02552013 0.02077339 0.03621041 0.07000952 0.07541474
## HQ(n)  -0.006028002  0.12040317 0.22923525 0.30721083 0.40354849 0.47149227
## SC(n)   0.116412464  0.33467398 0.53533641 0.70514234 0.89331035 1.05308448
## FPE(n)  0.914485397  0.97490189 1.02129367 1.03755098 1.07382621 1.08053800
##                7         8         9        10
## AIC(n) 0.1357496 0.2009560 0.2363948 0.2991204
## HQ(n)  0.5943657 0.7221106 0.8200880 0.9453521
## SC(n)  1.2677882 1.4873635 1.6771712 1.8942657
## FPE(n) 1.1490435 1.2283035 1.2750493 1.3608247
############################################################
# 5. MODELO VAR
############################################################

var_model <- VAR(data_diff, p = 2, type = "const")
summary(var_model)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: dy1, dy2, dy3 
## Deterministic variables: const 
## Sample size: 197 
## Log Likelihood: -811.329 
## Roots of the characteristic polynomial:
## 0.3071 0.3071 0.3019 0.3019 0.127 0.04695
## Call:
## VAR(y = data_diff, p = 2, type = "const")
## 
## 
## Estimation results for equation dy1: 
## ==================================== 
## dy1 = dy1.l1 + dy2.l1 + dy3.l1 + dy1.l2 + dy2.l2 + dy3.l2 + const 
## 
##        Estimate Std. Error t value Pr(>|t|)
## dy1.l1 -0.07781    0.08172  -0.952    0.342
## dy2.l1  0.04985    0.06962   0.716    0.475
## dy3.l1 -0.02636    0.07096  -0.372    0.711
## dy1.l2 -0.07612    0.08169  -0.932    0.353
## dy2.l2 -0.06299    0.06969  -0.904    0.367
## dy3.l2  0.05774    0.07108   0.812    0.418
## const  -0.01161    0.06765  -0.172    0.864
## 
## 
## Residual standard error: 0.9476 on 190 degrees of freedom
## Multiple R-Squared: 0.0207,  Adjusted R-squared: -0.01023 
## F-statistic: 0.6692 on 6 and 190 DF,  p-value: 0.6746 
## 
## 
## Estimation results for equation dy2: 
## ==================================== 
## dy2 = dy1.l1 + dy2.l1 + dy3.l1 + dy1.l2 + dy2.l2 + dy3.l2 + const 
## 
##         Estimate Std. Error t value Pr(>|t|)
## dy1.l1  0.011035   0.094613   0.117    0.907
## dy2.l1  0.008654   0.080612   0.107    0.915
## dy3.l1 -0.069874   0.082156  -0.851    0.396
## dy1.l2  0.042985   0.094583   0.454    0.650
## dy2.l2 -0.067277   0.080682  -0.834    0.405
## dy3.l2  0.059986   0.082298   0.729    0.467
## const   0.023239   0.078323   0.297    0.767
## 
## 
## Residual standard error: 1.097 on 190 degrees of freedom
## Multiple R-Squared: 0.01119, Adjusted R-squared: -0.02003 
## F-statistic: 0.3585 on 6 and 190 DF,  p-value: 0.9043 
## 
## 
## Estimation results for equation dy3: 
## ==================================== 
## dy3 = dy1.l1 + dy2.l1 + dy3.l1 + dy1.l2 + dy2.l2 + dy3.l2 + const 
## 
##        Estimate Std. Error t value Pr(>|t|)
## dy1.l1 -0.01165    0.09030  -0.129    0.897
## dy2.l1  0.03440    0.07693   0.447    0.655
## dy3.l1 -0.11418    0.07841  -1.456    0.147
## dy1.l2  0.02235    0.09027   0.248    0.805
## dy2.l2  0.02897    0.07700   0.376    0.707
## dy3.l2 -0.01984    0.07854  -0.253    0.801
## const   0.04433    0.07475   0.593    0.554
## 
## 
## Residual standard error: 1.047 on 190 degrees of freedom
## Multiple R-Squared: 0.01378, Adjusted R-squared: -0.01736 
## F-statistic: 0.4425 on 6 and 190 DF,  p-value: 0.8497 
## 
## 
## 
## Covariance matrix of residuals:
##        dy1    dy2    dy3
## dy1 0.8979 0.4345 0.3367
## dy2 0.4345 1.2037 0.3129
## dy3 0.3367 0.3129 1.0964
## 
## Correlation matrix of residuals:
##        dy1    dy2    dy3
## dy1 1.0000 0.4179 0.3394
## dy2 0.4179 1.0000 0.2724
## dy3 0.3394 0.2724 1.0000
############################################################
# 6. CAUSALIDAD DE GRANGER
############################################################

# ¿y2 causa a y1?
granger_y2_y1 <- causality(var_model, cause = "dy2")
print(granger_y2_y1)
## $Granger
## 
##  Granger causality H0: dy2 do not Granger-cause dy1 dy3
## 
## data:  VAR object var_model
## F-Test = 0.48204, df1 = 4, df2 = 570, p-value = 0.7489
## 
## 
## $Instant
## 
##  H0: No instantaneous causality between: dy2 and dy1 dy3
## 
## data:  VAR object var_model
## Chi-squared = 31.997, df = 2, p-value = 1.127e-07
# ¿y1 causa a y2?
granger_y1_y2 <- causality(var_model, cause = "dy1")
print(granger_y1_y2)
## $Granger
## 
##  Granger causality H0: dy1 do not Granger-cause dy2 dy3
## 
## data:  VAR object var_model
## F-Test = 0.065325, df1 = 4, df2 = 570, p-value = 0.9922
## 
## 
## $Instant
## 
##  H0: No instantaneous causality between: dy1 and dy2 dy3
## 
## data:  VAR object var_model
## Chi-squared = 36.783, df = 2, p-value = 1.03e-08
############################################################
# 7. GRÁFICAS: FUNCIONES IMPULSO-RESPUESTA
############################################################

irf_result <- irf(var_model, impulse = "dy1", response = "dy2", n.ahead = 10, boot = TRUE)
plot(irf_result)

############################################################
# 8. PRUEBA DE COINTEGRACIÓN DE JOHANSEN
############################################################

# Usamos los datos en niveles
johansen_test <- ca.jo(data, type = "trace", ecdet = "const", K = 2)
summary(johansen_test)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1] 8.137096e-02 3.762205e-02 9.645518e-03 4.327346e-19
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 2 |  1.92  7.52  9.24 12.97
## r <= 1 |  9.51 17.85 19.96 24.60
## r = 0  | 26.32 32.00 34.91 41.07
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##              y1.l2      y2.l2      y3.l2   constant
## y1.l2      1.00000  1.0000000  1.0000000  1.0000000
## y2.l2     27.41840 -0.1224349 -0.2961117 -0.5281511
## y3.l2    -20.63583  0.2753543 -0.6755809 -0.6990262
## constant -70.12262 -2.9265132  9.3488089 -3.8004592
## 
## Weights W:
## (This is the loading matrix)
## 
##              y1.l2        y2.l2        y3.l2      constant
## y1.d -0.0024501104 -0.045267154 -0.004166726 -1.045478e-17
## y2.d -0.0044105866  0.006384182  0.004580397 -2.038217e-17
## y3.d -0.0005113845 -0.044872784  0.010822444 -4.288473e-19
############################################################
# 9. MODELO VECM
############################################################

vecm_model <- cajorls(johansen_test, r = 1) # r = número de relaciones de cointegración
summary(vecm_model$rlm)
## Response y1.d :
## 
## Call:
## lm(formula = y1.d ~ ect1 + y1.dl1 + y2.dl1 + y3.dl1 - 1, data = data.mat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.47332 -0.66489 -0.03653  0.59182  2.92279 
## 
## Coefficients:
##         Estimate Std. Error t value Pr(>|t|)  
## ect1   -0.002450   0.001003  -2.443   0.0154 *
## y1.dl1 -0.090513   0.080547  -1.124   0.2625  
## y2.dl1  0.022443   0.069903   0.321   0.7485  
## y3.dl1 -0.027826   0.069669  -0.399   0.6900  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9357 on 194 degrees of freedom
## Multiple R-squared:  0.03856,    Adjusted R-squared:  0.01874 
## F-statistic: 1.945 on 4 and 194 DF,  p-value: 0.1045
## 
## 
## Response y2.d :
## 
## Call:
## lm(formula = y2.d ~ ect1 + y1.dl1 + y2.dl1 + y3.dl1 - 1, data = data.mat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.54472 -0.63137 -0.09289  0.72364  2.82058 
## 
## Coefficients:
##         Estimate Std. Error t value Pr(>|t|)    
## ect1   -0.004411   0.001124  -3.923 0.000121 ***
## y1.dl1 -0.018630   0.090306  -0.206 0.836777    
## y2.dl1 -0.054010   0.078372  -0.689 0.491556    
## y3.dl1 -0.054515   0.078110  -0.698 0.486056    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.049 on 194 degrees of freedom
## Multiple R-squared:  0.07824,    Adjusted R-squared:  0.05924 
## F-statistic: 4.117 on 4 and 194 DF,  p-value: 0.003175
## 
## 
## Response y3.d :
## 
## Call:
## lm(formula = y3.d ~ ect1 + y1.dl1 + y2.dl1 + y3.dl1 - 1, data = data.mat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0091 -0.6482  0.0961  0.6689  3.1905 
## 
## Coefficients:
##          Estimate Std. Error t value Pr(>|t|)
## ect1   -0.0005114  0.0011120  -0.460    0.646
## y1.dl1 -0.0156219  0.0893191  -0.175    0.861
## y2.dl1  0.0262895  0.0775161   0.339    0.735
## y3.dl1 -0.1065429  0.0772565  -1.379    0.169
## 
## Residual standard error: 1.038 on 194 degrees of freedom
## Multiple R-squared:  0.01279,    Adjusted R-squared:  -0.007567 
## F-statistic: 0.6282 on 4 and 194 DF,  p-value: 0.6429
############################################################
# 10. INTERPRETACIÓN 
############################################################

# - Prueba de Granger:
#   Evalúa si una variable ayuda a predecir otra.
#   Si p-value < 0.05 → hay causalidad en sentido de Granger.

# - VAR:
#   Modelo de variables endógenas donde todas dependen de sus rezagos y de las otras.

# - Johansen:
#   Detecta cointegración (relaciones de largo plazo).
#   Usa dos tests:
#     * Trace
#     * Eigenvalue

# - VECM:
#   Se usa cuando hay cointegración.
#   Incluye:
#     * dinámica de corto plazo
#     * corrección de error (equilibrio de largo plazo)