###############################################################
# ANALISIS DE CAUSALIDAD DE GRANGER Y MODELO VECM EN R
###############################################################

#--------------------------------------------------------------
# 1. LIBRERIAS
#--------------------------------------------------------------

# install.packages(c("vars","urca","tseries","ggplot2"))

library(vars)
## Cargando paquete requerido: MASS
## Cargando paquete requerido: strucchange
## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Cargando paquete requerido: sandwich
## Cargando paquete requerido: urca
## Cargando paquete requerido: lmtest
library(urca)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(ggplot2)

#--------------------------------------------------------------
# 2. GENERAR DATOS SIMULADOS
#--------------------------------------------------------------

set.seed(123)

n <- 200

# Serie X (caminata aleatoria)
x <- cumsum(rnorm(n))

# Serie Y relacionada con X
y <- 0.7*x + cumsum(rnorm(n))

data <- data.frame(
  tiempo = 1:n,
  x = x,
  y = y
)

#--------------------------------------------------------------
# 3. GRAFICAR SERIES
#--------------------------------------------------------------

ggplot(data, aes(tiempo, x)) +
  geom_line(color="blue") +
  ggtitle("Serie X") +
  theme_minimal()

ggplot(data, aes(tiempo, y)) +
  geom_line(color="red") +
  ggtitle("Serie Y") +
  theme_minimal()

#--------------------------------------------------------------
# 4. TEST DE ESTACIONARIEDAD
#--------------------------------------------------------------

# Augmented Dickey Fuller
adf.test(x)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  x
## Dickey-Fuller = -2.2954, Lag order = 5, p-value = 0.4524
## alternative hypothesis: stationary
adf.test(y)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y
## Dickey-Fuller = -1.9402, Lag order = 5, p-value = 0.6012
## alternative hypothesis: stationary
#--------------------------------------------------------------
# 5. DIFERENCIAS
#--------------------------------------------------------------

dx <- diff(x)
dy <- diff(y)

adf.test(dx)
## Warning in adf.test(dx): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  dx
## Dickey-Fuller = -6.1271, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
adf.test(dy)
## Warning in adf.test(dy): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  dy
## Dickey-Fuller = -6.1003, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
#--------------------------------------------------------------
# 6. SELECCION DE REZAGOS
#--------------------------------------------------------------

lag_selection <- VARselect(
  cbind(x,y),
  lag.max = 10,
  type = "const"
)

lag_selection
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      1      1      1      1 
## 
## $criteria
##                  1           2           3           4           5           6
## AIC(n) -0.11914613 -0.08477788 -0.05524744 -0.03976213 -0.04446851 -0.02986155
## HQ(n)  -0.07760964 -0.01555040  0.04167103  0.08484734  0.10783195  0.15012990
## SC(n)  -0.01660853  0.08611812  0.18400696  0.26785068  0.33150270  0.41446806
## FPE(n)  0.88768273  0.91873866  0.94631412  0.96115439  0.95675374  0.97099567
##                  7           8         9         10
## AIC(n) -0.04233883 -0.01431238 0.0071592 0.04185699
## HQ(n)   0.16534361  0.22106106 0.2702236 0.33261241
## SC(n)   0.47034918  0.56673404 0.6565640 0.75962021
## FPE(n)  0.95917639  0.98673604 1.0085368 1.04463816
# Seleccionamos rezagos sugeridos por AIC
p <- lag_selection$selection["AIC(n)"]

#--------------------------------------------------------------
# 7. TEST DE COINTEGRACION DE JOHANSEN
#--------------------------------------------------------------

johansen_test <- ca.jo(
  cbind(x,y),
  type = "trace",
  ecdet = "const",
  K = 2
)

summary(johansen_test)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1]  3.418694e-02  1.302707e-02 -6.938894e-18
## 
## Values of teststatistic and critical values of test:
## 
##          test 10pct  5pct  1pct
## r <= 1 | 2.60  7.52  9.24 12.97
## r = 0  | 9.48 17.85 19.96 24.60
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##                x.l2       y.l2    constant
## x.l2      1.0000000  1.0000000   1.0000000
## y.l2      0.2461803 -0.8827394  -0.5664161
## constant -3.6405120  6.8116898 -12.1370731
## 
## Weights W:
## (This is the loading matrix)
## 
##            x.l2        y.l2      constant
## x.d -0.05413976 0.003020397 -2.232116e-18
## y.d -0.02910854 0.024963738  2.042803e-18
#--------------------------------------------------------------
# 8. ESTIMAR MODELO VECM
#--------------------------------------------------------------

vecm_model <- cajorls(
  johansen_test,
  r = 1
)

summary(vecm_model$rlm)
## Response x.d :
## 
## Call:
## lm(formula = x.d ~ ect1 + x.dl1 + y.dl1 - 1, data = data.mat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.24750 -0.57470 -0.04524  0.56278  2.90353 
## 
## Coefficients:
##       Estimate Std. Error t value Pr(>|t|)   
## ect1  -0.05414    0.02081  -2.602  0.00998 **
## x.dl1 -0.13724    0.08491  -1.616  0.10767   
## y.dl1  0.05542    0.06718   0.825  0.41040   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9323 on 195 degrees of freedom
## Multiple R-squared:  0.04054,    Adjusted R-squared:  0.02578 
## F-statistic: 2.746 on 3 and 195 DF,  p-value: 0.04417
## 
## 
## Response y.d :
## 
## Call:
## lm(formula = y.d ~ ect1 + x.dl1 + y.dl1 - 1, data = data.mat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7093 -0.7853  0.0208  0.7071  3.5791 
## 
## Coefficients:
##       Estimate Std. Error t value Pr(>|t|)
## ect1  -0.02911    0.02629  -1.107    0.270
## x.dl1 -0.05890    0.10730  -0.549    0.584
## y.dl1  0.01211    0.08488   0.143    0.887
## 
## Residual standard error: 1.178 on 195 degrees of freedom
## Multiple R-squared:  0.006979,   Adjusted R-squared:  -0.008298 
## F-statistic: 0.4568 on 3 and 195 DF,  p-value: 0.7128
# Interpretación:
# El coeficiente del término de corrección de error
# indica qué tan rápido el sistema vuelve al equilibrio
# de largo plazo.

#--------------------------------------------------------------
# 9. ESTIMAR MODELO VAR
#--------------------------------------------------------------

var_model <- VAR(
  cbind(x,y),
  p = 2,
  type = "const"
)

summary(var_model)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: x, y 
## Deterministic variables: const 
## Sample size: 198 
## Log Likelihood: -541.565 
## Roots of the characteristic polynomial:
## 0.9725 0.9513 0.03264 0.02993
## Call:
## VAR(y = cbind(x, y), p = 2, type = "const")
## 
## 
## Estimation results for equation x: 
## ================================== 
## x = x.l1 + y.l1 + x.l2 + y.l2 + const 
## 
##       Estimate Std. Error t value Pr(>|t|)    
## x.l1   0.86391    0.08550  10.104   <2e-16 ***
## y.l1   0.05362    0.06801   0.788   0.4314    
## x.l2   0.08497    0.08495   1.000   0.3184    
## y.l2  -0.06962    0.06796  -1.024   0.3069    
## const  0.21767    0.13129   1.658   0.0989 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.937 on 193 degrees of freedom
## Multiple R-Squared: 0.8852,  Adjusted R-squared: 0.8828 
## F-statistic: 371.9 on 4 and 193 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation y: 
## ================================== 
## y = x.l1 + y.l1 + x.l2 + y.l2 + const 
## 
##       Estimate Std. Error t value Pr(>|t|)    
## x.l1  -0.04943    0.10747  -0.460    0.646    
## y.l1   0.99727    0.08549  11.666   <2e-16 ***
## x.l2   0.04529    0.10678   0.424    0.672    
## y.l2  -0.02647    0.08542  -0.310    0.757    
## const  0.27602    0.16502   1.673    0.096 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 1.178 on 193 degrees of freedom
## Multiple R-Squared: 0.9479,  Adjusted R-squared: 0.9468 
## F-statistic:   877 on 4 and 193 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##        x      y
## x 0.8780 0.6008
## y 0.6008 1.3873
## 
## Correlation matrix of residuals:
##        x      y
## x 1.0000 0.5444
## y 0.5444 1.0000
#--------------------------------------------------------------
# 10. CAUSALIDAD DE GRANGER
#--------------------------------------------------------------

# X causa Y?
causality(var_model, cause="x")
## $Granger
## 
##  Granger causality H0: x do not Granger-cause y
## 
## data:  VAR object var_model
## F-Test = 0.10715, df1 = 2, df2 = 386, p-value = 0.8984
## 
## 
## $Instant
## 
##  H0: No instantaneous causality between: x and y
## 
## data:  VAR object var_model
## Chi-squared = 45.267, df = 1, p-value = 1.719e-11
# Y causa X?
causality(var_model, cause="y")
## $Granger
## 
##  Granger causality H0: y do not Granger-cause x
## 
## data:  VAR object var_model
## F-Test = 1.1318, df1 = 2, df2 = 386, p-value = 0.3235
## 
## 
## $Instant
## 
##  H0: No instantaneous causality between: y and x
## 
## data:  VAR object var_model
## Chi-squared = 45.267, df = 1, p-value = 1.719e-11
# Si p-value < 0.05 se rechaza H0
# existe causalidad de Granger

#--------------------------------------------------------------
# 11. FUNCION IMPULSO RESPUESTA
#--------------------------------------------------------------

irf_model <- irf(
  var_model,
  impulse = "x",
  response = "y",
  n.ahead = 10,
  boot = TRUE
)

plot(irf_model)

# Interpretación:
# muestra cómo un shock en X afecta a Y
# a lo largo del tiempo.

#--------------------------------------------------------------
# 12. DESCOMPOSICION DE VARIANZA
#--------------------------------------------------------------

fevd_model <- fevd(
  var_model,
  n.ahead = 10
)

plot(fevd_model)

# Interpretación:
# muestra qué porcentaje de la varianza
# del error de pronóstico es explicado
# por cada variable.

EJEMPLO 2

#--------------------------------------------------------------
# 1. LIBRERIAS
#--------------------------------------------------------------

#install.packages(c("vars","urca","tseries","ggplot2","forecast","reshape2"))

library(vars)
library(urca)
library(tseries)
library(ggplot2)
library(forecast)
library(reshape2)

#--------------------------------------------------------------
# 2. GENERAR DATOS ECONOMICOS SIMULADOS
#--------------------------------------------------------------

# Simularemos dos variables macroeconómicas:
# ingreso (income)
# consumo (consumption)
# que tienen relación de largo plazo

set.seed(123)

n <- 150

income <- cumsum(rnorm(n,0.5,1))
consumption <- 0.8*income + cumsum(rnorm(n,0,1))

data <- data.frame(
  time = 1:n,
  income = income,
  consumption = consumption
)

#--------------------------------------------------------------
# 3. GRAFICA DE SERIES
#--------------------------------------------------------------

data_long <- melt(data,id="time")

ggplot(data_long, aes(time,value,color=variable))+
  geom_line(size=1.2)+
  theme_minimal()+
  labs(title="Series económicas simuladas",
       x="Tiempo",
       y="Valor",
       color="Variable")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#--------------------------------------------------------------
# 4. TEST DE ESTACIONARIEDAD
#--------------------------------------------------------------

adf.test(income)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  income
## Dickey-Fuller = -1.3541, Lag order = 5, p-value = 0.8455
## alternative hypothesis: stationary
adf.test(consumption)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  consumption
## Dickey-Fuller = -3.339, Lag order = 5, p-value = 0.06742
## alternative hypothesis: stationary
# Si no son estacionarias, diferenciamos

dincome <- diff(income)
dconsumption <- diff(consumption)

adf.test(dincome)
## Warning in adf.test(dincome): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  dincome
## Dickey-Fuller = -5.2461, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
adf.test(dconsumption)
## Warning in adf.test(dconsumption): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  dconsumption
## Dickey-Fuller = -7.3163, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
#--------------------------------------------------------------
# 5. SELECCION DE REZAGOS
#--------------------------------------------------------------

lagselect <- VARselect(cbind(income,consumption),
                       lag.max=10,
                       type="const")

lagselect
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      1      1      1      1 
## 
## $criteria
##                 1           2           3           4           5           6
## AIC(n) -0.1687190 -0.14158584 -0.10471143 -0.08015942 -0.08682095 -0.05291579
## HQ(n)  -0.1174878 -0.05620045  0.01482812  0.07353429  0.10102692  0.16908623
## SC(n)  -0.0426486  0.06853147  0.18945281  0.29805175  0.37543715  0.49338923
## FPE(n)  0.8447573  0.86803342  0.90073470  0.92329701  0.91743660  0.94947821
##                  7           8          9         10
## AIC(n) -0.06026624 -0.01133082 0.01318095 0.05883255
## HQ(n)   0.19588994  0.27897951 0.33764544 0.41745120
## SC(n)   0.57008571  0.70306805 0.81162675 0.94132528
## FPE(n)  0.94306986  0.99111748 1.01668885 1.06544686
#--------------------------------------------------------------
# 6. TEST DE COINTEGRACION DE JOHANSEN
#--------------------------------------------------------------

johansen <- ca.jo(cbind(income,consumption),
                  type="trace",
                  ecdet="const",
                  K=2)

summary(johansen)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1]  2.259431e-01  1.793394e-02 -1.172217e-16
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 1 |  2.68  7.52  9.24 12.97
## r = 0  | 40.58 17.85 19.96 24.60
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##                income.l2 consumption.l2  constant
## income.l2       1.000000       1.000000  1.000000
## consumption.l2 -1.400222      -1.033474 -1.317479
## constant       35.711424      -2.420212  6.942094
## 
## Weights W:
## (This is the loading matrix)
## 
##                income.l2 consumption.l2      constant
## income.d      0.02052623    -0.01473668 -2.529658e-16
## consumption.d 0.02240151     0.02099983 -6.624234e-16
#--------------------------------------------------------------
# 7. ESTIMAR VECM
#--------------------------------------------------------------

vecm <- cajorls(johansen,r=1)

summary(vecm$rlm)
## Response income.d :
## 
## Call:
## lm(formula = income.d ~ ect1 + income.dl1 + consumption.dl1 - 
##     1, data = data.mat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.41996 -0.56705 -0.01336  0.59915  2.34340 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## ect1             0.020526   0.003495   5.874  2.8e-08 ***
## income.dl1       0.047661   0.099054   0.481    0.631    
## consumption.dl1 -0.117132   0.084289  -1.390    0.167    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9491 on 145 degrees of freedom
## Multiple R-squared:  0.2237, Adjusted R-squared:  0.2077 
## F-statistic: 13.93 on 3 and 145 DF,  p-value: 4.968e-08
## 
## 
## Response consumption.d :
## 
## Call:
## lm(formula = consumption.d ~ ect1 + income.dl1 + consumption.dl1 - 
##     1, data = data.mat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.76168 -0.72069 -0.04029  0.67585  2.94459 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## ect1             0.022402   0.004055   5.525 1.49e-07 ***
## income.dl1      -0.043180   0.114931  -0.376    0.708    
## consumption.dl1 -0.160127   0.097800  -1.637    0.104    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.101 on 145 degrees of freedom
## Multiple R-squared:  0.1825, Adjusted R-squared:  0.1656 
## F-statistic: 10.79 on 3 and 145 DF,  p-value: 1.921e-06
#--------------------------------------------------------------
# 8. CONVERTIR A VAR
#--------------------------------------------------------------

var_model <- vec2var(johansen,r=1)

summary(var_model)
##               Length Class  Mode   
## deterministic    2   -none- numeric
## A                2   -none- list   
## p                1   -none- numeric
## K                1   -none- numeric
## y              300   -none- numeric
## obs              1   -none- numeric
## totobs           1   -none- numeric
## call             3   -none- call   
## vecm             1   ca.jo  S4     
## datamat       1036   -none- numeric
## resid          296   -none- numeric
## r                1   -none- numeric
#--------------------------------------------------------------
# 9. CAUSALIDAD DE GRANGER
#--------------------------------------------------------------
var_model <- VAR(cbind(income, consumption), p = 2, type = "const")
causality(var_model,cause="income")
## $Granger
## 
##  Granger causality H0: income do not Granger-cause consumption
## 
## data:  VAR object var_model
## F-Test = 0.85835, df1 = 2, df2 = 286, p-value = 0.425
## 
## 
## $Instant
## 
##  H0: No instantaneous causality between: income and consumption
## 
## data:  VAR object var_model
## Chi-squared = 33.989, df = 1, p-value = 5.543e-09
causality(var_model,cause="consumption")
## $Granger
## 
##  Granger causality H0: consumption do not Granger-cause income
## 
## data:  VAR object var_model
## F-Test = 0.84218, df1 = 2, df2 = 286, p-value = 0.4318
## 
## 
## $Instant
## 
##  H0: No instantaneous causality between: consumption and income
## 
## data:  VAR object var_model
## Chi-squared = 33.989, df = 1, p-value = 5.543e-09
#--------------------------------------------------------------
# 10. IMPULSE RESPONSE FUNCTIONS
#--------------------------------------------------------------

irf_model <- irf(var_model,
                 impulse="income",
                 response="consumption",
                 n.ahead=12,
                 boot=TRUE)

plot(irf_model)

#--------------------------------------------------------------
# 11. DESCOMPOSICION DE VARIANZA
#--------------------------------------------------------------

fevd_model <- fevd(var_model,n.ahead=10)

plot(fevd_model)

#--------------------------------------------------------------
# 12. PREDICCION
#--------------------------------------------------------------

forecast_model <- predict(var_model,
                          n.ahead=10,
                          ci=0.95)

forecast_income <- forecast_model$fcst$income
forecast_consumption <- forecast_model$fcst$consumption

#--------------------------------------------------------------
# 13. ORGANIZAR DATOS DE PREDICCION
#--------------------------------------------------------------

future_time <- (n+1):(n+10)

pred_income <- data.frame(
  time=future_time,
  forecast=forecast_income[,1],
  lower=forecast_income[,2],
  upper=forecast_income[,3]
)

pred_consumption <- data.frame(
  time=future_time,
  forecast=forecast_consumption[,1],
  lower=forecast_consumption[,2],
  upper=forecast_consumption[,3]
)

#--------------------------------------------------------------
# 14. GRAFICA DE PRONOSTICO
#--------------------------------------------------------------

ggplot()+

# datos reales
geom_line(data=data,
          aes(time,income),
          color="blue",
          size=1)+

# prediccion
geom_line(data=pred_income,
          aes(time,forecast),
          color="darkblue",
          linetype="dashed",
          size=1.2)+

# intervalo de confianza
geom_ribbon(data=pred_income,
            aes(time,
                ymin=lower,
                ymax=upper),
            fill="blue",
            alpha=0.2)+

theme_minimal()+
labs(title="Pronóstico de Ingreso",
     x="Tiempo",
     y="Valor")

#--------------------------------------------------------------
# 15. PRONOSTICO CONSUMO
#--------------------------------------------------------------

ggplot()+

geom_line(data=data,
          aes(time,consumption),
          color="red",
          size=1)+

geom_line(data=pred_consumption,
          aes(time,forecast),
          color="darkred",
          linetype="dashed",
          size=1.2)+

geom_ribbon(data=pred_consumption,
            aes(time,
                ymin=lower,
                ymax=upper),
            fill="red",
            alpha=0.2)+

theme_minimal()+
labs(title="Pronóstico de Consumo",
     x="Tiempo",
     y="Valor")

ANALISIS MACROECONOMICO COMPLETO CON VECM PIB – INFLACION – DESEMPLEO (COLOMBIA)

###############################################################
# ANALISIS MACROECONOMICO COMPLETO CON VECM
# PIB – INFLACION – DESEMPLEO (COLOMBIA)
# DATOS DEL BANCO MUNDIAL
###############################################################

############################
# 1. LIBRERIAS
############################

# instalar paquetes si es necesario
#install.packages(c(
#"WDI","vars","urca","tseries",
#"ggplot2","dplyr","tidyr",
#"forecast","reshape2"
#))

library(WDI)
library(vars)
library(urca)
library(tseries)
library(ggplot2)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## 
## Adjuntando el paquete: 'tidyr'
## The following object is masked from 'package:reshape2':
## 
##     smiths
library(forecast)
library(reshape2)

############################
# 2. DESCARGA DE DATOS
############################

# Indicadores del Banco Mundial
# PIB crecimiento -> NY.GDP.MKTP.KD.ZG
# Inflación -> FP.CPI.TOTL.ZG
# Desempleo -> SL.UEM.TOTL.ZS

data <- WDI(
country="CO",
indicator=c(
gdp="NY.GDP.MKTP.KD.ZG",
inflation="FP.CPI.TOTL.ZG",
unemployment="SL.UEM.TOTL.ZS"
),
start=1991,
end=2023
)

# ordenar por año
data <- data %>%
arrange(year)

# eliminar valores faltantes
data <- na.omit(data)

############################
# 3. EXPLORACION DE DATOS
############################

summary(data)
##    country             iso2c              iso3c                year     
##  Length:33          Length:33          Length:33          Min.   :1991  
##  Class :character   Class :character   Class :character   1st Qu.:1999  
##  Mode  :character   Mode  :character   Mode  :character   Median :2007  
##                                                           Mean   :2007  
##                                                           3rd Qu.:2015  
##                                                           Max.   :2023  
##       gdp           inflation       unemployment  
##  Min.   :-7.186   Min.   : 2.017   Min.   : 7.80  
##  1st Qu.: 2.056   1st Qu.: 3.523   1st Qu.: 9.36  
##  Median : 3.430   Median : 6.352   Median :11.06  
##  Mean   : 3.399   Mean   : 9.708   Mean   :11.60  
##  3rd Qu.: 5.202   3rd Qu.:11.736   3rd Qu.:13.22  
##  Max.   :10.801   Max.   :30.388   Max.   :20.52
# convertir formato largo para graficar
data_long <- pivot_longer(
data,
cols=c(gdp,inflation,unemployment),
names_to="variable",
values_to="value"
)

############################
# 4. GRAFICA DE SERIES
############################

ggplot(data_long,aes(year,value,color=variable))+

geom_line(size=1.2)+
geom_point()+

theme_minimal()+

labs(
title="Colombia: PIB, Inflación y Desempleo",
x="Año",
y="Porcentaje",
color="Variable"
)

# INTERPRETACION
# Esta gráfica permite observar:
# tendencia macroeconómica
# volatilidad de inflación
# dinámica del desempleo

############################
# 5. TEST DE ESTACIONARIEDAD
############################

# Augmented Dickey Fuller

adf.test(data$gdp)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data$gdp
## Dickey-Fuller = -2.5994, Lag order = 3, p-value = 0.3412
## alternative hypothesis: stationary
adf.test(data$inflation)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data$inflation
## Dickey-Fuller = -0.57268, Lag order = 3, p-value = 0.9708
## alternative hypothesis: stationary
adf.test(data$unemployment)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data$unemployment
## Dickey-Fuller = -2.8596, Lag order = 3, p-value = 0.2406
## alternative hypothesis: stationary
# INTERPRETACION
# p-value > 0.05
# no estacionaria
# presencia de raíz unitaria

############################
# 6. DIFERENCIAS (si necesario)
############################

dgdp <- diff(data$gdp)
dinflation <- diff(data$inflation)
dunemployment <- diff(data$unemployment)

adf.test(dgdp)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  dgdp
## Dickey-Fuller = -2.8567, Lag order = 3, p-value = 0.2423
## alternative hypothesis: stationary
adf.test(dinflation)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  dinflation
## Dickey-Fuller = -3.1555, Lag order = 3, p-value = 0.1271
## alternative hypothesis: stationary
adf.test(dunemployment)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  dunemployment
## Dickey-Fuller = -2.4266, Lag order = 3, p-value = 0.4082
## alternative hypothesis: stationary
# INTERPRETACION
# si p-value < 0.05
# la serie diferenciada es estacionaria

############################
# 7. SELECCION DE REZAGOS
############################

lag_selection <- VARselect(
data[,c("gdp","inflation","unemployment")],
lag.max=8,
type="const"
)

lag_selection
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      8      8      8      7 
## 
## $criteria
##                1          2          3          4          5         6
## AIC(n)  4.322959   4.672485   5.085822   5.083006   5.027411  3.025820
## HQ(n)   4.485230   4.956459   5.491499   5.610387   5.676495  3.796607
## SC(n)   4.908019   5.696341   6.548473   6.984453   7.367653  5.804857
## FPE(n) 76.041481 112.008834 186.366628 226.115295 309.963244 85.039029
##                 7    8
## AIC(n) -3.3400285 -Inf
## HQ(n)  -2.4475386 -Inf
## SC(n)  -0.1221963 -Inf
## FPE(n)  0.6939034  NaN
# INTERPRETACION
# elegir el número de rezagos según
# AIC
# HQ
# SC (BIC)

############################
# 8. TEST DE COINTEGRACION
############################

johansen_test <- ca.jo(

data[,c("gdp","inflation","unemployment")],

type="trace",
ecdet="const",
K=2
)

summary(johansen_test)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , without linear trend and constant in cointegration 
## 
## Eigenvalues (lambda):
## [1] 2.962125e-01 2.838949e-01 1.914693e-01 1.110223e-16
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 2 |  6.59  7.52  9.24 12.97
## r <= 1 | 16.94 17.85 19.96 24.60
## r = 0  | 27.83 32.00 34.91 41.07
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##                     gdp.l2 inflation.l2 unemployment.l2    constant
## gdp.l2           1.0000000    1.0000000        1.000000   1.0000000
## inflation.l2    -0.1423303    0.3031132       -0.897852   0.4418598
## unemployment.l2  0.1771976    0.6833280        7.498285   0.9152008
## constant        -4.6255061  -12.8290515      -81.912057 -26.5540318
## 
## Weights W:
## (This is the loading matrix)
## 
##                     gdp.l2 inflation.l2 unemployment.l2      constant
## gdp.d          -0.44778545  -0.42190505     0.044257513  8.082282e-17
## inflation.d     0.47679690  -0.29071983     0.005507301  1.711718e-16
## unemployment.d -0.01700483   0.05290823    -0.038758829 -7.143724e-19
# INTERPRETACION
# Si estadístico trace > valor crítico
# existe cointegración
# relación de largo plazo entre variables

############################
# 9. ESTIMACION VECM
############################

vecm_model <- cajorls(
johansen_test,
r=1
)

summary(vecm_model$rlm)
## Response gdp.d :
## 
## Call:
## lm(formula = gdp.d ~ ect1 + gdp.dl1 + inflation.dl1 + unemployment.dl1 - 
##     1, data = data.mat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3928  -1.4165  -0.6916   0.7237  10.5414 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## ect1              -0.4478     0.4485  -0.998   0.3270  
## gdp.dl1           -0.6044     0.3081  -1.961   0.0602 .
## inflation.dl1     -0.3876     0.3880  -0.999   0.3268  
## unemployment.dl1   0.1293     0.4685   0.276   0.7847  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.758 on 27 degrees of freedom
## Multiple R-squared:  0.4073, Adjusted R-squared:  0.3195 
## F-statistic: 4.639 on 4 and 27 DF,  p-value: 0.005571
## 
## 
## Response inflation.d :
## 
## Call:
## lm(formula = inflation.d ~ ect1 + gdp.dl1 + inflation.dl1 + unemployment.dl1 - 
##     1, data = data.mat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.288 -1.546 -0.137  0.801  4.708 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## ect1              0.47680    0.26263   1.815  0.08057 . 
## gdp.dl1           0.50518    0.18043   2.800  0.00933 **
## inflation.dl1    -0.07514    0.22721  -0.331  0.74341   
## unemployment.dl1  0.25983    0.27433   0.947  0.35197   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.201 on 27 degrees of freedom
## Multiple R-squared:  0.317,  Adjusted R-squared:  0.2158 
## F-statistic: 3.133 on 4 and 27 DF,  p-value: 0.03071
## 
## 
## Response unemployment.d :
## 
## Call:
## lm(formula = unemployment.d ~ ect1 + gdp.dl1 + inflation.dl1 + 
##     unemployment.dl1 - 1, data = data.mat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4471 -0.8669 -0.1127  0.4708  5.7253 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## ect1              -0.0170     0.2556  -0.067    0.947
## gdp.dl1           -0.1543     0.1756  -0.879    0.387
## inflation.dl1      0.0437     0.2211   0.198    0.845
## unemployment.dl1   0.0508     0.2670   0.190    0.851
## 
## Residual standard error: 2.142 on 27 degrees of freedom
## Multiple R-squared:  0.1103, Adjusted R-squared:  -0.02146 
## F-statistic: 0.8372 on 4 and 27 DF,  p-value: 0.5136
# INTERPRETACION ECONOMETRICA

# El término de corrección de error (ECT)
# muestra la velocidad de ajuste hacia
# el equilibrio de largo plazo.

# Si el coeficiente es:
# negativo
# estadísticamente significativo

# entonces el sistema converge al equilibrio.

############################
# 10. CONVERSION A VAR
############################

var_model <- vec2var(
johansen_test,
r=1
)

summary(var_model)
##               Length Class  Mode   
## deterministic   3    -none- numeric
## A               2    -none- list   
## p               1    -none- numeric
## K               1    -none- numeric
## y              99    -none- numeric
## obs             1    -none- numeric
## totobs          1    -none- numeric
## call            3    -none- call   
## vecm            1    ca.jo  S4     
## datamat       310    -none- numeric
## resid          93    -none- numeric
## r               1    -none- numeric
# INTERPRETACION

# Si p-value < 0.05
# se rechaza H0

# La variable tiene capacidad predictiva
# sobre la otra variable.

############################
# 12. IMPULSE RESPONSE FUNCTIONS
############################

irf_model <- irf(

var_model,

impulse="inflation",
response="gdp",

n.ahead=10,
boot=TRUE
)

plot(irf_model)

# INTERPRETACION

# Un shock en inflación
# muestra el efecto dinámico
# sobre el crecimiento económico.

############################
# 13. DESCOMPOSICION DE VARIANZA
############################

fevd_model <- fevd(
var_model,
n.ahead=10
)

plot(fevd_model)

# INTERPRETACION

# Permite analizar
# qué proporción de la varianza
# del error de pronóstico
# es explicada por cada variable.

############################
# 14. PRONOSTICO
############################

forecast_model <- predict(

var_model,
n.ahead=6,
ci=0.95
)

gdp_forecast <- forecast_model$fcst$gdp
inflation_forecast <- forecast_model$fcst$inflation
unemployment_forecast <- forecast_model$fcst$unemployment

future_years <- max(data$year)+1:6

pred_gdp <- data.frame(
year=future_years,
forecast=gdp_forecast[,1],
lower=gdp_forecast[,2],
upper=gdp_forecast[,3]
)

############################
# 15. GRAFICA DE PRONOSTICO
############################

ggplot()+

geom_line(
data=data,
aes(year,gdp),
color="blue",
size=1
)+

geom_line(
data=pred_gdp,
aes(year,forecast),
color="darkblue",
linetype="dashed",
size=1.2
)+

geom_ribbon(
data=pred_gdp,
aes(year,
ymin=lower,
ymax=upper),
fill="blue",
alpha=0.2
)+

theme_minimal()+

labs(
title="Pronóstico del crecimiento del PIB",
x="Año",
y="Crecimiento %"
)

###############################################################
# FIN DEL ANALISIS ECONOMETRICO
###############################################################