We load the required libraries

# 1. Load the required libraries
library(tseries)
## Warning: package 'tseries' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.3.3
library(urca)
## Warning: package 'urca' was built under R version 4.3.3
library(car)
## Warning: package 'car' was built under R version 4.3.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.3
library(readxl)

We load the dataset

### Load the data
datos <- read.csv2("C:/Users/karsu/OneDrive - dane.gov.co/Desktop/LUIS CARLOS/PROYECTO DE GRADO/DATOS/DATA_SERIE2.csv")
head(datos)
##    AÑO MES SECUESTRO EXTORSION
## 1 2015   1        20       652
## 2 2015   2        15       650
## 3 2015   3        18       574
## 4 2015   4         9       595
## 5 2015   5        14       609
## 6 2015   6        17       549
class(datos)
## [1] "data.frame"
dim(datos)
## [1] 120   4
logDPI <- log(datos$SECUESTRO) ### SECUESTRO (kidnapping)
logPCE <- log(datos$EXTORSION) ### EXTORSION (extortion)

# Create the time series objects
x <- ts(logDPI, start = c(2015, 1), frequency = 12)
y <- ts(logPCE, start = c(2015, 1), frequency = 12)



## Combine the series into a single table

datos1<-cbind(x,y)
head(datos1)
##                 x        y
## Jan 2015 2.995732 6.480045
## Feb 2015 2.708050 6.476972
## Mar 2015 2.890372 6.352629
## Apr 2015 2.197225 6.388561
## May 2015 2.639057 6.411818
## Jun 2015 2.833213 6.308098
class(datos1)
## [1] "mts"    "ts"     "matrix" "array"
### Plot the series
ts.plot(datos1, col=c("blue","red"), main = "Tendencia")

# Fit the long-run model
modelo1<-lm(y~x)
summary(modelo1)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.66951 -0.21788  0.07223  0.26842  0.70723 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.49589    0.21913   25.08  < 2e-16 ***
## x            0.36331    0.08257    4.40 2.39e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4185 on 118 degrees of freedom
## Multiple R-squared:  0.141,  Adjusted R-squared:  0.1337 
## F-statistic: 19.36 on 1 and 118 DF,  p-value: 2.385e-05
residualesmodelo1<-modelo1$residuals
summary(residualesmodelo1)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -1.66951 -0.21788  0.07223  0.00000  0.26842  0.70723
library(car)
residualPlots(modelo1,fitted=TRUE)

##            Test stat Pr(>|Test stat|)  
## x             2.2691          0.02509 *
## Tukey test    2.2691          0.02326 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Assessing the stationarity of the residuals
library(tseries)
adf.test(residualesmodelo1)
## Warning in adf.test(residualesmodelo1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  residualesmodelo1
## Dickey-Fuller = -4.8248, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# We reject H0, so the residuals are stationary:
# the variables are cointegrated


# With constant and trend (type = "trend")
library(urca)
adf.test1<-ur.df(residualesmodelo1,type="trend",selectlags = "AIC")
summary(adf.test1)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.98579 -0.13527 -0.00153  0.11832  0.89219 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.298397   0.071115  -4.196 5.40e-05 ***
## z.lag.1     -0.577430   0.097025  -5.951 2.99e-08 ***
## tt           0.004965   0.001092   4.547 1.37e-05 ***
## z.diff.lag   0.030848   0.092494   0.334    0.739    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2762 on 114 degrees of freedom
## Multiple R-squared:  0.2868, Adjusted R-squared:  0.2681 
## F-statistic: 15.28 on 3 and 114 DF,  p-value: 2.01e-08
## 
## 
## Value of test-statistic is: -5.9513 11.8477 17.7535 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -3.99 -3.43 -3.13
## phi2  6.22  4.75  4.07
## phi3  8.43  6.49  5.47
adf.test1@teststat
##                tau3     phi2     phi3
## statistic -5.951326 11.84772 17.75352
adf.test1@cval
##       1pct  5pct 10pct
## tau3 -3.99 -3.43 -3.13
## phi2  6.22  4.75  4.07
## phi3  8.43  6.49  5.47
### -5.9513 is below the 5% critical value of -3.43, so it falls in the rejection region: we reject H0 (unit root) and the series is stationary
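
The same comparison can be done programmatically from the @teststat and @cval slots shown above. A minimal sketch (assuming the ur.df object adf.test1 fitted above):

# TRUE means the tau3 statistic is more negative than its 5% critical value,
# i.e. we reject H0 of a unit root in the residuals
adf.test1@teststat["statistic", "tau3"] < adf.test1@cval["tau3", "5pct"]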



# With constant only (type = "drift")
adf.test2<-ur.df(residualesmodelo1,type="drift",selectlags = "AIC")
summary(adf.test2)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.92743 -0.13883 -0.00401  0.17106  1.08741 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.003569   0.027522   0.130 0.897061    
## z.lag.1     -0.255474   0.071784  -3.559 0.000542 ***
## z.diff.lag  -0.125170   0.092951  -1.347 0.180747    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2989 on 115 degrees of freedom
## Multiple R-squared:  0.1575, Adjusted R-squared:  0.1428 
## F-statistic: 10.75 on 2 and 115 DF,  p-value: 5.254e-05
## 
## 
## Value of test-statistic is: -3.5589 6.3484 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1  6.52  4.63  3.81
# -3.5589 is below the 5% critical value of -2.88, so it falls in the rejection region: we reject H0
# and the series is stationary

# Without constant or trend (type = "none")
adf.test3<-ur.df(residualesmodelo1,type="none",selectlags = "AIC")
summary(adf.test3)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.92396 -0.13529 -0.00046  0.17450  1.09090 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)    
## z.lag.1    -0.25559    0.07147  -3.576  0.00051 ***
## z.diff.lag -0.12497    0.09254  -1.350  0.17952    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2976 on 116 degrees of freedom
## Multiple R-squared:  0.1575, Adjusted R-squared:  0.143 
## F-statistic: 10.84 on 2 and 116 DF,  p-value: 4.824e-05
## 
## 
## Value of test-statistic is: -3.5761 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
# -3.5761 is below the 5% critical value of -1.95, so it falls in the rejection region: we reject H0
# and the series is stationary
# Phillips-Ouliaris test for cointegration
# With type Pz it does not matter which variable is on the left-hand side of the equation
prueba.PO<-ca.po(datos1,type="Pz")
summary(prueba.PO)
## 
## ######################################## 
## # Phillips and Ouliaris Unit Root Test # 
## ######################################## 
## 
## Test of type Pz 
## detrending of series none 
## 
## Response x :
## 
## Call:
## lm(formula = x ~ zr - 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.84011 -0.24009  0.03831  0.24412  0.96646 
## 
## Coefficients:
##     Estimate Std. Error t value Pr(>|t|)    
## zrx  0.37488    0.08703   4.308 3.45e-05 ***
## zry  0.25337    0.03570   7.097 1.06e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4074 on 117 degrees of freedom
## Multiple R-squared:  0.9768, Adjusted R-squared:  0.9764 
## F-statistic:  2460 on 2 and 117 DF,  p-value: < 2.2e-16
## 
## 
## Response y :
## 
## Call:
## lm(formula = y ~ zr - 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83070 -0.09372  0.01805  0.09718  1.95513 
## 
## Coefficients:
##     Estimate Std. Error t value Pr(>|t|)    
## zrx  0.03370    0.06219   0.542    0.589    
## zry  0.98626    0.02551  38.660   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2911 on 117 degrees of freedom
## Multiple R-squared:  0.998,  Adjusted R-squared:  0.998 
## F-statistic: 2.924e+04 on 2 and 117 DF,  p-value: < 2.2e-16
## 
## 
## 
## Value of test-statistic is: 124.6793 
## 
## Critical values of Pz are:
##                   10pct    5pct    1pct
## critical values 33.9267 40.8217 55.1911
# H0 of no cointegration is rejected when the test statistic exceeds the 5% critical value
# test statistic = 124.6793 > critical value = 40.8217
# Therefore the variables are cointegrated
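
The decision rule can also be checked directly from the object's slots (a minimal sketch; it assumes the ca.po object exposes @teststat and @cval like the other urca tests):

# TRUE means the Pz statistic exceeds its 5% critical value,
# i.e. we reject H0 of no cointegration
prueba.PO@teststat > prueba.PO@cval[1, "5pct"]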
prueba.PU<-ca.po(datos1,type="Pu")
summary(prueba.PU)
## 
## ######################################## 
## # Phillips and Ouliaris Unit Root Test # 
## ######################################## 
## 
## Test of type Pu 
## detrending of series none 
## 
## 
## Call:
## lm(formula = z[, 1] ~ z[, -1] - 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.78069 -0.22738  0.04563  0.31464  0.81534 
## 
## Coefficients:
##         Estimate Std. Error t value Pr(>|t|)    
## z[, -1] 0.405376   0.006086   66.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4307 on 119 degrees of freedom
## Multiple R-squared:  0.9739, Adjusted R-squared:  0.9737 
## F-statistic:  4436 on 1 and 119 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: 122.694 
## 
## Critical values of Pu are:
##                   10pct    5pct    1pct
## critical values 20.3933 25.9711 38.3413
# H0 of no cointegration is rejected when the test statistic exceeds the 5% critical value
# test statistic = 122.694 > critical value = 25.9711
# Therefore the variables are cointegrated
###################### ERROR CORRECTION MODEL ###################
# Second step: fit the error correction model
# Generate a trend variable, add it to the model, and check whether
# the resulting residuals are stationary

tendencia <- seq_along(y)
tendencia
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120
modelo2<-lm(y~tendencia+x)
summary(modelo2)
## 
## Call:
## lm(formula = y ~ tendencia + x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3848 -0.1587  0.0440  0.1694  0.6335 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.4011090  0.1577704   34.23  < 2e-16 ***
## tendencia   0.0086484  0.0008195   10.55  < 2e-16 ***
## x           0.1993610  0.0613501    3.25  0.00151 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3008 on 117 degrees of freedom
## Multiple R-squared:  0.5599, Adjusted R-squared:  0.5524 
## F-statistic: 74.42 on 2 and 117 DF,  p-value: < 2.2e-16
modelo2
## 
## Call:
## lm(formula = y ~ tendencia + x)
## 
## Coefficients:
## (Intercept)    tendencia            x  
##    5.401109     0.008648     0.199361
residualesmodelo2 <- modelo2$residuals
summary(residualesmodelo2)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.3848 -0.1587  0.0440  0.0000  0.1694  0.6335
residualPlots(modelo2)

##            Test stat Pr(>|Test stat|)
## tendencia     0.2006           0.8414
## x             0.1071           0.9149
## Tukey test   -1.0174           0.3090
modelo2
## 
## Call:
## lm(formula = y ~ tendencia + x)
## 
## Coefficients:
## (Intercept)    tendencia            x  
##    5.401109     0.008648     0.199361
# Check that the residuals of modelo2 are stationary
# Augmented Dickey-Fuller test (note: this re-uses the object name adf.test2 from above)
adf.test2 <- ur.df(residualesmodelo2, type="none",selectlags = "AIC")
summary(adf.test2)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.07596 -0.08916  0.00829  0.10786  1.05054 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)    
## z.lag.1    -0.54397    0.09072  -5.996 2.34e-08 ***
## z.diff.lag  0.07369    0.09113   0.809     0.42    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2551 on 116 degrees of freedom
## Multiple R-squared:  0.2649, Adjusted R-squared:  0.2522 
## F-statistic:  20.9 on 2 and 116 DF,  p-value: 1.773e-08
## 
## 
## Value of test-statistic is: -5.9962 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
## -5.9962 is below the 5% critical value of -1.95, so we reject H0 (unit root):
# the residuals are stationary
## How many differences are needed?

library(forecast)
ndiffs(x)
## [1] 1
ndiffs(y)
## [1] 1
# To fit the error correction model we take first differences
# and lag the error term by one period

dy<-diff(y)
dx<-diff(x)

# If a variable is not significant it is dropped
modelo3<-lm(dy~dx)
summary(modelo3)
## 
## Call:
## lm(formula = dy ~ dx)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79550 -0.10009 -0.00650  0.08702  1.87983 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.005359   0.026532   0.202    0.840
## dx          0.070146   0.054710   1.282    0.202
## 
## Residual standard error: 0.2894 on 117 degrees of freedom
## Multiple R-squared:  0.01386,    Adjusted R-squared:  0.005427 
## F-statistic: 1.644 on 1 and 117 DF,  p-value: 0.2023
# Extract the residuals
res3<-modelo3$residuals
# Lag the residuals by one period
res3_1 <- lag(res3) # one lag (note: lag() does not shift the values of a plain numeric vector; see the note after the MCE1 summary below)
# Fit the error correction model
modelo4<-lm(dy~dx+res3_1)
summary(modelo4)
## Warning in summary.lm(modelo4): essentially perfect fit: summary may be
## unreliable
## 
## Call:
## lm(formula = dy ~ dx + res3_1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -9.389e-17 -1.153e-17 -2.050e-18  4.060e-18  5.317e-16 
## 
## Coefficients:
##              Estimate Std. Error   t value Pr(>|t|)    
## (Intercept) 5.359e-03  4.861e-18 1.102e+15   <2e-16 ***
## dx          7.015e-02  1.002e-17 6.998e+15   <2e-16 ***
## res3_1      1.000e+00  1.694e-17 5.904e+16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.303e-17 on 116 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.767e+33 on 2 and 116 DF,  p-value: < 2.2e-16
MCE1 <- modelo4
summary(MCE1)
## Warning in summary.lm(MCE1): essentially perfect fit: summary may be unreliable
## 
## Call:
## lm(formula = dy ~ dx + res3_1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -9.389e-17 -1.153e-17 -2.050e-18  4.060e-18  5.317e-16 
## 
## Coefficients:
##              Estimate Std. Error   t value Pr(>|t|)    
## (Intercept) 5.359e-03  4.861e-18 1.102e+15   <2e-16 ***
## dx          7.015e-02  1.002e-17 6.998e+15   <2e-16 ***
## res3_1      1.000e+00  1.694e-17 5.904e+16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.303e-17 on 116 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.767e+33 on 2 and 116 DF,  p-value: < 2.2e-16
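
The "essentially perfect fit" warning for modelo4 / MCE1 is an artifact of how the lag was built: stats::lag() shifts the time base of a ts object but does not shift the values of a plain numeric vector, so res3_1 contains exactly the same values as res3 and the regression dy ~ dx + res3_1 reproduces dy identically (intercept and slope equal to modelo3, coefficient 1 on the residual). A minimal sketch of a genuine one-period lag (lag1 is a hypothetical helper, not part of the original script):

# Hypothetical helper: shift a plain numeric vector back one period, padding with NA
lag1 <- function(v) c(NA, head(v, -1))
res3_lag <- lag1(res3)
# lm() drops the row with the leading NA, so this is no longer a degenerate fit
modelo4b <- lm(dy ~ dx + res3_lag)

The error correction model MCE2 further below avoids the problem by indexing the residuals of the levels regression directly.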
## Check the stationarity of the series:

library(tseries)
# Phillips-Perron test
pp.test(y)
## Warning in pp.test(y): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  y
## Dickey-Fuller Z(alpha) = -49.354, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
# p-value = 0.01: the series is stationary
pp.test(x)
## Warning in pp.test(x): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  x
## Dickey-Fuller Z(alpha) = -77.046, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
# p-value = 0.01: the series is stationary


# Check for cointegration
# Phillips-Ouliaris cointegration test
po.test(cbind(x,y))
## Warning in po.test(cbind(x, y)): p-value smaller than printed p-value
## 
##  Phillips-Ouliaris Cointegration Test
## 
## data:  cbind(x, y)
## Phillips-Ouliaris demeaned = -72.333, Truncation lag parameter = 1,
## p-value = 0.01
# H0: not cointegrated
# p-value = 0.01: we reject H0, so the variables are cointegrated
# First step: estimate the long-run model:

m1 <- lm(y~x)
summary(m1)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.66951 -0.21788  0.07223  0.26842  0.70723 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.49589    0.21913   25.08  < 2e-16 ***
## x            0.36331    0.08257    4.40 2.39e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4185 on 118 degrees of freedom
## Multiple R-squared:  0.141,  Adjusted R-squared:  0.1337 
## F-statistic: 19.36 on 1 and 118 DF,  p-value: 2.385e-05
# Second step: determine whether the model residuals are I(0)
resm1<-resid(m1)
library(tseries)
pp.test(resm1)
## Warning in pp.test(resm1): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  resm1
## Dickey-Fuller Z(alpha) = -62.827, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
Box.test(resm1,type = "Ljung-Box", lag=5) # H0: independence
## 
##  Box-Ljung test
## 
## data:  resm1
## X-squared = 175.1, df = 5, p-value < 2.2e-16
# Now estimate the error correction model
n <- length(y)
MCE2<-lm(diff(y)~diff(x)+lag(resm1[1:(n-1)],1)) # the long-run residual enters lagged one period: resm1[1:(n-1)] aligns the residual at t-1 with the difference at t
summary(MCE2)
## 
## Call:
## lm(formula = diff(y) ~ diff(x) + lag(resm1[1:(n - 1)], 1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.01646 -0.09888  0.02941  0.12254  1.44924 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               0.004379   0.025123   0.174 0.861938    
## diff(x)                   0.109674   0.052831   2.076 0.040111 *  
## lag(resm1[1:(n - 1)], 1) -0.235283   0.061776  -3.809 0.000225 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.274 on 116 degrees of freedom
## Multiple R-squared:  0.1235, Adjusted R-squared:  0.1084 
## F-statistic:  8.17 on 2 and 116 DF,  p-value: 0.0004793
Box.test(resid(MCE2)) # residuals are independent (H0 not rejected)
## 
##  Box-Pierce test
## 
## data:  resid(MCE2)
## X-squared = 0.1072, df = 1, p-value = 0.7433
# The autocorrelation structure has been corrected

## The long-run effect is the coefficient on x in the levels regression m1

summary(m1)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.66951 -0.21788  0.07223  0.26842  0.70723 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.49589    0.21913   25.08  < 2e-16 ***
## x            0.36331    0.08257    4.40 2.39e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4185 on 118 degrees of freedom
## Multiple R-squared:  0.141,  Adjusted R-squared:  0.1337 
## F-statistic: 19.36 on 1 and 118 DF,  p-value: 2.385e-05
summary(MCE1)
## Warning in summary.lm(MCE1): essentially perfect fit: summary may be unreliable
## 
## Call:
## lm(formula = dy ~ dx + res3_1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -9.389e-17 -1.153e-17 -2.050e-18  4.060e-18  5.317e-16 
## 
## Coefficients:
##              Estimate Std. Error   t value Pr(>|t|)    
## (Intercept) 5.359e-03  4.861e-18 1.102e+15   <2e-16 ***
## dx          7.015e-02  1.002e-17 6.998e+15   <2e-16 ***
## res3_1      1.000e+00  1.694e-17 5.904e+16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.303e-17 on 116 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.767e+33 on 2 and 116 DF,  p-value: < 2.2e-16
summary(MCE2)
## 
## Call:
## lm(formula = diff(y) ~ diff(x) + lag(resm1[1:(n - 1)], 1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.01646 -0.09888  0.02941  0.12254  1.44924 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               0.004379   0.025123   0.174 0.861938    
## diff(x)                   0.109674   0.052831   2.076 0.040111 *  
## lag(resm1[1:(n - 1)], 1) -0.235283   0.061776  -3.809 0.000225 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.274 on 116 degrees of freedom
## Multiple R-squared:  0.1235, Adjusted R-squared:  0.1084 
## F-statistic:  8.17 on 2 and 116 DF,  p-value: 0.0004793
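
A brief numerical reading of the two equations (a sketch using the fitted objects above):

# Long-run elasticity of log(EXTORSION) with respect to log(SECUESTRO), from the levels regression
coef(m1)["x"]   # approx. 0.363
# Short-run effect and speed of adjustment, from the error correction model
coef(MCE2)[2]   # approx. 0.110: contemporaneous effect of a change in x
coef(MCE2)[3]   # approx. -0.235: about 23.5% of a deviation from equilibrium is corrected each month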

We forecast the following year and compare the forecasts with the observed values

# 2. Observed data for 2025 (for comparison only, not for modelling)
datos_2025 <- data.frame(
  MES = c(1, 2, 3),
  SECUESTRO = c(21, 28, 35),
  EXTORSION = c(954, 965, 811)
)

# Transform the observed data to the log scale (for comparison only)
logDPI_2025 <- log(datos_2025$SECUESTRO)
logPCE_2025 <- log(datos_2025$EXTORSION)

# Create time series for the observed data (for comparison only)
x_2025_real <- ts(logDPI_2025, start = c(2025, 1), frequency = 12)
y_2025_real <- ts(logPCE_2025, start = c(2025, 1), frequency = 12)

# 3. Forecast x (log(SECUESTRO)) with a simple linear trend fitted to the historical data
trend <- seq_along(x)
m1_x <- lm(x ~ trend)
last_x <- x[length(x)]  # last historical value of x
x_forecast <- numeric(12)
x_forecast[1] <- last_x  # start from the last historical value
for (i in 2:12) {
  new_trend <- length(x) + (i - 1)  # extend the trend index
  x_forecast[i] <- predict(m1_x, newdata = data.frame(trend = new_trend))
}
x_forecast_ts <- ts(x_forecast, start = c(2025, 1), frequency = 12)
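
The loop above is equivalent to a single vectorized predict() call (a sketch that keeps the convention of re-using the last observed value for the first month):

# Vectorized alternative: evaluate the trend model at the 12 extended time indices
new_trend_df <- data.frame(trend = length(x) + 0:11)
x_fc <- predict(m1_x, newdata = new_trend_df)
x_fc[1] <- last_x  # first month kept at the last observed value, as in the loop
x_forecast_ts_alt <- ts(x_fc, start = c(2025, 1), frequency = 12)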

# 4. Forecast y (log(EXTORSION)) using MCE2
n <- length(y)
last_x <- x[n]
last_y <- y[n]
last_resm1 <- resm1[n-1]

# Vectors to store the forecasts
forecast_y <- numeric(12)
forecast_y[1] <- last_y  # start from the last historical value

# Coefficients of MCE2 (taken from the fitted model above)
coef_mce2 <- coef(MCE2)
intercept_mce2 <- coef_mce2[1]
coef_dx <- coef_mce2[2]
coef_reslag <- coef_mce2[3]

# Forecast all of 2025
current_y <- last_y
current_res <- last_resm1

for (i in 2:12) {
  # compute diff(x) from the x forecast
  dx_i <- x_forecast_ts[i] - x_forecast_ts[i-1]
  # compute diff(y) from MCE2
  dy_i <- intercept_mce2 + coef_dx * dx_i + coef_reslag * current_res
  forecast_y[i] <- current_y + dy_i
  
  # update the equilibrium residual
  coef_m1 <- coef(m1)
  current_res <- forecast_y[i] - (coef_m1[1] + coef_m1[2] * x_forecast_ts[i])
  
  # update the level
  current_y <- forecast_y[i]
}

forecast_y_ts <- ts(forecast_y, start = c(2025, 1), frequency = 12)

# 5. Transform back to the original scale
forecast_secuestro <- exp(x_forecast_ts)
forecast_extorsion <- exp(forecast_y_ts)
real_secuestro <- datos_2025$SECUESTRO  # for comparison only
real_extorsion <- datos_2025$EXTORSION  # for comparison only

# Build full-year series for the observed values (January-March only, NA afterwards)
real_secuestro_ts <- ts(c(real_secuestro, rep(NA, 9)), start = c(2025, 1), frequency = 12)
real_extorsion_ts <- ts(c(real_extorsion, rep(NA, 9)), start = c(2025, 1), frequency = 12)

# 6. Compute approximate 95% confidence intervals
res_mce2 <- resid(MCE2)
sigma <- sd(res_mce2)  # standard deviation of the MCE2 residuals
z_score <- qnorm(0.975)  # critical value for 95% confidence
ci_secuestro <- exp(x_forecast_ts) + c(rep(NA, 3), sigma * z_score * exp(x_forecast_ts[4:12]))  # simple approximation
ci_secuestro_lower <- exp(x_forecast_ts) - c(rep(NA, 3), sigma * z_score * exp(x_forecast_ts[4:12]))
ci_extorsion <- exp(forecast_y_ts) + c(rep(NA, 3), sigma * z_score * exp(forecast_y_ts[4:12]))
ci_extorsion_lower <- exp(forecast_y_ts) - c(rep(NA, 3), sigma * z_score * exp(forecast_y_ts[4:12]))
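
A common alternative (a sketch, not what is done above) is to build the interval on the log scale and exponentiate, which keeps the bounds positive and multiplicative:

# 95% band constructed on the log scale, then mapped back to counts
ci_extorsion_log_upper <- exp(forecast_y_ts + z_score * sigma)
ci_extorsion_log_lower <- exp(forecast_y_ts - z_score * sigma)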

# 7. Compare forecasts against observed values (January-March)
comparacion <- data.frame(
  Mes = c("Enero", "Febrero", "Marzo"),
  SECUESTRO_Real = real_secuestro,
  SECUESTRO_Pronostico = forecast_secuestro[1:3],
  SECUESTRO_Error = real_secuestro - forecast_secuestro[1:3],
  SECUESTRO_Error_Porcentual = ((real_secuestro - forecast_secuestro[1:3]) / real_secuestro) * 100,
  EXTORSION_Real = real_extorsion,
  EXTORSION_Pronostico = forecast_extorsion[1:3],
  EXTORSION_Error = real_extorsion - forecast_extorsion[1:3],
  EXTORSION_Error_Porcentual = ((real_extorsion - forecast_extorsion[1:3]) / real_extorsion) * 100
)

# 8. Compute error metrics
MAE_secuestro <- mean(abs(comparacion$SECUESTRO_Error))
RMSE_secuestro <- sqrt(mean(comparacion$SECUESTRO_Error^2))
MAPE_secuestro <- mean(abs(comparacion$SECUESTRO_Error_Porcentual))

MAE_extorsion <- mean(abs(comparacion$EXTORSION_Error))
RMSE_extorsion <- sqrt(mean(comparacion$EXTORSION_Error^2))
MAPE_extorsion <- mean(abs(comparacion$EXTORSION_Error_Porcentual))
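
The forecast package computes the same metrics in one call; a minimal sketch using the vectors defined above:

# accuracy() reports ME, RMSE, MAE, MPE, MAPE (and more) for forecast vs. observed
accuracy(as.numeric(forecast_secuestro[1:3]), real_secuestro)
accuracy(as.numeric(forecast_extorsion[1:3]), real_extorsion)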

# 9. Forecast table for all of 2025
meses <- c("Enero", "Febrero", "Marzo", "Abril", "Mayo", "Junio", "Julio", "Agosto", "Septiembre", "Octubre", "Noviembre", "Diciembre")
pronosticos_2025 <- data.frame(
  Mes = meses,
  SECUESTRO_Pronostico = round(forecast_secuestro, 2),
  EXTORSION_Pronostico = round(forecast_extorsion, 2)
)

# 10. Display the results
cat("\nComparación de Pronósticos vs. Valores Reales (Enero-Marzo 2025):\n")
## 
## Comparación de Pronósticos vs. Valores Reales (Enero-Marzo 2025):
print(comparacion)
##       Mes SECUESTRO_Real SECUESTRO_Pronostico SECUESTRO_Error
## 1   Enero             21             27.00000        -6.00000
## 2 Febrero             28             16.74304        11.25696
## 3   Marzo             35             16.79977        18.20023
##   SECUESTRO_Error_Porcentual EXTORSION_Real EXTORSION_Pronostico
## 1                  -28.57143            954            1260.0000
## 2                   40.20342            965            1099.5660
## 3                   52.00064            811             986.1247
##   EXTORSION_Error EXTORSION_Error_Porcentual
## 1       -306.0000                  -32.07547
## 2       -134.5660                  -13.94466
## 3       -175.1247                  -21.59367
cat("\nMétricas de Error para SECUESTRO (Enero-Marzo):\n")
## 
## Métricas de Error para SECUESTRO (Enero-Marzo):
cat("MAE: ", MAE_secuestro, "\n")
## MAE:  11.81906
cat("RMSE: ", RMSE_secuestro, "\n")
## RMSE:  12.83183
cat("MAPE: ", MAPE_secuestro, "%\n")
## MAPE:  40.2585 %
cat("\nMétricas de Error para EXTORSION (Enero-Marzo):\n")
## 
## Métricas de Error para EXTORSION (Enero-Marzo):
cat("MAE: ", MAE_extorsion, "\n")
## MAE:  205.2302
cat("RMSE: ", RMSE_extorsion, "\n")
## RMSE:  217.8781
cat("MAPE: ", MAPE_extorsion, "%\n")
## MAPE:  22.53793 %
cat("\nPronósticos para todo 2025:\n")
## 
## Pronósticos para todo 2025:
print(pronosticos_2025)
##           Mes SECUESTRO_Pronostico EXTORSION_Pronostico
## 1       Enero                27.00              1260.00
## 2     Febrero                16.74              1099.57
## 3       Marzo                16.80               986.12
## 4       Abril                16.86               907.60
## 5        Mayo                16.91               852.04
## 6       Junio                16.97               812.10
## 7       Julio                17.03               783.05
## 8      Agosto                17.09               761.75
## 9  Septiembre                17.14               746.08
## 10    Octubre                17.20               734.52
## 11  Noviembre                17.26               726.01
## 12  Diciembre                17.32               719.78
# 11. Plot the results in the R graphics window, with confidence intervals
# Plot for SECUESTRO
plot(real_secuestro_ts, 
     col = "blue", main = "SECUESTRO: Pronósticos vs. Reales 2025", 
     ylab = "SECUESTRO", xlab = "Mes", ylim = c(5, 40), type = "l")
lines(forecast_secuestro, col = "red")
lines(ts(ci_secuestro, start = c(2025, 1), frequency = 12), col = "green", lty = 2)
lines(ts(ci_secuestro_lower, start = c(2025, 1), frequency = 12), col = "green", lty = 2)
legend("topright", legend = c("Real (Ene-Mar)", "Pronóstico", "IC 95%"), col = c("blue", "red", "green"), lty = c(1, 1, 2))

# Plot for EXTORSION
plot(real_extorsion_ts, 
     col = "blue", main = "EXTORSION: Pronósticos vs. Reales 2025", 
     ylab = "EXTORSION", xlab = "Mes", ylim = c(700, 1300), type = "l")
lines(forecast_extorsion, col = "red")
lines(ts(ci_extorsion, start = c(2025, 1), frequency = 12), col = "green", lty = 2)
lines(ts(ci_extorsion_lower, start = c(2025, 1), frequency = 12), col = "green", lty = 2)
legend("topright", legend = c("Real (Ene-Mar)", "Pronóstico", "IC 95%"), col = c("blue", "red", "green"), lty = c(1, 1, 2))

Conclusions