Se instalan las librerías:

#library(tidyverse)
#library(lubridate)
#library(car)
#library(urca)
#library(tseries)
#library(aTSA)
#library(forecast)
#library(foreign)
#library(timsac)
#library(vars)
#library(mFilter)
#library(dynlm)
#library(readxl)

Se carga la base de datos:

coint <- read_excel("Cointegración en R.xls")
View(coint)
attach(coint)
names(coint)
## [1] "Year"     "Quarter"  "tiempo"   "DPI"      "GDP"      "PCE"      "CP"      
## [8] "DIVIDEND"
class(coint)
## [1] "tbl_df"     "tbl"        "data.frame"

Gráfico de la serie de datos:

plot(PCE, type="l")

Crear variables de series de tiempo

GDP.ts = ts(GDP, start=c(1974,1), end=c(2007,4), frequency = 4)
PCE.ts = ts(PCE, start=c(1974,1), end=c(2007,4), frequency = 4)

datos1=cbind(GDP.ts, PCE.ts)
plot(cbind(GDP.ts, PCE.ts))

Como se puede ver en el gráfico, ambas series de tiempo presentan un comportamiento similar a lo largo del tiempo. Por lo cual se realizará una prueba de cointegración para analizar la relación entre ambas a lo largo del tiempo.

Cointegracion

Generar modelo

cor(PCE, GDP)
## [1] 0.9991024
modelo1 = lm(PCE.ts ~ GDP.ts )
a <- VARselect(datos1,lag.max = 10,type="const");a$selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      2      2      2      2

El valor más repetido es 2, por lo cual este será el valor a utilizar en nuestra p para generar modelos.

modelos = VAR(datos1, p=2)
summary(modelos)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: GDP.ts, PCE.ts 
## Deterministic variables: const 
## Sample size: 134 
## Log Likelihood: -1208.96 
## Roots of the characteristic polynomial:
## 1.006 0.9007 0.2847 0.1399
## Call:
## VAR(y = datos1, p = 2)
## 
## 
## Estimation results for equation GDP.ts: 
## ======================================= 
## GDP.ts = GDP.ts.l1 + PCE.ts.l1 + GDP.ts.l2 + PCE.ts.l2 + const 
## 
##           Estimate Std. Error t value Pr(>|t|)    
## GDP.ts.l1  0.96042    0.11155   8.610 2.19e-14 ***
## PCE.ts.l1  0.74110    0.20287   3.653 0.000375 ***
## GDP.ts.l2 -0.07072    0.11018  -0.642 0.522110    
## PCE.ts.l2 -0.57426    0.20649  -2.781 0.006231 ** 
## const     24.30575   11.28947   2.153 0.033183 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 34.35 on 129 degrees of freedom
## Multiple R-Squared: 0.9991,  Adjusted R-squared: 0.999 
## F-statistic: 3.438e+04 on 4 and 129 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation PCE.ts: 
## ======================================= 
## PCE.ts = GDP.ts.l1 + PCE.ts.l1 + GDP.ts.l2 + PCE.ts.l2 + const 
## 
##           Estimate Std. Error t value Pr(>|t|)    
## GDP.ts.l1  0.05774    0.06454   0.895    0.373    
## PCE.ts.l1  1.09123    0.11737   9.297 4.71e-16 ***
## GDP.ts.l2 -0.07105    0.06375  -1.115    0.267    
## PCE.ts.l2 -0.06658    0.11947  -0.557    0.578    
## const      6.63225    6.53169   1.015    0.312    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 19.87 on 129 degrees of freedom
## Multiple R-Squared: 0.9993,  Adjusted R-squared: 0.9993 
## F-statistic: 4.687e+04 on 4 and 129 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##        GDP.ts PCE.ts
## GDP.ts 1179.6  460.4
## PCE.ts  460.4  394.9
## 
## Correlation matrix of residuals:
##        GDP.ts PCE.ts
## GDP.ts 1.0000 0.6745
## PCE.ts 0.6745 1.0000
residuos <- resid(modelos)

Se obtuvo un p value menor a 0.05, por lo cual los resultados son significativos.

Prueba Dickey Fuller

adf.test(residuos[,1]) 
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag    ADF p.value
## [1,]   0 -12.28    0.01
## [2,]   1  -7.60    0.01
## [3,]   2  -6.48    0.01
## [4,]   3  -5.45    0.01
## [5,]   4  -4.67    0.01
## Type 2: with drift no trend 
##      lag    ADF p.value
## [1,]   0 -12.23    0.01
## [2,]   1  -7.57    0.01
## [3,]   2  -6.45    0.01
## [4,]   3  -5.43    0.01
## [5,]   4  -4.65    0.01
## Type 3: with drift and trend 
##      lag    ADF p.value
## [1,]   0 -12.19    0.01
## [2,]   1  -7.54    0.01
## [3,]   2  -6.43    0.01
## [4,]   3  -5.41    0.01
## [5,]   4  -4.63    0.01
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
y = ur.df(residuos[,1], type = "trend", selectlags = "AIC")
summary(y)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -119.062  -19.577    0.652   19.013  147.063 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.54056    6.04531  -0.089    0.929    
## z.lag.1     -0.98607    0.13083  -7.537 7.72e-12 ***
## tt           0.01280    0.07799   0.164    0.870    
## z.diff.lag  -0.08040    0.08887  -0.905    0.367    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 34.14 on 128 degrees of freedom
## Multiple R-squared:  0.535,  Adjusted R-squared:  0.5241 
## F-statistic:  49.1 on 3 and 128 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -7.5372 18.968 28.4248 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -3.99 -3.43 -3.13
## phi2  6.22  4.75  4.07
## phi3  8.43  6.49  5.47
adf.test(residuos[,2]) 
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag    ADF p.value
## [1,]   0 -11.61    0.01
## [2,]   1  -6.85    0.01
## [3,]   2  -5.54    0.01
## [4,]   3  -5.08    0.01
## [5,]   4  -5.03    0.01
## Type 2: with drift no trend 
##      lag    ADF p.value
## [1,]   0 -11.57    0.01
## [2,]   1  -6.82    0.01
## [3,]   2  -5.52    0.01
## [4,]   3  -5.06    0.01
## [5,]   4  -5.02    0.01
## Type 3: with drift and trend 
##      lag    ADF p.value
## [1,]   0 -11.54    0.01
## [2,]   1  -6.80    0.01
## [3,]   2  -5.50    0.01
## [4,]   3  -5.05    0.01
## [5,]   4  -5.00    0.01
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
y = ur.df(residuos[,2], type = "trend", selectlags = "AIC")
summary(y)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -96.619  -7.803   0.418  10.804  41.779 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.69262    3.48200  -0.199   0.8426    
## z.lag.1     -0.85702    0.12603  -6.800 3.58e-10 ***
## tt           0.01336    0.04492   0.298   0.7666    
## z.diff.lag  -0.15585    0.08825  -1.766   0.0798 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.66 on 128 degrees of freedom
## Multiple R-squared:  0.5187, Adjusted R-squared:  0.5074 
## F-statistic: 45.98 on 3 and 128 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -6.8002 15.443 23.1354 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -3.99 -3.43 -3.13
## phi2  6.22  4.75  4.07
## phi3  8.43  6.49  5.47

Es importante tomar en consideración:

En este caso el p valor es en ambas pruebas es menor a 0.05, esto nos indica que ambas son estacionarias, por lo cual hay cointegración. Al haber cointegración esto nos indica que hay relación a largo plazo entre las variables.

Philips y Oularis

prueba.P0 = ca.po(datos1, type="Pz")
summary(prueba.P0)
## 
## ######################################## 
## # Phillips and Ouliaris Unit Root Test # 
## ######################################## 
## 
## Test of type Pz 
## detrending of series none 
## 
## Response GDP.ts :
## 
## Call:
## lm(formula = GDP.ts ~ zr - 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -150.279  -18.515    2.796   27.331  141.137 
## 
## Coefficients:
##          Estimate Std. Error t value Pr(>|t|)    
## zrGDP.ts  0.95334    0.03958  24.089   <2e-16 ***
## zrPCE.ts  0.08483    0.06116   1.387    0.168    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 37.23 on 133 degrees of freedom
## Multiple R-squared:  0.9999, Adjusted R-squared:  0.9999 
## F-statistic: 5.421e+05 on 2 and 133 DF,  p-value: < 2.2e-16
## 
## 
## Response PCE.ts :
## 
## Call:
## lm(formula = PCE.ts ~ zr - 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -104.417   -8.223    1.965   11.714   48.952 
## 
## Coefficients:
##          Estimate Std. Error t value Pr(>|t|)    
## zrGDP.ts 0.007454   0.021281    0.35    0.727    
## zrPCE.ts 0.996942   0.032886   30.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.02 on 133 degrees of freedom
## Multiple R-squared:  0.9999, Adjusted R-squared:  0.9999 
## F-statistic: 7.855e+05 on 2 and 133 DF,  p-value: < 2.2e-16
## 
## 
## 
## Value of test-statistic is: 13.4923 
## 
## Critical values of Pz are:
##                   10pct    5pct    1pct
## critical values 33.9267 40.8217 55.1911
prueba.P2 = ca.po(datos1, type="Pu")
summary(prueba.P2)
## 
## ######################################## 
## # Phillips and Ouliaris Unit Root Test # 
## ######################################## 
## 
## Test of type Pu 
## detrending of series none 
## 
## 
## Call:
## lm(formula = z[, 1] ~ z[, -1] - 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -185.94  -32.81   29.65   77.98  145.10 
## 
## Coefficients:
##         Estimate Std. Error t value Pr(>|t|)    
## z[, -1]  1.54450    0.00323   478.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 81.14 on 135 degrees of freedom
## Multiple R-squared:  0.9994, Adjusted R-squared:  0.9994 
## F-statistic: 2.286e+05 on 1 and 135 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: 13.4278 
## 
## Critical values of Pu are:
##                   10pct    5pct    1pct
## critical values 20.3933 25.9711 38.3413

Al ser los residuos estacionarios , ya que los valores de p presentan un valor menor a 0.05 significa que hay cointegración.

Corrección de errores

dlnPCE = diff(PCE.ts)
dlnGDP = diff(GDP.ts)

modelo3 = lm(dlnPCE~dlnGDP)
summary(modelo3)
## 
## Call:
## lm(formula = dlnPCE ~ dlnGDP)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -55.171  -6.835  -0.706   7.283  40.057 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.82308    1.58733   4.928 2.42e-06 ***
## dlnGDP       0.36719    0.03443  10.665  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.98 on 133 degrees of freedom
## Multiple R-squared:  0.461,  Adjusted R-squared:  0.4569 
## F-statistic: 113.7 on 1 and 133 DF,  p-value: < 2.2e-16
res3=modelo3$residuals
res3_1 = lag(res3)

MCE = lm(dlnPCE ~ dlnGDP + res3_1)
summary(MCE)
## 
## Call:
## lm(formula = dlnPCE ~ dlnGDP + res3_1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -51.325  -6.831  -0.756   7.618  39.266 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.24990    1.61223   4.497  1.5e-05 ***
## dlnGDP       0.38558    0.03576  10.781  < 2e-16 ***
## res3_1      -0.15209    0.08988  -1.692    0.093 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.91 on 131 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.4742, Adjusted R-squared:  0.4661 
## F-statistic: 59.06 on 2 and 131 DF,  p-value: < 2.2e-16
MCE$coefficients[3]
##     res3_1 
## -0.1520933
abs(MCE$coefficients[3])
##    res3_1 
## 0.1520933

Como se puede observar en los resultados, se obtuvo un valor negativo y un valor absoluto menor a 1. Por lo cual si hay cointegración entre ambas series de tiempo en dicho periodo determinado.