Taller 2 Autocorrelación

se instala la libreria y la base de datos:

# Library: Paquetes
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.1     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## Warning: package 'tidyr' was built under R version 4.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(magrittr)
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(AER)
## Warning: package 'AER' was built under R version 4.2.2
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 4.2.2
## Loading required package: survival
library(dynlm)
## Warning: package 'dynlm' was built under R version 4.2.2
library(forecast)
## Warning: package 'forecast' was built under R version 4.2.2
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(readxl)
library(stargazer)
## 
## Please cite as: 
## 
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(quantmod)
## Warning: package 'quantmod' was built under R version 4.2.2
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.2.2
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 4.2.2
library(urca)
## Warning: package 'urca' was built under R version 4.2.2
library(readr)
Datos_T2 <- read_csv("C:/Users/Martin Alejandro/OneDrive/Documentos/econometria/eco II/Datos_T2.csv")
## Rows: 52 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (11): YEAR, GASEXP, POP, GASP, INCOME, PNC, PUC, PPT, PD, PN, PS
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(Datos_T2)

Dar formato a columna de años, usar formato de datos temporales

Datos_T2 %<>% mutate(YEAR = as.yearqtr(YEAR, format = "%Y:0%q"))

Desarrollo

Punto a

y <- xts(Datos_T2$GASEXP, Datos_T2$YEAR)["1953::2004"] # Gasto total de EE.UU. en gasolina #
X1 <- xts(Datos_T2$INCOME, Datos_T2$YEAR)["1953::2004"] # Ingreso percapita #
Pob <- xts(Datos_T2$POP, Datos_T2$YEAR)["1953::2004"] # Población total de EE. UU en miles #
X2 <- xts(Datos_T2$GASP, Datos_T2$YEAR)["1953::2004"] # Índice de precios de la gasolina #
X3 <- xts(Datos_T2$PNC, Datos_T2$YEAR)["1953::2004"] # Índice de precios de los vehículos nuevos #
X4 <- xts(Datos_T2$PUC, Datos_T2$YEAR)["1953::2004"] # Índice de precios de los coches usados #
X5 <- xts(Datos_T2$PPT, Datos_T2$YEAR)["1953::2004"] # Índice de precios del transporte público #
X6 <- xts(Datos_T2$PD, Datos_T2$YEAR)["1953::2004"] # Índice de precios agregado para bienes de consumo duraderos #
X7 <- xts(Datos_T2$PN, Datos_T2$YEAR)["1953::2004"] # Índice de precios agregado para bienes de consumo no duraderos #
X8 <- xts(Datos_T2$PS, Datos_T2$YEAR)["1953::2004"] # Índice de precios agregado para servicios al consumidor #

Y <- y/Pob # Demanda de gasolina #

Modelo1 <- lm(Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data = Datos_T2)
summary(Modelo1)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data = Datos_T2)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.229e-05 -3.323e-06  2.288e-07  3.615e-06  1.235e-05 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -9.437e-05  1.436e-05  -6.571 5.40e-08 ***
## X1           7.807e-09  9.574e-10   8.155 2.87e-10 ***
## X2           4.812e-06  1.779e-07  27.039  < 2e-16 ***
## X3           6.769e-07  5.118e-07   1.323   0.1930    
## X4           6.059e-07  2.363e-07   2.565   0.0139 *  
## X5          -1.539e-07  2.280e-07  -0.675   0.5033    
## X6          -2.382e-06  5.263e-07  -4.526 4.70e-05 ***
## X7           1.171e-06  6.083e-07   1.925   0.0609 .  
## X8          -9.379e-08  3.551e-07  -0.264   0.7930    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.292e-06 on 43 degrees of freedom
## Multiple R-squared:  0.9994, Adjusted R-squared:  0.9993 
## F-statistic:  9009 on 8 and 43 DF,  p-value: < 2.2e-16
Residuales <- residuals(Modelo1)

# Varianza constante #
(ggplot(Modelo1, aes(fitted.values(Modelo1),Residuales))
  + geom_point(col=I("red"))
  + stat_smooth(method = "loess")
  + stat_binhex(bins = 50)
  + scale_fill_gradient(low = "light blue",high = "black")
  + geom_hline(yintercept = 0,col ="black", linetype = "dashed")
  + xlab("Fitted Values")
  + ylab("Residuals"))
## Don't know how to automatically pick scale for object of type xts/zoo. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type xts/zoo. Defaulting to continuous.
## `geom_smooth()` using formula 'y ~ x'

Punto b

Pruebas de validación

Prueba de Reset Ramsey

resettest(Modelo1, power = 2:3, type = "regressor") 
## 
##  RESET test
## 
## data:  Modelo1
## RESET = 11.514, df1 = 16, df2 = 27, p-value = 3.419e-08

Residuales recursivos

library(strucchange)
## Warning: package 'strucchange' was built under R version 4.2.2
## 
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
## 
##     boundary
cusum<- efp (Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8,type = "OLS-CUSUM")
sctest(Modelo1)
## 
##  M-fluctuation test
## 
## data:  Modelo1
## f(efp) = 1.5351, p-value = 0.1505
plot(cusum)

Leverage

library(olsrr)
## Warning: package 'olsrr' was built under R version 4.2.2
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
## 
##     rivers
library(car)
ols_plot_cooksd_chart(Modelo1)

summary(influence.measures(Modelo1))
## Potentially influential observations of
##   lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data = Datos_T2) :
## 
##      dfb.1_ dfb.X1 dfb.X2 dfb.X3 dfb.X4 dfb.X5  dfb.X6  dfb.X7 dfb.X8  dffit  
## [1,] -0.01   0.01   0.00   0.02  -0.01   0.00   -0.01    0.00   0.00   -0.03  
## [2,] -0.74   0.89   0.96   0.84   0.64   1.91_* -1.35_*  0.43  -2.09_*  2.98_*
## [3,] -0.54   0.28   0.55   0.51  -0.37   0.69   -0.08   -0.24  -0.51   -1.58_*
## [4,]  0.01   0.00   0.00  -0.01  -0.01  -0.02    0.01   -0.01   0.03    0.06  
## [5,] -0.06   0.13   0.42   0.10  -0.22  -0.25    0.04   -0.34   0.48    1.20  
##      cov.r   cook.d hat    
## [1,]  1.83_*  0.00   0.32  
## [2,]  0.23_*  0.79   0.43  
## [3,]  0.32_*  0.24   0.23  
## [4,]  1.80_*  0.00   0.31  
## [5,]  2.17_*  0.16   0.55_*
influencePlot(Modelo1)
leveragePlots(Modelo1)

Test de Chow

library(strucchange)

Modelo para la prueba de Chow y Durbin sin series de tiempo

c <- Datos_T2$GASEXP/Datos_T2$POP # Demanda de gasolina sin series de tiempo #
view(c)

modelo <- lm( c ~ Datos_T2$GASP + Datos_T2$INCOME + Datos_T2$PNC + Datos_T2$PUC + Datos_T2$PPT + Datos_T2$PD+ Datos_T2$PN + Datos_T2$PS, data = Datos_T2)
summary(modelo)
## 
## Call:
## lm(formula = c ~ Datos_T2$GASP + Datos_T2$INCOME + Datos_T2$PNC + 
##     Datos_T2$PUC + Datos_T2$PPT + Datos_T2$PD + Datos_T2$PN + 
##     Datos_T2$PS, data = Datos_T2)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.229e-05 -3.323e-06  2.288e-07  3.615e-06  1.235e-05 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -9.437e-05  1.436e-05  -6.571 5.40e-08 ***
## Datos_T2$GASP    4.812e-06  1.779e-07  27.039  < 2e-16 ***
## Datos_T2$INCOME  7.807e-09  9.574e-10   8.155 2.87e-10 ***
## Datos_T2$PNC     6.769e-07  5.118e-07   1.323   0.1930    
## Datos_T2$PUC     6.059e-07  2.363e-07   2.565   0.0139 *  
## Datos_T2$PPT    -1.539e-07  2.280e-07  -0.675   0.5033    
## Datos_T2$PD     -2.382e-06  5.263e-07  -4.526 4.70e-05 ***
## Datos_T2$PN      1.171e-06  6.083e-07   1.925   0.0609 .  
## Datos_T2$PS     -9.379e-08  3.551e-07  -0.264   0.7930    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.292e-06 on 43 degrees of freedom
## Multiple R-squared:  0.9994, Adjusted R-squared:  0.9993 
## F-statistic:  9009 on 8 and 43 DF,  p-value: < 2.2e-16
dd_fs <- Fstats(c ~ Datos_T2$GASP + Datos_T2$INCOME + Datos_T2$PNC + Datos_T2$PUC + Datos_T2$PPT + Datos_T2$PD+ Datos_T2$PN + Datos_T2$PS, from = 0.1)
## Warning in Fstats(c ~ Datos_T2$GASP + Datos_T2$INCOME + Datos_T2$PNC +
## Datos_T2$PUC + : 'from' changed (was too small)
## Warning in Fstats(c ~ Datos_T2$GASP + Datos_T2$INCOME + Datos_T2$PNC +
## Datos_T2$PUC + : 'to' changed (was too large)
sctest(dd_fs)
## 
##  supF test
## 
## data:  dd_fs
## sup.F = 104.75, p-value < 2.2e-16
strucchange::breakpoints(dd_fs)
## 
##   Optimal 2-segment partition: 
## 
## Call:
## breakpoints.Fstats(obj = dd_fs)
## 
## Breakpoints at observation number:
## 38 
## 
## Corresponding to breakdates:
## 0.7115385
plot(dd_fs)

dd_fs <- Fstats(Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 , from = 0.1)
## Warning in Fstats(Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, from = 0.1): 'from'
## changed (was too small)
## Warning in Fstats(Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, from = 0.1): 'to'
## changed (was too large)
sctest(dd_fs)
## 
##  supF test
## 
## data:  dd_fs
## sup.F = 104.75, p-value < 2.2e-16
strucchange::breakpoints(dd_fs)
## 
##   Optimal 2-segment partition: 
## 
## Call:
## breakpoints.Fstats(obj = dd_fs)
## 
## Breakpoints at observation number:
## 38 
## 
## Corresponding to breakdates:
## 0.7115385
plot(dd_fs)

Prueba de Breusch-Pagan

library(lmtest)
bptest(Modelo1)
## 
##  studentized Breusch-Pagan test
## 
## data:  Modelo1
## BP = 10.325, df = 8, p-value = 0.2429

Multicolinealidad

vif(Modelo1)
##         X1         X2         X3         X4         X5         X6         X7 
##   51.45310   54.80093  656.08640  265.13813  453.14369  682.36374 1595.63523 
##         X8 
## 1028.15186

Punto b

Pruebas formales

Prueba de Durbin Watson

dwtest(modelo)
## 
##  Durbin-Watson test
## 
## data:  modelo
## DW = 1.1197, p-value = 3.744e-06
## alternative hypothesis: true autocorrelation is greater than 0

Prueba de Breusch-Godfrey

bgtest(Modelo1)
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  Modelo1
## LM test = 10.09, df = 1, p-value = 0.001491

Ljung box

Box.test(residuals(Modelo1), type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(Modelo1)
## X-squared = 10.254, df = 1, p-value = 0.001364

Punto c

Autocorrelación simple

acf(na.omit(Y), lag.max = 10, plot = FALSE)
## 
## Autocorrelations of series 'na.omit(Y)', by lag
## 
##     0     1     2     3     4     5     6     7     8     9    10 
## 1.000 0.911 0.837 0.793 0.734 0.658 0.610 0.577 0.529 0.480 0.437
acf(na.omit(Y), lag.max = 50, plot = TRUE)

Autocorrelación parcial

pacf(na.omit(Y), lag.max = 30, plot = TRUE)

pacf(na.omit(Y), lag.max = 30,plot = FALSE)
## 
## Partial autocorrelations of series 'na.omit(Y)', by lag
## 
##      1      2      3      4      5      6      7      8      9     10     11 
##  0.911  0.046  0.137 -0.079 -0.125  0.089  0.060 -0.043 -0.032 -0.052 -0.045 
##     12     13     14     15     16     17     18     19     20     21     22 
## -0.023 -0.051 -0.038 -0.062 -0.013 -0.019  0.004  0.035 -0.165 -0.084 -0.080 
##     23     24     25     26     27     28     29     30 
## -0.057 -0.084 -0.081 -0.022  0.091  0.077  0.047 -0.041

Corrección, modelos autoregresivos

Modelo AR(1)

ar.ols(
  Y,
  order.max = 1,
  demean = FALSE,
  intercept = TRUE
)
## 
## Call:
## ar.ols(x = Y, order.max = 1, demean = FALSE, intercept = TRUE)
## 
## Coefficients:
##      1  
## 1.0344  
## 
## Intercept: 4.785e-06 (8.379e-06) 
## 
## Order selected 1  sigma^2 estimated as  1.159e-09

Longitud de la base de datos

N <- length(Y)
Demanda_level <- as.numeric(Y[-1])
Demanda_lags <- as.numeric(Y[-N])
armod <- lm(Demanda_level ~ Demanda_lags)

Modelo robusto

coeftest(armod, vcov. = vcovHC, type = "HC1")
## 
## t test of coefficients:
## 
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.7854e-06 5.3502e-06  0.8944   0.3755    
## Demanda_lags 1.0344e+00 3.2395e-02 31.9301   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Modelo AR(2)

Y2 <- dynlm(ts(Demanda_level) ~ L(ts(Demanda_level), 1) + L(ts(Demanda_level), 2))
coeftest(Y2, vcov. = vcovHC, type = "HC4")
## 
## t test of coefficients:
## 
##                            Estimate  Std. Error t value Pr(>|t|)   
## (Intercept)              5.2096e-06  7.7081e-06  0.6759 0.502517   
## L(ts(Demanda_level), 1)  1.2059e+00  3.5606e-01  3.3868 0.001457 **
## L(ts(Demanda_level), 2) -1.8057e-01  3.4780e-01 -0.5192 0.606130   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Modelo AR(4)

Y4 <- dynlm(ts(Demanda_level) ~ L(ts(Demanda_level), 1) + L(ts(Demanda_level), 2) + L(ts(Demanda_level), 3) + L(ts(Demanda_level), 4))
coeftest(Y4, vcov. = vcovHC, type = "HC4")
## 
## t test of coefficients:
## 
##                            Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)              6.2096e-06  6.3918e-06  0.9715   0.33686    
## L(ts(Demanda_level), 1)  1.3360e+00  2.6036e-01  5.1314 6.943e-06 ***
## L(ts(Demanda_level), 2) -8.6759e-01  3.8459e-01 -2.2559   0.02935 *  
## L(ts(Demanda_level), 3)  8.3457e-01  5.4220e-01  1.5392   0.13125    
## L(ts(Demanda_level), 4) -2.7637e-01  3.6304e-01 -0.7613   0.45076    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Punto d

Para la prueba BIC

order <- 1:4
BICs <- sapply(order, function(x){
  "AR" = BIC(dynlm(
    ts(Demanda_level) ~ L(ts(Demanda_level), 1:x)
  ))
}
)
BICs
## [1] -874.1743 -852.9859 -841.6040 -821.7150

Criterio de información de Akaike (AIC)

AICs <- sapply(order, function(x){
  "AR" = AIC(dynlm(
    ts(Demanda_level) ~ L(ts(Demanda_level), 1:x)
  ))
}
)
AICs
## [1] -879.9103 -860.5532 -850.9600 -832.8159

Corrección de autocorrelación

Prueba de Durbin Watson

dwtest(armod)
## 
##  Durbin-Watson test
## 
## data:  armod
## DW = 1.5843, p-value = 0.04872
## alternative hypothesis: true autocorrelation is greater than 0

Prueba de Breusch-Godfrey

bgtest(armod)
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  armod
## LM test = 1.3738, df = 1, p-value = 0.2412

Ljung box

Box.test(residuals(armod), type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(armod)
## X-squared = 1.2934, df = 1, p-value = 0.2554