if(!require(pacman)) install.packages(pacman)
p_load(tibble,
       dplyr,
       magrittr,
       purrr,
       DBI,
       RPostgres,
       RSQLite,
       dbplyr,
       rstudioapi,
       readr,
       readxl,
       tidyr,
       readr,
       ggplot2,
       wordcloud,
       tidyverse,
       tibble,
       RSQlite,
       dbplyr,
       ggbeeswarm,
       gapminder,
       kableExtra,
       broom,
       GGally,
       modelr,
       ggstance,
       )


rm(list=ls())

Regla de Taylor

El modelo describe el nivel nominal de tasa de interes que el banco central fija como funcion del equilibrio real entre tasa de interes e inflación, considerando el nivel de producción:

\(\begin{aligned}it=r∗+πt+0.5(πt−π∗)+0.5gt\end{aligned}\)

Tomando la tasa de interes al momento t, r∗ es el equilibrio real de la tasa de interes, πt es una medida de inflacion, π∗ es el target de inflacion y GT el gap entre produccion potencial y real.

Econometric Methods with Applications in Business and Economics 1st Edition by Christiaan Heij (Author), Paul de Boer (Author), Philip Hans Franses (Author), Teun Kloek (Author), Herman K. van Dijk (Author)

Para simplificar la formula. Vamos a sacar r* y π∗, vamos a buscar una regresión lineal multiple en donde el intercepto es el efecto de estas dos variable (así como de otros desvios de la media) por otro lado vamos a considerar la produccion como Real Gross Domestic Product y no un gap

\(\begin{aligned}it=β1+β2πt+β3yt+ϵt(1)\end{aligned}\)

INTRATEA <- read_csv("INTRATE.csv")
GDPA <- read_csv("GDP.csv")
INFLA <- read_csv("INFL.csv")
PCEA <- read_csv("PCE.csv")
COMMPRIA <- read_csv("COMMPRI.csv")
glimpse(INTRATEA)
## Observations: 21,980
## Variables: 2
## $ DATE <date> 1960-01-01, 1960-01-02, 1960-01-03, 1960-01-04, 1960-01-05, 1...
## $ DFF  <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,...
glimpse(GDPA)
## Observations: 292
## Variables: 2
## $ DATE <date> 1947-01-01, 1947-04-01, 1947-07-01, 1947-10-01, 1948-01-01, 1...
## $ GDP  <dbl> 243.164, 245.968, 249.585, 259.745, 265.742, 272.567, 279.196,...
data2 <- INTRATEA %>% inner_join(GDPA, by = c("DATE" = "DATE"))
data3 <- data2 %>% inner_join(INFLA, by = c("DATE" = "DATE"))
data4 <- data3 %>% inner_join(PCEA, by = c("DATE" = "DATE"))
df <- data4 %>% inner_join(COMMPRIA, by = c("DATE" = "DATE"))
glimpse(df)
## Observations: 60
## Variables: 6
## $ DATE           <date> 1960-01-01, 1961-01-01, 1962-01-01, 1963-01-01, 196...
## $ DFF            <dbl> 4.00, 3.00, 2.50, 3.00, 3.25, 4.00, 4.63, 5.00, 4.50...
## $ GDP            <dbl> 542.648, 545.018, 594.013, 621.672, 669.822, 717.790...
## $ FPCPITOTLZGUSA <dbl> 1.457976, 1.070724, 1.198773, 1.239669, 1.278912, 1....
## $ PCE            <dbl> 323.6, 332.2, 353.2, 374.4, 396.8, 424.4, 465.2, 494...
## $ PPIACO         <dbl> 31.6, 31.8, 31.7, 31.6, 31.8, 31.8, 32.9, 33.4, 33.8...
df %<>% rename("INFL" ="FPCPITOTLZGUSA", "INTRATE" ="DFF")
summary(df)
##       DATE               INTRATE            GDP               INFL        
##  Min.   :1960-01-01   Min.   : 0.040   Min.   :  542.6   Min.   :-0.3555  
##  1st Qu.:1974-10-01   1st Qu.: 2.475   1st Qu.: 1584.9   1st Qu.: 1.8766  
##  Median :1989-07-02   Median : 4.080   Median : 5692.0   Median : 2.9834  
##  Mean   :1989-07-02   Mean   : 4.997   Mean   : 7316.5   Mean   : 3.7208  
##  3rd Qu.:2004-04-01   3rd Qu.: 5.945   3rd Qu.:12130.5   3rd Qu.: 4.2947  
##  Max.   :2019-01-01   Max.   :22.000   Max.   :21098.8   Max.   :13.5492  
##       PCE              PPIACO     
##  Min.   :  323.6   Min.   : 31.6  
##  1st Qu.:  952.8   1st Qu.: 55.3  
##  Median : 3607.2   Median :112.7  
##  Mean   : 4826.2   Mean   :108.6  
##  3rd Qu.: 8108.1   3rd Qu.:143.8  
##  Max.   :14227.6   Max.   :203.8
sum(is.na(df))
## [1] 0

Regression multiple


\(\begin{aligned}Interes Nominal = \beta_{0}+\beta_{1}.GDP+\beta_{2}.INFL+\beta_{3}.PCE\\+\beta_{4}.PCOMM\end{aligned}\)

fit1 = lm(INTRATE ~GDP+INFL+PCE+PPIACO, data = df)
summary(fit1)
## 
## Call:
## lm(formula = INTRATE ~ GDP + INFL + PCE + PPIACO, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1266 -1.6577 -0.2489  1.1809  9.2988 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.744241   1.194219  -0.623   0.5357    
## GDP          0.007218   0.003224   2.239   0.0292 *  
## INFL         0.609066   0.142826   4.264 7.94e-05 ***
## PCE         -0.011797   0.004508  -2.617   0.0114 *  
## PPIACO       0.069971   0.027724   2.524   0.0145 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.631 on 55 degrees of freedom
## Multiple R-squared:  0.6573, Adjusted R-squared:  0.6324 
## F-statistic: 26.38 on 4 and 55 DF,  p-value: 3.085e-12
anova(fit1)
## Analysis of Variance Table
## 
## Response: INTRATE
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## GDP        1 270.67 270.673 39.1111 6.240e-08 ***
## INFL       1 245.29 245.286 35.4429 1.913e-07 ***
## PCE        1 170.14 170.142 24.5849 7.183e-06 ***
## PPIACO     1  44.08  44.084  6.3699   0.01452 *  
## Residuals 55 380.63   6.921                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Version para markdown

if(!require(pacman)) install.packages(pacman)
p_load(jtools)

summ(fit1) #Jtolls library, una manera de presentarlo mejor
Observations 60
Dependent variable INTRATE
Type OLS linear regression
F(4,55) 26.38
0.66
Adj. R² 0.63
Est. S.E. t val. p
(Intercept) -0.74 1.19 -0.62 0.54
GDP 0.01 0.00 2.24 0.03
INFL 0.61 0.14 4.26 0.00
PCE -0.01 0.00 -2.62 0.01
PPIACO 0.07 0.03 2.52 0.01
Standard errors: OLS

Residuos

pro_res <- add_residuals(df, fit1, "lresid2") #calculo los residuos entre el DF original y mi regresion multiple fit1

Extraigo las muestras de mayor residuo.

¿Tienen algo particular?

pro_res %>%
  filter(abs(lresid2) > 1) %>%
  add_predictions(fit1) %>%
  mutate(pred = round(pred))  %>%
  arrange(-lresid2)
## # A tibble: 38 x 8
##    DATE       INTRATE    GDP  INFL    PCE PPIACO lresid2  pred
##    <date>       <dbl>  <dbl> <dbl>  <dbl>  <dbl>   <dbl> <dbl>
##  1 1981-01-01   22     3124. 10.3   1870    95.2    9.30    13
##  2 1986-01-01   13.5   4508.  1.90  2827.  103.     6.64     7
##  3 1987-01-01   14.4   4722.  3.66  2936.  100.     6.38     8
##  4 2019-01-01    2.4  21099.  1.81 14228.  199.     3.66    -1
##  5 1983-01-01   11.2   3473.  3.21  2174   100.     3.55     8
##  6 1982-01-01   13.1   3274.  6.13  1997.   99.7    3.09    10
##  7 2018-01-01    1.33 20163.  2.44 13698.  198.     2.80    -1
##  8 2017-01-01    0.55 19190.  2.13 13079.  191.     2.44    -2
##  9 2007-01-01    5.17 14209.  2.85  9516.  164      2.41     3
## 10 2016-01-01    0.2  18424.  1.26 12480.  183.     1.65    -1
## # ... with 28 more rows
#MODELO Numero 2, otras variables PIN
fit2 = lm(INTRATE ~INFL + PCE + GDP, data = df)

Grafico de Residuos

df %>%
  gather_residuals(fit1, fit2) %>%
  ggplot(aes(x = DATE, y = resid, color = model)) +
  geom_line(alpha = 0.75)

Tidy Model

[..] you start with the most general model, including as many variables as are at hand. Then, check whether one or more variables can be removed from the model. This can be based on individual t-tests, or a joint F-test in case of multiple variables. In case you remove one variable at a time, the variable with the lowest absolute t-value is removed from the model. The model is estimated again without that variable, and the procedure is repeated. The procedure continues until all remaining variables are significant.


\(\begin{aligned}Interes Nominal = \beta_{0}+\beta_{1}.GDP+\beta_{2}.INFL+\beta_{3}.PCE\\+\beta_{4}.PCOMM\end{aligned}\)

Varias forumlas de general a especifico

# Fórmula de los modelos
lm_formulas = formulas(.response = ~INTRATE,
                         GDP1 = ~GDP, #formula solo con GDP
                         TAYLOR = ~GDP+ INFL, #inflacion y variable hdp
                         PIN = ~INFL + PCE + GDP, #agregando consumo
                         total = ~GDP+INFL+PCE+PPIACO, # el modelo mas general
                          )

Mapeo de formulas

modelos <- data_frame(lm_formulas) %>% # dataframe a partir del objeto formulas
  mutate(modelos = names(lm_formulas), # columna con los nombres de las formulas
         expression = paste(lm_formulas), # columna con las expresiones de las formulas
         mod = map(lm_formulas, ~ lm(.,data = df))) #Mapeo de las formulas definidas de modelos
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
modelos$mod
## $GDP1
## 
## Call:
## lm(formula = ., data = df)
## 
## Coefficients:
## (Intercept)          GDP  
##   7.5176224   -0.0003446  
## 
## 
## $TAYLOR
## 
## Call:
## lm(formula = ., data = df)
## 
## Coefficients:
## (Intercept)          GDP         INFL  
##   3.3227944   -0.0001858    0.8153039  
## 
## 
## $PIN
## 
## Call:
## lm(formula = ., data = df)
## 
## Coefficients:
## (Intercept)         INFL          PCE          GDP  
##     0.86366      0.68850     -0.01832      0.01230  
## 
## 
## $total
## 
## Call:
## lm(formula = ., data = df)
## 
## Coefficients:
## (Intercept)          GDP         INFL          PCE       PPIACO  
##   -0.744241     0.007218     0.609066    -0.011797     0.069971

Coeficientes de cada modelo

Tidy: map(mod,tidy)

modelos <- modelos %>% 
  mutate(tidy = map(mod,tidy)) #Mapeo los modelos definidos arriba y los estoy agregando como data frame de data frame

modelos
## # A tibble: 4 x 5
##   lm_formulas  modelos expression                      mod        tidy          
##   <named list> <chr>   <chr>                           <named li> <named list>  
## 1 <formula>    GDP1    INTRATE ~ GDP                   <lm>       <tibble [2 x ~
## 2 <formula>    TAYLOR  INTRATE ~ GDP + INFL            <lm>       <tibble [3 x ~
## 3 <formula>    PIN     INTRATE ~ INFL + PCE + GDP      <lm>       <tibble [4 x ~
## 4 <formula>    total   INTRATE ~ GDP + INFL + PCE + P~ <lm>       <tibble [5 x ~

A esta salida se le llama hacer un nest Ver una columna, es ver un data frame. En este caso la columna tidy es la columna del mapeo de las formulas definidas en funcion de una lm

mod = map(lm_formulas, ~ lm(.,data = df))

modelos$tidy
## $GDP1
## # A tibble: 2 x 5
##   term         estimate std.error statistic  p.value
##   <chr>           <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)  7.52     0.763          9.86 5.26e-14
## 2 GDP         -0.000345 0.0000797     -4.32 6.13e- 5
## 
## $TAYLOR
## # A tibble: 3 x 5
##   term         estimate std.error statistic   p.value
##   <chr>           <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)  3.32     1.08           3.08 0.00323  
## 2 GDP         -0.000186 0.0000752     -2.47 0.0164   
## 3 INFL         0.815    0.168          4.85 0.0000100
## 
## $PIN
## # A tibble: 4 x 5
##   term        estimate std.error statistic   p.value
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)   0.864    1.06        0.817 0.418    
## 2 INFL          0.689    0.146       4.72  0.0000161
## 3 PCE          -0.0183   0.00387    -4.74  0.0000153
## 4 GDP           0.0123   0.00264     4.66  0.0000196
## 
## $total
## # A tibble: 5 x 5
##   term        estimate std.error statistic   p.value
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept) -0.744     1.19       -0.623 0.536    
## 2 GDP          0.00722   0.00322     2.24  0.0292   
## 3 INFL         0.609     0.143       4.26  0.0000794
## 4 PCE         -0.0118    0.00451    -2.62  0.0114   
## 5 PPIACO       0.0700    0.0277      2.52  0.0145

Para poder observar los valores, realizamos el Unnest. Donde podemos ver todos los modelos en este caso en funcion de los coeficientes y su pvalor.

modelos %>%  
  unnest(tidy, .drop = TRUE) %>% 
  # columns of
  select(term, estimate, p.value)%>%
  # Chequeo la significatividad de los coeficientes
  mutate(is_sign = p.value <= 0.05) 
## Warning: The `.drop` argument of `unnest()` is deprecated as of tidyr 1.0.0.
## All list-columns are now preserved.
## This warning is displayed once per session.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## # A tibble: 14 x 4
##    term         estimate  p.value is_sign
##    <chr>           <dbl>    <dbl> <lgl>  
##  1 (Intercept)  7.52     5.26e-14 TRUE   
##  2 GDP         -0.000345 6.13e- 5 TRUE   
##  3 (Intercept)  3.32     3.23e- 3 TRUE   
##  4 GDP         -0.000186 1.64e- 2 TRUE   
##  5 INFL         0.815    1.00e- 5 TRUE   
##  6 (Intercept)  0.864    4.18e- 1 FALSE  
##  7 INFL         0.689    1.61e- 5 TRUE   
##  8 PCE         -0.0183   1.53e- 5 TRUE   
##  9 GDP          0.0123   1.96e- 5 TRUE   
## 10 (Intercept) -0.744    5.36e- 1 FALSE  
## 11 GDP          0.00722  2.92e- 2 TRUE   
## 12 INFL         0.609    7.94e- 5 TRUE   
## 13 PCE         -0.0118   1.14e- 2 TRUE   
## 14 PPIACO       0.0700   1.45e- 2 TRUE

Mapeo con glance.

Glance posee todas las metricas que puede tener nuestro modelo. Acá selecciono un caso particular, en el punto siguiente desarrollo el modo.

glance(fit1) %>% select(r.squared, sigma, AIC)
## # A tibble: 1 x 3
##   r.squared sigma   AIC
##       <dbl> <dbl> <dbl>
## 1     0.657  2.63  293.

Defino entonces, que va a ser mapeado con glance, el cual tiene todas las metricas posibles de nuestro modelo

# Calculo medidas de evaluación para cada modelo con glance
modelos <- modelos %>% mutate(glance = map(mod,glance))
modelos$glance
## $GDP1
## # A tibble: 1 x 11
##   r.squared adj.r.squared sigma statistic p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>
## 1     0.244         0.231  3.81      18.7 6.13e-5     2  -164.  335.  341.
## # ... with 2 more variables: deviance <dbl>, df.residual <int>
## 
## $TAYLOR
## # A tibble: 1 x 11
##   r.squared adj.r.squared sigma statistic p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>
## 1     0.464         0.446  3.23      24.7 1.86e-8     3  -154.  316.  324.
## # ... with 2 more variables: deviance <dbl>, df.residual <int>
## 
## $PIN
## # A tibble: 1 x 11
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <int>  <dbl> <dbl> <dbl>
## 1     0.618         0.597  2.75      30.2 9.78e-12     4  -144.  298.  308.
## # ... with 2 more variables: deviance <dbl>, df.residual <int>
## 
## $total
## # A tibble: 1 x 11
##   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <int>  <dbl> <dbl> <dbl>
## 1     0.657         0.632  2.63      26.4 3.09e-12     5  -141.  293.  306.
## # ... with 2 more variables: deviance <dbl>, df.residual <int>

Ordeno para presentar mi test de selección de modelo en función de R2.

# ordeno por R
modelos %>% 
  unnest(glance, .drop = TRUE) %>% # desanido las medidas de evaluación
  mutate(r.squared) %>% # Calculo R
  select(c(modelos, r.squared,adj.r.squared)) %>% 
  arrange(-r.squared)
## # A tibble: 4 x 3
##   modelos r.squared adj.r.squared
##   <chr>       <dbl>         <dbl>
## 1 total       0.657         0.632
## 2 PIN         0.618         0.597
## 3 TAYLOR      0.464         0.446
## 4 GDP1        0.244         0.231