TFM - Test Regressions

Load libraries and data

library(ExPanDaR)
library(dplyr)
library(tidyr)
library(plyr)
library(ggplot2)
library(kableExtra)
library(magrittr)
library(plm)
library(panelr)
library(ggplot2)
library(tibble)
library(fastDummies)
#Load the data
dta_wide <- readRDS(url("https://github.com/AndresMtnezGlez/heptaomicron/raw/main/dta_wide.rds"))

User defined functions

plot_res_plm <- function (plm_residuals){
  
  df_res <- as.data.frame(plm_residuals)
  
  df_res %<>%   rownames_to_column( var = "rowname")   %>%
    separate(rowname,  c("id", "year"))    %>% 
    group_by(id, year)
  
  names(df_res)  <- c("id", "year" , "residuals")
  
  df_res %<>%  transform(id = as.numeric(id)) %>% 
    transform(year = as.numeric(year)) %>% 
    transform(residuals = as.numeric(residuals)) %>% 
    group_by(id, year) %>% 
    mutate(dta_wide, cntry = case_when((id == 1) ~ "AUT",
                                       (id ==   2) ~ "BEL",
                                       (id == 3) ~ "CYP",
                                       (id == 4) ~ "FIN",
                                       (id == 5) ~ "FRA",
                                       (id == 6) ~ "DEU",
                                       (id == 7) ~ "GRC",
                                       (id == 8) ~ "IRL",
                                       (id == 9) ~ "ITA",
                                       (id == 10) ~ "ESP",
                                       (id == 11) ~ "NLD",
                                       (id == 12) ~ "PRT"))
  
  
  ggplot(data = df_res, aes(year, residuals)) +
    geom_line(color = "#cb4154", size = 1) +
    geom_hline(yintercept=0, linetype="dashed", color = "red")+
    labs(title = "Regression Residuals",
         subtitle = 'EZ Selected countries',
         y = "Percentaje of GDP", x = "") + 
    facet_wrap(~ cntry,  scales="free") 
  
  return(df_res)

}

plot_pred_plm <- function (dependent, model) {
  
  PDEB <- as.data.frame(dependent)
  names(PDEB) <- "model_PDEB"
  
  df_pred <- as.data.frame(predict(model))
  
  df_pred$year <- df_res$year
  df_pred$id <- df_res$id
  df_pred$PDEB <- PDEB$model_PDEB
  
  names(df_pred)  <- c("predicted", "year" , "id", "model_PDEB")
  
  df_pred %<>%  transform(id = as.numeric(id)) %>% 
    transform(year = as.numeric(year)) %>% 
    transform(predicted = as.numeric(predicted)) %>% 
    group_by(id, year) %>% 
    mutate(dta_wide, cntry = case_when((id == 1) ~ "AUT",
                                       (id ==   2) ~ "BEL",
                                       (id == 3) ~ "CYP",
                                       (id == 4) ~ "FIN",
                                       (id == 5) ~ "FRA",
                                       (id == 6) ~ "DEU",
                                       (id == 7) ~ "GRC",
                                       (id == 8) ~ "IRL",
                                       (id == 9) ~ "ITA",
                                       (id == 10) ~ "ESP",
                                       (id == 11) ~ "NLD",
                                       (id == 12) ~ "PRT"))
  
  
  ggplot(data = df_pred, aes(x=year)) +
    geom_line(aes(y = predicted), color = "#cb4154", size = 1 ) + 
    geom_line(aes(y = model_PDEB), color="#4199cb", size = 1 ) +
    geom_hline(yintercept=60, linetype="dashed", color = "red")+
    labs(title = "Regression Predicted values (red) and Observed (Blue)",
         subtitle = 'EZ Selected countries',
         y = "Percentaje of GDP", x = "") + 
    theme(legend.position="bottom")  +
    facet_wrap(~ cntry,  scales="free") 
  
}

Transform the variables

Plot the variables

#Plot the variables 
ggplot(data = dta_wide, aes(year, PDEB)) +
  geom_line(color = "steelblue", size = 1) +
  geom_hline(yintercept=60, linetype="dashed", color = "red")+
  labs(title = "General government consolidated gross debt (based on ESA 2010)",
       subtitle = 'EZ Selected countries',
       y = "Percentaje of GDP", x = "") + 
  facet_wrap(~ cntry,  scales="free")

ggplot(data = dta_wide, aes(year, g_TFFP)) +
  geom_line(color = "steelblue", size = 1) +
  geom_hline(yintercept=0, linetype="dashed", color = "red")+
  labs(title = "Potential TFP Growth",
       subtitle = 'EZ Selected countries',
       y = "Percentaje of GDP", x = "") + 
  facet_wrap(~ cntry,  scales="free")

ggplot(data = dta_wide, aes(year, g_POPU)) +
  geom_line(color = "steelblue", size = 1) +
  geom_hline(yintercept=0, linetype="dashed", color = "red")+
  labs(title = "Population Growth",
       subtitle = 'EZ Selected countries',
       y = "Growth per 1.000 people", x = "") + 
  facet_wrap(~ cntry,  scales="free")

ggplot(data = dta_wide, aes(year, LTIR)) +
  geom_line(color = "steelblue", size = 1) +
  geom_hline(yintercept=0, linetype="dashed", color = "red")+
  labs(title = "Long Term Interest Rate",
       subtitle = 'EZ Selected countries',
       y = "Percentaje", x = "") + 
  facet_wrap(~ cntry,  scales="free")

ggplot(data = dta_wide, aes(year, MKTO)) +
  geom_line(color = "steelblue", size = 1) +
  geom_hline(yintercept=0, linetype="dashed", color = "red")+
  labs(title = "Market Openess",
       subtitle = 'EZ Selected countries',
       y = "Percentaje of GDP", x = "") + 
  facet_wrap(~ cntry,  scales="free")

Test the regressions

Fixed effects

###Regression
panel_pdta_wide <- pdata.frame(dta_wide, index=c("id", "year"))
Reg_1 <- plm(PDEB~ TFFP + PGDP + POPU + LTIR +  MKTO + STIR, 
                   model = "within",  data=panel_pdta_wide)
summary(Reg_1)

## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = PDEB ~ TFFP + PGDP + POPU + LTIR + MKTO + STIR, 
##     data = panel_pdta_wide, model = "within")
## 
## Unbalanced Panel: n = 12, T = 20-54, N = 334
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -45.8934  -8.9898  -1.1181   8.3899  43.3150 
## 
## Coefficients:
##         Estimate  Std. Error  t-value  Pr(>|t|)    
## TFFP  -0.1011756   0.1226822  -0.8247    0.4102    
## PGDP   0.0557349   0.0088103   6.3261 8.588e-10 ***
## POPU   0.0015801   0.0009680   1.6323    0.1036    
## LTIR   3.6177393   0.4628679   7.8159 8.179e-14 ***
## MKTO 205.1578658  27.6121926   7.4300 1.023e-12 ***
## STIR  -4.2825955   0.3899427 -10.9826 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    175880
## Residual Sum of Squares: 63471
## R-Squared:      0.63913
## Adj. R-Squared: 0.61971
## F-statistic: 93.2757 on 6 and 316 DF, p-value: < 2.22e-16

fixef(Reg_1)

##        1        2        3        4        5        6        7        8 
##   45.126   69.906   76.033   24.888 -125.684 -205.196  114.882   18.077 
##        9       10       11       12 
##  -65.109  -51.536  -11.820   71.954

###Residual Analysis 
df_res <- plot_res_plm(Reg_1$residuals)
plot_pred_plm(Reg_1$model$PDEB,Reg_1 )

Pooling

###Regression
panel_pdta_wide <- pdata.frame(dta_wide, index=c("id", "year"))
Reg_2 <- plm(PDEB~ TFFP + PGDP + POPU + YIDC +  MKTO, 
                   model = "pooling",  data=panel_pdta_wide)
summary(Reg_2)

## Pooling Model
## 
## Call:
## plm(formula = PDEB ~ TFFP + PGDP + POPU + YIDC + MKTO, data = panel_pdta_wide, 
##     model = "pooling")
## 
## Unbalanced Panel: n = 12, T = 20-54, N = 334
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -59.0913 -18.0657  -4.4756  16.6959  89.3256 
## 
## Coefficients:
##                Estimate  Std. Error t-value  Pr(>|t|)    
## (Intercept) -4.0544e+01  1.3884e+01 -2.9202   0.00374 ** 
## TFFP         1.0479e+00  1.4363e-01  7.2955 2.254e-12 ***
## PGDP        -2.3178e-03  7.2142e-03 -0.3213   0.74820    
## POPU         2.1279e-04  2.0425e-04  1.0418   0.29828    
## YIDC         7.1250e+00  6.0380e-01 11.8001 < 2.2e-16 ***
## MKTO        -1.1548e+02  2.3558e+01 -4.9019 1.495e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    365280
## Residual Sum of Squares: 210390
## R-Squared:      0.42404
## Adj. R-Squared: 0.41526
## F-statistic: 48.2962 on 5 and 328 DF, p-value: < 2.22e-16

#fixef(Reg_2)
###Residual Analysis 
df_res <- plot_res_plm(Reg_2$residuals)
plot_pred_plm(Reg_2$model$PDEB,Reg_2 )

Fixed effects, constructing the predicted by hand

panel_pdta_wide <- pdata.frame(dta_wide, index=c("id", "year"))
fe <- plm(PDEB~ TFFP + PGDP + POPU + LTIR +  MKTO + STIR, 
             model = "within",  data=panel_pdta_wide)
summary(fe)

## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = PDEB ~ TFFP + PGDP + POPU + LTIR + MKTO + STIR, 
##     data = panel_pdta_wide, model = "within")
## 
## Unbalanced Panel: n = 12, T = 20-54, N = 334
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -45.8934  -8.9898  -1.1181   8.3899  43.3150 
## 
## Coefficients:
##         Estimate  Std. Error  t-value  Pr(>|t|)    
## TFFP  -0.1011756   0.1226822  -0.8247    0.4102    
## PGDP   0.0557349   0.0088103   6.3261 8.588e-10 ***
## POPU   0.0015801   0.0009680   1.6323    0.1036    
## LTIR   3.6177393   0.4628679   7.8159 8.179e-14 ***
## MKTO 205.1578658  27.6121926   7.4300 1.023e-12 ***
## STIR  -4.2825955   0.3899427 -10.9826 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    175880
## Residual Sum of Squares: 63471
## R-Squared:      0.63913
## Adj. R-Squared: 0.61971
## F-statistic: 93.2757 on 6 and 316 DF, p-value: < 2.22e-16

fixef(fe)

##        1        2        3        4        5        6        7        8 
##   45.126   69.906   76.033   24.888 -125.684 -205.196  114.882   18.077 
##        9       10       11       12 
##  -65.109  -51.536  -11.820   71.954

df_fe <- data.frame(fixef_id = names(fixef(fe)), fixef = as.numeric(fixef(fe)))

temp <- merge(panel_pdta_wide,df_fe, all.x =T, by.x = c("id"), by.y=c("fixef_id"))

fitted_by_hand <- temp$fixef + fe$coefficients[1] * panel_pdta_wide$TFFP +
                              fe$coefficients[2] * panel_pdta_wide$PGDP 

df_fitted_by_hand <- as.data.frame(fitted_by_hand)                            

df_fitted_by_hand %<>%   rownames_to_column( var = "rowname")   %>%
  separate(rowname,  c("id", "year"))    %>% 
  group_by(id, year)

names(df_fitted_by_hand)  <- c("id", "year" , "fitted_by_hand")

df_fitted_by_hand %<>%  transform(id = as.numeric(id)) %>% 
  transform(year = as.numeric(year)) %>% 
  transform(fitted_by_hand = as.numeric(fitted_by_hand)) %>% 
  group_by(id, year) %>% 
  mutate(dta_wide, cntry = case_when((id == 1) ~ "AUT",
                                     (id ==   2) ~ "BEL",
                                     (id == 3) ~ "CYP",
                                     (id == 4) ~ "FIN",
                                     (id == 5) ~ "FRA",
                                     (id == 6) ~ "DEU",
                                     (id == 7) ~ "GRC",
                                     (id == 8) ~ "IRL",
                                     (id == 9) ~ "ITA",
                                     (id == 10) ~ "ESP",
                                     (id == 11) ~ "NLD",
                                     (id == 12) ~ "PRT"))

ggplot(data = df_fitted_by_hand, aes(year, fitted_by_hand)) +
  geom_line(color = "#cb4154", size = 1) +
  geom_hline(yintercept=60, linetype="dashed", color = "red")+
  labs(title = "Fitted by hand",
       subtitle = 'EZ Selected countries',
       y = "Percentaje of GDP", x = "") + 
  facet_wrap(~ cntry,  scales="free")