Load libraries and data
library(ExPanDaR)
library(dplyr)
library(tidyr)
library(plyr)
library(ggplot2)
library(kableExtra)
library(magrittr)
library(plm)
library(panelr)
library(ggplot2)
library(tibble)
library(fastDummies)
library(broom)
library(mFilter)
#Load the data
dta_wide <- readRDS(url("https://github.com/AndresMtnezGlez/heptaomicron/raw/main/dta_wide.rds"))
User defined functions
plot_res_plm <- function (plm_residuals){
df_res <- as.data.frame(plm_residuals)
df_res %<>% rownames_to_column( var = "rowname") %>%
separate(rowname, c("id", "year")) %>%
group_by(id, year)
names(df_res) <- c("id", "year" , "residuals")
df_res %<>% transform(id = as.numeric(id)) %>%
transform(year = as.numeric(year)) %>%
transform(residuals = as.numeric(residuals)) %>%
group_by(id, year) %>%
mutate(dta_wide, cntry = case_when((id == 1) ~ "AUT",
(id == 2) ~ "BEL",
(id == 3) ~ "CYP",
(id == 4) ~ "FIN",
(id == 5) ~ "FRA",
(id == 6) ~ "DEU",
(id == 7) ~ "GRC",
(id == 8) ~ "IRL",
(id == 9) ~ "ITA",
(id == 10) ~ "ESP",
(id == 11) ~ "NLD",
(id == 12) ~ "PRT"))
ggplot(data = df_res, aes(year, residuals)) +
geom_line(color = "#cb4154", size = 1) +
geom_hline(yintercept=0, linetype="dashed", color = "red")+
labs(title = "Regression Residuals",
subtitle = 'EZ Selected countries',
y = "Percentaje of GDP", x = "") +
facet_wrap(~ cntry, scales="free")
return(df_res)
}
plot_pred_plm <- function (dependent, model_object) {
PDEB <- as.data.frame(dependent)
names(PDEB) <- "model_PDEB"
df_pred <- as.data.frame(model_object$model[ , 1] - model_object$residuals)
df_pred$year <- df_res$year
df_pred$id <- df_res$id
df_pred$PDEB <- PDEB$model_PDEB
names(df_pred) <- c("predicted", "year" , "id", "model_PDEB")
df_pred %<>% transform(id = as.numeric(id)) %>%
transform(year = as.numeric(year)) %>%
transform(predicted = as.numeric(predicted)) %>%
group_by(id, year) %>%
mutate(dta_wide, cntry = case_when((id == 1) ~ "AUT",
(id == 2) ~ "BEL",
(id == 3) ~ "CYP",
(id == 4) ~ "FIN",
(id == 5) ~ "FRA",
(id == 6) ~ "DEU",
(id == 7) ~ "GRC",
(id == 8) ~ "IRL",
(id == 9) ~ "ITA",
(id == 10) ~ "ESP",
(id == 11) ~ "NLD",
(id == 12) ~ "PRT"))
ggplot(data = df_pred, aes(x=year)) +
geom_line(aes(y = predicted), color = "#cb4154", size = 1 ) +
geom_line(aes(y = model_PDEB), color="#4199cb", size = 1 ) +
geom_hline(yintercept=60, linetype="dashed", color = "red")+
labs(title = "Regression Predicted values (red) and Observed (Blue)",
subtitle = 'EZ Selected countries',
y = "Percentaje of GDP", x = "") +
theme(legend.position="bottom") +
facet_wrap(~ cntry, scales="free")
}
Transform the variables
HP filter
Variables’ units
First thing we can do is to produce a descriptive statistics table which will help us to understand what are the variables’ units and what transformations should we apply.
t <- prepare_descriptive_table(dta_wide)
t$kable_ret %>% kable_styling("condensed", full_width = F, position = "center")
N | Mean | Std. dev. | Min. | 25 % | Median | 75 % | Max. | |
---|---|---|---|---|---|---|---|---|
year | 732 | 1,990.000 | 17.619 | 1,960.000 | 1,975.000 | 1,990.000 | 2,005.000 | 2,020.000 |
CPIH | 355 | 85.059 | 14.739 | 40.903 | 72.905 | 86.132 | 99.462 | 110.152 |
DGDP | 702 | 61.616 | 34.658 | 6.949 | 25.111 | 66.341 | 94.222 | 119.537 |
DMGT | 701 | 137.596 | 218.756 | 0.159 | 8.456 | 48.412 | 168.434 | 1,454.179 |
DXGT | 701 | 145.801 | 243.550 | 0.126 | 8.052 | 45.089 | 177.303 | 1,668.459 |
LTIR | 594 | 7.257 | 4.178 | 0.090 | 4.310 | 6.565 | 9.278 | 27.740 |
NAWU | 639 | 7.096 | 3.761 | 0.198 | 4.344 | 6.882 | 9.069 | 17.761 |
PDEB | 562 | 66.427 | 34.322 | 6.082 | 42.738 | 60.508 | 91.452 | 181.130 |
PDEF | 353 | 0.616 | 3.518 | -29.231 | -0.998 | 0.738 | 2.394 | 9.570 |
PGDP | 613 | 574.182 | 677.564 | 12.798 | 136.478 | 240.323 | 774.272 | 3,040.828 |
POPU | 732 | 24,618.448 | 25,591.056 | 497.800 | 6,670.152 | 10,360.400 | 47,010.279 | 83,124.069 |
REER | 732 | 0.932 | 0.371 | 0.090 | 0.794 | 1.000 | 1.000 | 2.268 |
STIR | 597 | 6.179 | 5.214 | -0.330 | 2.330 | 4.630 | 9.350 | 24.560 |
TFFP | 697 | 84.662 | 18.839 | 35.186 | 72.432 | 89.531 | 100.000 | 153.084 |
UNEM | 664 | 7.316 | 4.661 | 0.500 | 4.200 | 6.700 | 9.300 | 27.500 |
UVGD | 701 | 403.214 | 647.889 | 0.271 | 25.454 | 140.612 | 391.462 | 3,602.780 |
id | 732 | 6.500 | 3.454 | 1.000 | 3.750 | 6.500 | 9.250 | 12.000 |
CC.EST | 288 | 1.340 | 0.638 | -0.189 | 0.999 | 1.394 | 1.838 | 2.465 |
GE.EST | 288 | 1.397 | 0.489 | 0.198 | 1.080 | 1.503 | 1.765 | 2.261 |
PV.EST | 288 | 0.809 | 0.469 | -0.474 | 0.490 | 0.858 | 1.146 | 1.760 |
RL.EST | 288 | 1.354 | 0.469 | 0.084 | 1.040 | 1.413 | 1.762 | 2.100 |
RQ.EST | 288 | 1.303 | 0.389 | 0.148 | 1.016 | 1.286 | 1.606 | 2.098 |
VA.EST | 288 | 1.268 | 0.212 | 0.619 | 1.085 | 1.316 | 1.416 | 1.784 |
VA.STD.ERR | 288 | 0.166 | 0.029 | 0.118 | 0.142 | 0.154 | 0.195 | 0.214 |
MKTO | 701 | -0.005 | 0.065 | -0.261 | -0.037 | -0.002 | 0.024 | 0.312 |
g_PGDP | 601 | 0.026 | 0.020 | -0.028 | 0.014 | 0.023 | 0.037 | 0.231 |
g_POPU | 720 | 0.005 | 0.008 | -0.122 | 0.002 | 0.005 | 0.008 | 0.039 |
g_TFFP | 685 | 0.014 | 0.023 | -0.087 | 0.002 | 0.011 | 0.025 | 0.221 |
g_DGDP | 690 | 0.041 | 0.047 | -0.131 | 0.014 | 0.031 | 0.059 | 0.268 |
YIDC | 574 | 1.247 | 2.166 | -5.560 | 0.225 | 1.210 | 2.020 | 21.930 |
g_POPU_trend | 720 | 0.005 | 0.005 | -0.020 | 0.003 | 0.005 | 0.007 | 0.020 |
Share of missing values
Plot the variables
#Plot the variables
ggplot(data = dta_wide, aes(year, PDEB)) +
geom_line(color = "steelblue", size = 1) +
geom_hline(yintercept=60, linetype="dashed", color = "red")+
labs(title = "General government consolidated gross debt (based on ESA 2010)",
subtitle = 'EZ Selected countries',
y = "Percentaje of GDP", x = "") +
facet_wrap(~ cntry, scales="free")
ggplot(data = dta_wide, aes(year, g_TFFP)) +
geom_line(color = "steelblue", size = 1) +
geom_hline(yintercept=0, linetype="dashed", color = "red")+
labs(title = "Potential TFP Growth",
subtitle = 'EZ Selected countries',
y = "Percentaje of GDP", x = "") +
facet_wrap(~ cntry, scales="free")
ggplot(data = dta_wide, aes(year, g_POPU)) +
geom_line(color = "steelblue", size = 1) +
geom_hline(yintercept=0, linetype="dashed", color = "red")+
labs(title = "Population Growth",
subtitle = 'EZ Selected countries',
y = "Growth per 1.000 people", x = "") +
facet_wrap(~ cntry, scales="free")
ggplot(data = dta_wide, aes(year, LTIR)) +
geom_line(color = "steelblue", size = 1) +
geom_hline(yintercept=0, linetype="dashed", color = "red")+
labs(title = "Long Term Interest Rate",
subtitle = 'EZ Selected countries',
y = "Percentaje", x = "") +
facet_wrap(~ cntry, scales="free")
ggplot(data = dta_wide, aes(year, MKTO)) +
geom_line(color = "steelblue", size = 1) +
geom_hline(yintercept=0, linetype="dashed", color = "red")+
labs(title = "Market Openess",
subtitle = 'EZ Selected countries',
y = "Percentaje of GDP", x = "") +
facet_wrap(~ cntry, scales="free")
Panel Regressions
Panel data gathers information about several individuals (cross-sectional units) over several periods. The panel is balanced if all units are observed in all periods; if some units are missing in some periods, the panel is unbalanced
Pooling effects
\[ \begin{equation} y_{it}=\beta_{1}+\beta_{2}x_{2it}+...+\beta_{K}x_{Kit}+e_{it} \end{equation} \]
A pooled model has the specification above , which does not allow for intercept or slope differences among individuals. Such a model can be estimated in R using the specification pooling in the plm()
function, as the following code sequence illustrates.
###Regression
panel_pdta_wide <- pdata.frame(dta_wide, index=c("id", "year"))
Reg_1 <- plm(PDEB~ TFFP + PGDP + POPU + LTIR + MKTO + STIR,
model = "pooling", data=panel_pdta_wide)
summary(Reg_1)
## Pooling Model
##
## Call:
## plm(formula = PDEB ~ TFFP + PGDP + POPU + LTIR + MKTO + STIR,
## data = panel_pdta_wide, model = "pooling")
##
## Unbalanced Panel: n = 12, T = 20-54, N = 495
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -51.7934 -20.0490 -6.8985 19.4050 93.1332
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 3.9182e+00 1.1813e+01 0.3317 0.7402702
## TFFP 6.5183e-01 1.1496e-01 5.6700 2.447e-08 ***
## PGDP 2.2428e-02 7.4466e-03 3.0119 0.0027313 **
## POPU -6.1456e-04 2.0267e-04 -3.0323 0.0025563 **
## LTIR 5.5181e+00 6.9665e-01 7.9210 1.606e-14 ***
## MKTO -8.1239e+01 2.3852e+01 -3.4060 0.0007137 ***
## STIR -5.4397e+00 5.7489e-01 -9.4621 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 541900
## Residual Sum of Squares: 392340
## R-Squared: 0.27599
## Adj. R-Squared: 0.26709
## F-statistic: 31.004 on 6 and 488 DF, p-value: < 2.22e-16
###Regression
panel_pdta_wide <- pdata.frame(dta_wide, index=c("id", "year"))
Reg_2_PO <- plm(PDEB~ NAWU + PGDP + POPU + LTIR + MKTO + STIR,
model = "pooling", data=panel_pdta_wide)
summary(Reg_2_PO)
## Pooling Model
##
## Call:
## plm(formula = PDEB ~ NAWU + PGDP + POPU + LTIR + MKTO + STIR,
## data = panel_pdta_wide, model = "pooling")
##
## Unbalanced Panel: n = 12, T = 20-54, N = 495
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -53.9117 -17.5489 -3.2712 16.4145 73.6905
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 38.5966192 3.8498881 10.0254 < 2.2e-16 ***
## NAWU 4.2036940 0.3401259 12.3592 < 2.2e-16 ***
## PGDP 0.0386573 0.0068089 5.6775 2.350e-08 ***
## POPU -0.0010916 0.0001870 -5.8376 9.666e-09 ***
## LTIR 3.6428747 0.6309144 5.7740 1.379e-08 ***
## MKTO -61.7248652 21.4503279 -2.8776 0.004183 **
## STIR -4.3738152 0.5275006 -8.2916 1.094e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 541900
## Residual Sum of Squares: 318490
## R-Squared: 0.41226
## Adj. R-Squared: 0.40504
## F-statistic: 57.0504 on 6 and 488 DF, p-value: < 2.22e-16
###Residual Analysis
df_res <- plot_res_plm(Reg_2_PO$residuals)
plot_pred_plm(Reg_2_PO$model$PDEB,Reg_2_PO )
Fixed Effects
The fixed effects model takes into account individual differences, translated into different intercepts of the regression line for different individuals. The model in this case assigns the subscript \(i\) the constant term \(\beta_1\), as shown in the equation below, the constant terms calculated in this way are called fixed effects.
\[y_{it}=\beta_{1i}+\beta_{2i}x_{2it}+\beta_{3i}x_{3it}+e_{it}\]
to estimate a fixed effects in RR we do not need to create the dummy variables, but use the option model="within"
in the plm()
function
###Regression
panel_pdta_wide <- pdata.frame(dta_wide, index=c("id", "year"))
Reg_2_FE <- plm(PDEB~ NAWU + PGDP + POPU + YIDC + MKTO,
model = "within", data=panel_pdta_wide)
summary(Reg_2_FE)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = PDEB ~ NAWU + PGDP + POPU + YIDC + MKTO, data = panel_pdta_wide,
## model = "within")
##
## Unbalanced Panel: n = 12, T = 20-54, N = 495
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -34.69486 -7.32246 -0.62059 7.24267 42.98565
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## NAWU 7.22364002 0.25292831 28.5600 < 2.2e-16 ***
## PGDP 0.06128924 0.00594318 10.3125 < 2.2e-16 ***
## POPU -0.00180576 0.00065019 -2.7773 0.005697 **
## YIDC 1.91605587 0.27047373 7.0841 5.04e-12 ***
## MKTO 38.36663022 12.84950181 2.9858 0.002973 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 337580
## Residual Sum of Squares: 69559
## R-Squared: 0.79395
## Adj. R-Squared: 0.78705
## F-statistic: 368.366 on 5 and 478 DF, p-value: < 2.22e-16
## 1 2 3 4 5 6 7 8
## 28.7398 45.9096 15.6784 -16.2944 6.6250 2.9953 40.0745 -9.1350
## 9 10 11 12
## 52.6869 -28.7517 11.2507 23.1186
###Residual Analysis
df_res <- plot_res_plm(Reg_2_FE$residuals)
plot_pred_plm(Reg_2_FE$model$PDEB,Reg_2_FE )
Testing whether pooled or fixied.
Testing if fixed effects are necessary is to compare the fixed effects model wage.within
with the pooled model wage.pooled
. The function pFtest()
does this comparison, as in the following code lines
##
## F test for individual effects
##
## data: PDEB ~ NAWU + PGDP + POPU + YIDC + MKTO
## F = 171.06, df1 = 10, df2 = 478, p-value < 2.2e-16
## alternative hypothesis: significant effects
## [1] "Fixed effects test: Ho:'No fixed effects'"
Fixed effects, constructing the predicted by hand
panel_pdta_wide <- pdata.frame(dta_wide, index=c("id", "year"))
# panel_pdta_wide %<>% transform(., l_PDEF=lag(PDEF,1))
fe <- plm(PDEB~ PGDP + TFFP + POPU + STIR + YIDC + PDEF + MKTO ,
model = "within", data=panel_pdta_wide)
#fixef(fe)
summary(fe)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = PDEB ~ PGDP + TFFP + POPU + STIR + YIDC + PDEF +
## MKTO, data = panel_pdta_wide, model = "within")
##
## Unbalanced Panel: n = 12, T = 20-44, N = 325
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -32.7941 -7.6867 -1.0566 7.8090 39.2740
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## PGDP 4.6165e-04 8.9951e-03 0.0513 0.9591026
## TFFP -5.5131e-01 1.2161e-01 -4.5333 8.343e-06 ***
## POPU 3.0277e-03 8.1456e-04 3.7170 0.0002396 ***
## STIR -2.8431e+00 3.8480e-01 -7.3884 1.424e-12 ***
## YIDC 2.0416e+00 4.6673e-01 4.3742 1.673e-05 ***
## PDEF -1.5668e-01 2.3543e-01 -0.6655 0.5062315
## MKTO 2.4335e+02 2.5052e+01 9.7138 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 145290
## Residual Sum of Squares: 47335
## R-Squared: 0.6742
## Adj. R-Squared: 0.65503
## F-statistic: 90.4608 on 7 and 306 DF, p-value: < 2.22e-16
panel_pdta_wide <- pdata.frame(dta_wide, index=c("id", "year"))
# panel_pdta_wide %<>% transform(., l_PDEF=lag(PDEF,1))
fe <- plm(PDEB~ PGDP + TFFP + POPU + NAWU + STIR + YIDC + PDEF + MKTO ,
model = "within", data=panel_pdta_wide)
#fixef(fe)
summary(fe)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = PDEB ~ PGDP + TFFP + POPU + NAWU + STIR + YIDC +
## PDEF + MKTO, data = panel_pdta_wide, model = "within")
##
## Unbalanced Panel: n = 12, T = 20-44, N = 325
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -25.79088 -5.85854 -0.16457 6.22232 27.28377
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## PGDP 2.9890e-02 7.4965e-03 3.9871 8.376e-05 ***
## TFFP -5.2378e-01 9.6875e-02 -5.4067 1.298e-07 ***
## POPU -2.4799e-05 6.8799e-04 -0.0360 0.971269
## NAWU 5.2137e+00 3.9137e-01 13.3216 < 2.2e-16 ***
## STIR -2.0842e+00 3.1170e-01 -6.6864 1.090e-10 ***
## YIDC 1.1681e+00 3.7744e-01 3.0947 0.002152 **
## PDEF 4.5388e-01 1.9301e-01 2.3516 0.019331 *
## MKTO 1.6774e+02 2.0742e+01 8.0870 1.455e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 145290
## Residual Sum of Squares: 29924
## R-Squared: 0.79404
## Adj. R-Squared: 0.78121
## F-statistic: 146.983 on 8 and 305 DF, p-value: < 2.22e-16
df_fe <- data.frame(fixef_id = names(fixef(fe)), fixef = as.numeric(fixef(fe)))
temp <- merge(panel_pdta_wide,df_fe, all.x =T, by.x = c("id"), by.y=c("fixef_id"))
fitted_by_hand <- temp$fixef + fe$coefficients[1] * panel_pdta_wide$PGDP +
fe$coefficients[2] * panel_pdta_wide$TFFP +
fe$coefficients[4] * panel_pdta_wide$NAWU
df_fitted_by_hand <- as.data.frame(fitted_by_hand)
df_fitted_by_hand %<>% rownames_to_column( var = "rowname") %>%
separate(rowname, c("id", "year")) %>%
group_by(id, year)
names(df_fitted_by_hand) <- c("id", "year" , "fitted_by_hand")
# PDEB <- as.data.frame(fe$model$PDEB)
# names(PDEB) <- "model_PDEB"
df_fitted_by_hand$PDEB <- dta_wide$PDEB
df_fitted_by_hand$LTIR <- dta_wide$LTIR
df_fitted_by_hand$UVGD <- dta_wide$UVGD
df_fitted_by_hand$DGDP <- dta_wide$DGDP
df_fitted_by_hand %<>% transform(id = as.numeric(id)) %>%
transform(year = as.numeric(year)) %>%
transform(fitted_by_hand = as.numeric(fitted_by_hand)) %>%
group_by(id, year) %>%
mutate(dta_wide, cntry = case_when((id == 1) ~ "AUT",
(id == 2) ~ "BEL",
(id == 3) ~ "CYP",
(id == 4) ~ "FIN",
(id == 5) ~ "FRA",
(id == 6) ~ "DEU",
(id == 7) ~ "GRC",
(id == 8) ~ "IRL",
(id == 9) ~ "ITA",
(id == 10) ~ "ESP",
(id == 11) ~ "NLD",
(id == 12) ~ "PRT"))
ggplot(data = df_fitted_by_hand, aes(year)) +
geom_line(aes(y = fitted_by_hand), color = "#cb4154", size = 1) +
geom_line(aes(y = PDEB), color="#4199cb", size = 1 ) +
geom_hline(yintercept=60, linetype="dashed", color = "red")+
labs(title = "Fitted by hand (red) and observed values(blue)",
subtitle = 'EZ Selected countries',
y = "Percentaje of GDP", x = "") +
facet_wrap(~ cntry)
df_var <- df_fitted_by_hand %>% mutate(deviation = PDEB - fitted_by_hand ) %>%
mutate(real_GDP = UVGD/DGDP) %>%
mutate(real_LTIR = LTIR/DGDP)
df_var %<>% ddply(.,"id",transform,
g_real_GDP=c(NA,exp(diff(log(real_GDP)))-1))
df_var %<>% group_by(cntry) %>%
filter(cntry == "ITA") %>%
ungroup()
df_var %<>% select(deviation, real_GDP, real_LTIR)
df_var %<>% drop_na()
library(svars)
#Run the VAR
var_m <- VAR(df_var, p = 3, type = "const", season = NULL, exog = NULL)
#Identification via Cholesky
svar_m <- id.chol(var_m)
#Plot the IRFs
irf.svar_m <- irf(svar_m, n.ahead = 10)
plot(irf.svar_m, scales = 'free_y')+
ggtitle("IRF's from SVAR(3) NLD")