Đọc dữ liệu
library(haven)
# Đọc tệp .dta
data <- read_dta("/Users/thuphan/Downloads/covid2024 (1).dta")
# Xem dữ liệu
head(data)
## # A tibble: 6 × 29
## Năm MãCK Tên SAN YEAR NGANH SIZE LN VCSH ETA ROA
## <dbl> <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2011 ACB Ngân hàng … HoSE 1993 Trun… 2.81e14 3.21e12 1.20e13 0.0426 1.14
## 2 2012 ACB Ngân hàng … HoSE 1993 Trun… 1.76e14 7.84e11 1.26e13 0.0716 0.445
## 3 2013 ACB Ngân hàng … HoSE 1993 Trun… 1.67e14 8.26e11 1.25e13 0.0751 0.496
## 4 2014 ACB Ngân hàng … HoSE 1993 Trun… 1.80e14 9.52e11 1.24e13 0.0690 0.530
## 5 2015 ACB Ngân hàng … HoSE 1993 Trun… 2.01e14 1.03e12 1.28e13 0.0635 0.510
## 6 2016 ACB Ngân hàng … HoSE 1993 Trun… 2.34e14 1.33e12 1.41e13 0.0602 0.567
## # ℹ 18 more variables: NETOP <dbl>, NCV <dbl>, LTA <dbl>, NET <dbl>, cp <dbl>,
## # lnt <dbl>, NON <dbl>, HHI <dbl>, COVID <dbl>, TAM <dbl>, SD_ROA <dbl>,
## # SH <dbl>, CTY <dbl+lbl>, Zcore <dbl>, LnSIZE <dbl>, `_est_FEM` <dbl>,
## # `_est_REM` <dbl>, NAM <dbl>
congthuc <- Zcore ~ HHI + LTA + LnSIZE + ETA + COVID
hoiquy <- lm( congthuc, data)
summary(hoiquy)
##
## Call:
## lm(formula = congthuc, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5838 -0.8480 -0.1820 0.8008 3.9951
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -21.176734 3.594973 -5.891 1.29e-08 ***
## HHI 0.001594 0.077229 0.021 0.984
## LTA 0.302374 0.355542 0.850 0.396
## LnSIZE 0.684068 0.108669 6.295 1.44e-09 ***
## ETA 2.195816 3.030767 0.725 0.469
## COVID 0.337578 0.221091 1.527 0.128
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.394 on 241 degrees of freedom
## Multiple R-squared: 0.2623, Adjusted R-squared: 0.2469
## F-statistic: 17.13 on 5 and 241 DF, p-value: 1.7e-14
Xử lý GLS
library(plm)
library(sandwich)
data_p <- pdata.frame(data, index = c("CTY", "NAM"))
# Hồi quy GLS
hoiquy_gls <- plm(congthuc, data = data_p, model = "random", effect = "twoways", method = "gls")
# Tóm tắt kết quả
summary(hoiquy_gls)
## Twoways effects Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = congthuc, data = data_p, effect = "twoways", model = "random",
## method = "gls")
##
## Balanced Panel: n = 19, T = 13, N = 247
##
## Effects:
## var std.dev share
## idiosyncratic 0.57592 0.75890 0.347
## individual 1.04974 1.02457 0.633
## time 0.03284 0.18123 0.020
## theta: 0.7988 (id) 0.3072 (time) 0.3029 (total)
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -2.60806 -0.47517 -0.10399 0.44031 2.75844
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) -21.657273 4.805564 -4.5067 6.584e-06 ***
## HHI 0.032836 0.044905 0.7312 0.4646
## LTA -0.348185 0.267933 -1.2995 0.1938
## LnSIZE 0.690012 0.145296 4.7490 2.044e-06 ***
## ETA 9.995331 2.342125 4.2676 1.976e-05 ***
## COVID 0.255550 0.208335 1.2266 0.2200
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 210.57
## Residual Sum of Squares: 143.15
## R-Squared: 0.32017
## Adj. R-Squared: 0.30606
## Chisq: 113.499 on 5 DF, p-value: < 2.22e-16
robust_se <- vcovHC(hoiquy_gls, type = "HC1")
#robust_se_hac <- vcovHAC(hoiquy_gls)
#summary(robust_se)
robust_se
## (Intercept) HHI LTA LnSIZE ETA
## (Intercept) 22.97496536 0.0311427090 0.2538856534 -0.700675525 -4.710038340
## HHI 0.03114271 0.0002982378 0.0007179254 -0.000936603 -0.008044691
## LTA 0.25388565 0.0007179254 0.1101088210 -0.007308684 -0.989311360
## LnSIZE -0.70067553 -0.0009366030 -0.0073086840 0.021442148 0.140517362
## ETA -4.71003834 -0.0080446905 -0.9893113605 0.140517362 9.293981685
## COVID 0.38827698 -0.0011739313 0.0339495623 -0.012187445 -0.361141794
## COVID
## (Intercept) 0.388276983
## HHI -0.001173931
## LTA 0.033949562
## LnSIZE -0.012187445
## ETA -0.361141794
## COVID 0.057403243
## attr(,"cluster")
## [1] "group"
#robust_se_hac
Tóm tắt
# Hồi quy OLS
ols <- plm(congthuc, data=data_p, model ="pooling")
summary(ols)
## Pooling Model
##
## Call:
## plm(formula = congthuc, data = data_p, model = "pooling")
##
## Balanced Panel: n = 19, T = 13, N = 247
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -3.58380 -0.84799 -0.18201 0.80076 3.99508
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) -21.1767339 3.5949730 -5.8907 1.286e-08 ***
## HHI 0.0015936 0.0772291 0.0206 0.9836
## LTA 0.3023742 0.3555419 0.8505 0.3959
## LnSIZE 0.6840684 0.1086693 6.2950 1.442e-09 ***
## ETA 2.1958164 3.0307669 0.7245 0.4695
## COVID 0.3375776 0.2210912 1.5269 0.1281
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 635.13
## Residual Sum of Squares: 468.56
## R-Squared: 0.26226
## Adj. R-Squared: 0.24695
## F-statistic: 17.1342 on 5 and 241 DF, p-value: 1.6996e-14
# Hồi quy REM
rem <- plm(congthuc, data=data_p, model ="random")
summary(rem)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = congthuc, data = data_p, model = "random")
##
## Balanced Panel: n = 19, T = 13, N = 247
##
## Effects:
## var std.dev share
## idiosyncratic 0.6087 0.7802 0.368
## individual 1.0472 1.0233 0.632
## theta: 0.7931
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -2.44912 -0.49935 -0.13697 0.45407 2.97464
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) -21.294193 4.262883 -4.9953 5.876e-07 ***
## HHI 0.028293 0.045479 0.6221 0.53387
## LTA -0.476393 0.265566 -1.7939 0.07283 .
## LnSIZE 0.678535 0.128749 5.2702 1.363e-07 ***
## ETA 11.013675 2.340682 4.7053 2.535e-06 ***
## COVID 0.268548 0.165955 1.6182 0.10562
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 254.51
## Residual Sum of Squares: 151.05
## R-Squared: 0.40651
## Adj. R-Squared: 0.3942
## Chisq: 165.073 on 5 DF, p-value: < 2.22e-16
# Hồi quy FEM
fem <- plm(congthuc, data=data_p, model ="within")
summary(fem)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = congthuc, data = data_p, model = "within")
##
## Balanced Panel: n = 19, T = 13, N = 247
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -2.385927 -0.491820 -0.057323 0.441532 2.639793
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## HHI 0.029251 0.044895 0.6515 0.51536
## LTA -0.542136 0.266471 -2.0345 0.04308 *
## LnSIZE 0.665543 0.138361 4.8102 2.776e-06 ***
## ETA 11.628405 2.363182 4.9207 1.675e-06 ***
## COVID 0.277678 0.171922 1.6151 0.10769
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 237.5
## Residual Sum of Squares: 135.74
## R-Squared: 0.42847
## Adj. R-Squared: 0.36952
## F-statistic: 33.4358 on 5 and 223 DF, p-value: < 2.22e-16
# hồi quy FGLS
fgls <- pggls(congthuc, data=data_p, effect = c("time"), model = c("within"))
summary(fgls)
## Oneway (time) effect Within FGLS model
##
## Call:
## pggls(formula = congthuc, data = data_p, effect = c("time"),
## model = c("within"))
##
## Balanced Panel: n = 19, T = 13, N = 247
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -3.7479718 -0.8270558 -0.1485583 0.7851150 3.8563080
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## HHI 0.0082940 0.0023958 3.4619 0.0005364 ***
## LTA 0.5351352 0.0285982 18.7122 < 2.2e-16 ***
## LnSIZE 0.7094991 0.0126883 55.9177 < 2.2e-16 ***
## ETA 0.2049909 0.2594170 0.7902 0.4294120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Total Sum of Squares: 635.13
## Residual Sum of Squares: 447.13
## Multiple R-squared: 0.29599
Vài kiểm định lựa chọn model
# ols với rem
plmtest(ols, effect = "individual", type = "bp")
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: congthuc
## chisq = 620.76, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
# fem với ols
pFtest(fem, ols)
##
## F test for individual effects
##
## data: congthuc
## F = 30.377, df1 = 18, df2 = 223, p-value < 2.2e-16
## alternative hypothesis: significant effects
# fem với rem
phtest(fem, rem)
##
## Hausman Test
##
## data: congthuc
## chisq = 16.36, df = 5, p-value = 0.005888
## alternative hypothesis: one model is inconsistent
1. Kiểm định sụ phục thuộc chéo
# test cho fem sự phụ thuộc chéo
pcdtest(fem, type ="cd")
##
## Pesaran CD test for cross-sectional dependence in panels
##
## data: Zcore ~ HHI + LTA + LnSIZE + ETA + COVID
## z = 2.3775, p-value = 0.01743
## alternative hypothesis: cross-sectional dependence
# Tính toán sai số chuẩn cụm theo cá nhân
#clustered_se <- vcovHC(fem, type = "HC1", cluster = "group")
# Hoặc tính toán sai số chuẩn cụm theo thời gian
clustered_se <- vcovHC(fem, type = "HC1", cluster = "time")
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Sử dụng hàm coeftest để kiểm tra hệ số với sai số chuẩn cụm
coeftest_result <- coeftest(fem, vcov = clustered_se)
# In kết quả
print(coeftest_result)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## HHI 0.029251 0.012068 2.4238 0.01615 *
## LTA -0.542136 0.230011 -2.3570 0.01929 *
## LnSIZE 0.665543 0.122683 5.4249 1.506e-07 ***
## ETA 11.628405 2.240943 5.1891 4.746e-07 ***
## COVID 0.277678 0.187943 1.4775 0.14096
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
2. Kiểm tra tự tương quan chuỗi
# Kiểm tra tự tương quan chuỗi bậc nhất
bgtest_result <- pbgtest(fem)
# In kết quả kiểm định
print(bgtest_result)
##
## Breusch-Godfrey/Wooldridge test for serial correlation in panel models
##
## data: congthuc
## chisq = 87.871, df = 13, p-value = 3.564e-13
## alternative hypothesis: serial correlation in idiosyncratic errors
# Dùng GLS hay Driscoll-Kraay
# Tính toán sai số chuẩn Driscoll-Kraay
dk_se <- vcovSCC(fem, type = "HC1", maxlag = 3)
# Sử dụng coeftest để kiểm tra hệ số với Driscoll-Kraay Standard Errors
coeftest_result <- coeftest(fem, vcov = dk_se)
# In kết quả
print(coeftest_result)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## HHI 0.029251 0.016454 1.7777 0.07681 .
## LTA -0.542136 0.254161 -2.1330 0.03401 *
## LnSIZE 0.665543 0.112731 5.9038 1.312e-08 ***
## ETA 11.628405 2.487475 4.6748 5.094e-06 ***
## COVID 0.277678 0.206590 1.3441 0.18028
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3. Kiểm tra phương sai không đồng nhất
# Kiểm tra phương sai không đồng nhất với Breusch-Pagan test
bp_test <- bptest(fem)
# In kết quả kiểm định
print(bp_test)
##
## studentized Breusch-Pagan test
##
## data: fem
## BP = 10.66, df = 5, p-value = 0.05856
# Tính toán sai số chuẩn điều chỉnh White (HC0)
robust_se <- vcovHC(fem, type = "HC1")
# Sử dụng coeftest để kiểm tra hệ số với sai số chuẩn điều chỉnh
coeftest_result <- coeftest(fem, vcov = robust_se)
# In kết quả
print(coeftest_result)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## HHI 0.029251 0.015872 1.8430 0.0666596 .
## LTA -0.542136 0.340465 -1.5923 0.1127250
## LnSIZE 0.665543 0.184225 3.6127 0.0003745 ***
## ETA 11.628405 3.157887 3.6823 0.0002900 ***
## COVID 0.277678 0.216934 1.2800 0.2018703
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
4. Kiểm định phụ thuộc chéo & yếu tố chung không quan sát
được
# Kiểm định Breusch-Pagan LM
bp_test <- pcdtest(fem, test = "lm")
# In kết quả kiểm định
print(bp_test)
##
## Breusch-Pagan LM test for cross-sectional dependence in panels
##
## data: Zcore ~ HHI + LTA + LnSIZE + ETA + COVID
## chisq = 409.17, df = 171, p-value < 2.2e-16
## alternative hypothesis: cross-sectional dependence
# Kiểm định LM cho yếu tố chung
lm_test <- plmtest(fem, type = "honda")
# In kết quả kiểm định
print(lm_test)
##
## Lagrange Multiplier Test - (Honda)
##
## data: congthuc
## normal = 24.915, p-value < 2.2e-16
## alternative hypothesis: significant effects
# khắc phục
pcce <- pcce(congthuc, data=data_p, model = "mg")
summary(pcce, vcoh = robust_se )
## Common Correlated Effects Mean Groups model
##
## Call:
## pcce(formula = congthuc, data = data_p, model = "mg")
##
## Balanced Panel: n = 19, T = 13, N = 247
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.3516239424 -0.0254027672 0.0002011638 0.0280472602 0.3604132439
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## HHI 1.0447e+01 3.6672e+00 2.8488 0.004388 **
## LTA 1.4391e+00 2.2955e+00 0.6269 0.530717
## LnSIZE 3.0958e+00 2.2967e+00 1.3479 0.177681
## ETA 4.0431e+01 2.6118e+01 1.5480 0.121624
## COVID 1.4483e-06 1.1549e-06 1.2540 0.209849
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Total Sum of Squares: 635.13
## Residual Sum of Squares: 1.7462
## HPY R-squared: 0.91177
Granger test
pgrangertest(Zcore ~ HHI ,data = data_p, test = c("Ztilde"))
##
## Panel Granger (Non-)Causality Test (Dumitrescu/Hurlin (2012))
##
## data: Zcore ~ HHI
## Ztilde = 2.6478, p-value = 0.008102
## alternative hypothesis: Granger causality for at least one individual
Tìm hiểu về hiệu ứng cố định
# Hiệu ứng cố định theo cá nhân
# Mô hình hiệu ứng cố định cá nhân
fe_model_individual <- plm(congthuc, data = data_p, model = "within", effect = "individual")
# F-test cho hiệu ứng cá nhân
f_test_individual <- pFtest(fe_model_individual, ols)
# In kết quả kiểm định
print(f_test_individual)
##
## F test for individual effects
##
## data: congthuc
## F = 30.377, df1 = 18, df2 = 223, p-value < 2.2e-16
## alternative hypothesis: significant effects
# Mô hình hiệu ứng cố định thời gian
fe_model_time <- plm(congthuc, data = data_p, model = "within", effect = "time")
# F-test cho hiệu ứng thời gian
f_test_time <- pFtest(fe_model_time, ols)
# In kết quả kiểm định
print(f_test_time)
##
## F test for time effects
##
## data: congthuc
## F = 1.003, df1 = 11, df2 = 230, p-value = 0.4444
## alternative hypothesis: significant effects