Đọc dữ liệu

library(haven)

# Đọc tệp .dta
data <- read_dta("/Users/thuphan/Downloads/covid2024 (1).dta")

# Xem dữ liệu
head(data)
## # A tibble: 6 × 29
##     Năm MãCK  Tên         SAN    YEAR NGANH    SIZE      LN    VCSH    ETA   ROA
##   <dbl> <chr> <chr>       <chr> <dbl> <chr>   <dbl>   <dbl>   <dbl>  <dbl> <dbl>
## 1  2011 ACB   Ngân hàng … HoSE   1993 Trun… 2.81e14 3.21e12 1.20e13 0.0426 1.14 
## 2  2012 ACB   Ngân hàng … HoSE   1993 Trun… 1.76e14 7.84e11 1.26e13 0.0716 0.445
## 3  2013 ACB   Ngân hàng … HoSE   1993 Trun… 1.67e14 8.26e11 1.25e13 0.0751 0.496
## 4  2014 ACB   Ngân hàng … HoSE   1993 Trun… 1.80e14 9.52e11 1.24e13 0.0690 0.530
## 5  2015 ACB   Ngân hàng … HoSE   1993 Trun… 2.01e14 1.03e12 1.28e13 0.0635 0.510
## 6  2016 ACB   Ngân hàng … HoSE   1993 Trun… 2.34e14 1.33e12 1.41e13 0.0602 0.567
## # ℹ 18 more variables: NETOP <dbl>, NCV <dbl>, LTA <dbl>, NET <dbl>, cp <dbl>,
## #   lnt <dbl>, NON <dbl>, HHI <dbl>, COVID <dbl>, TAM <dbl>, SD_ROA <dbl>,
## #   SH <dbl>, CTY <dbl+lbl>, Zcore <dbl>, LnSIZE <dbl>, `_est_FEM` <dbl>,
## #   `_est_REM` <dbl>, NAM <dbl>
congthuc <- Zcore ~ HHI +  LTA +  LnSIZE +  ETA + COVID

hoiquy <- lm( congthuc, data)
summary(hoiquy)
## 
## Call:
## lm(formula = congthuc, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5838 -0.8480 -0.1820  0.8008  3.9951 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -21.176734   3.594973  -5.891 1.29e-08 ***
## HHI           0.001594   0.077229   0.021    0.984    
## LTA           0.302374   0.355542   0.850    0.396    
## LnSIZE        0.684068   0.108669   6.295 1.44e-09 ***
## ETA           2.195816   3.030767   0.725    0.469    
## COVID         0.337578   0.221091   1.527    0.128    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.394 on 241 degrees of freedom
## Multiple R-squared:  0.2623, Adjusted R-squared:  0.2469 
## F-statistic: 17.13 on 5 and 241 DF,  p-value: 1.7e-14

Xử lý GLS

library(plm)
library(sandwich)
data_p <- pdata.frame(data, index = c("CTY", "NAM"))

# Hồi quy GLS
hoiquy_gls <- plm(congthuc, data = data_p, model = "random", effect = "twoways", method = "gls")

# Tóm tắt kết quả
summary(hoiquy_gls)
## Twoways effects Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = congthuc, data = data_p, effect = "twoways", model = "random", 
##     method = "gls")
## 
## Balanced Panel: n = 19, T = 13, N = 247
## 
## Effects:
##                   var std.dev share
## idiosyncratic 0.57592 0.75890 0.347
## individual    1.04974 1.02457 0.633
## time          0.03284 0.18123 0.020
## theta: 0.7988 (id) 0.3072 (time) 0.3029 (total)
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -2.60806 -0.47517 -0.10399  0.44031  2.75844 
## 
## Coefficients:
##               Estimate Std. Error z-value  Pr(>|z|)    
## (Intercept) -21.657273   4.805564 -4.5067 6.584e-06 ***
## HHI           0.032836   0.044905  0.7312    0.4646    
## LTA          -0.348185   0.267933 -1.2995    0.1938    
## LnSIZE        0.690012   0.145296  4.7490 2.044e-06 ***
## ETA           9.995331   2.342125  4.2676 1.976e-05 ***
## COVID         0.255550   0.208335  1.2266    0.2200    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    210.57
## Residual Sum of Squares: 143.15
## R-Squared:      0.32017
## Adj. R-Squared: 0.30606
## Chisq: 113.499 on 5 DF, p-value: < 2.22e-16
robust_se <- vcovHC(hoiquy_gls, type = "HC1")
#robust_se_hac <- vcovHAC(hoiquy_gls)

#summary(robust_se)

robust_se
##             (Intercept)           HHI           LTA       LnSIZE          ETA
## (Intercept) 22.97496536  0.0311427090  0.2538856534 -0.700675525 -4.710038340
## HHI          0.03114271  0.0002982378  0.0007179254 -0.000936603 -0.008044691
## LTA          0.25388565  0.0007179254  0.1101088210 -0.007308684 -0.989311360
## LnSIZE      -0.70067553 -0.0009366030 -0.0073086840  0.021442148  0.140517362
## ETA         -4.71003834 -0.0080446905 -0.9893113605  0.140517362  9.293981685
## COVID        0.38827698 -0.0011739313  0.0339495623 -0.012187445 -0.361141794
##                    COVID
## (Intercept)  0.388276983
## HHI         -0.001173931
## LTA          0.033949562
## LnSIZE      -0.012187445
## ETA         -0.361141794
## COVID        0.057403243
## attr(,"cluster")
## [1] "group"
#robust_se_hac

Tóm tắt

# Hồi quy OLS
ols <- plm(congthuc, data=data_p, model ="pooling")
summary(ols)
## Pooling Model
## 
## Call:
## plm(formula = congthuc, data = data_p, model = "pooling")
## 
## Balanced Panel: n = 19, T = 13, N = 247
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -3.58380 -0.84799 -0.18201  0.80076  3.99508 
## 
## Coefficients:
##                Estimate  Std. Error t-value  Pr(>|t|)    
## (Intercept) -21.1767339   3.5949730 -5.8907 1.286e-08 ***
## HHI           0.0015936   0.0772291  0.0206    0.9836    
## LTA           0.3023742   0.3555419  0.8505    0.3959    
## LnSIZE        0.6840684   0.1086693  6.2950 1.442e-09 ***
## ETA           2.1958164   3.0307669  0.7245    0.4695    
## COVID         0.3375776   0.2210912  1.5269    0.1281    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    635.13
## Residual Sum of Squares: 468.56
## R-Squared:      0.26226
## Adj. R-Squared: 0.24695
## F-statistic: 17.1342 on 5 and 241 DF, p-value: 1.6996e-14
# Hồi quy REM
rem <- plm(congthuc, data=data_p, model ="random")
summary(rem)
## Oneway (individual) effect Random Effect Model 
##    (Swamy-Arora's transformation)
## 
## Call:
## plm(formula = congthuc, data = data_p, model = "random")
## 
## Balanced Panel: n = 19, T = 13, N = 247
## 
## Effects:
##                  var std.dev share
## idiosyncratic 0.6087  0.7802 0.368
## individual    1.0472  1.0233 0.632
## theta: 0.7931
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -2.44912 -0.49935 -0.13697  0.45407  2.97464 
## 
## Coefficients:
##               Estimate Std. Error z-value  Pr(>|z|)    
## (Intercept) -21.294193   4.262883 -4.9953 5.876e-07 ***
## HHI           0.028293   0.045479  0.6221   0.53387    
## LTA          -0.476393   0.265566 -1.7939   0.07283 .  
## LnSIZE        0.678535   0.128749  5.2702 1.363e-07 ***
## ETA          11.013675   2.340682  4.7053 2.535e-06 ***
## COVID         0.268548   0.165955  1.6182   0.10562    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    254.51
## Residual Sum of Squares: 151.05
## R-Squared:      0.40651
## Adj. R-Squared: 0.3942
## Chisq: 165.073 on 5 DF, p-value: < 2.22e-16
# Hồi quy FEM
fem <- plm(congthuc, data=data_p, model ="within")
summary(fem)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = congthuc, data = data_p, model = "within")
## 
## Balanced Panel: n = 19, T = 13, N = 247
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -2.385927 -0.491820 -0.057323  0.441532  2.639793 
## 
## Coefficients:
##         Estimate Std. Error t-value  Pr(>|t|)    
## HHI     0.029251   0.044895  0.6515   0.51536    
## LTA    -0.542136   0.266471 -2.0345   0.04308 *  
## LnSIZE  0.665543   0.138361  4.8102 2.776e-06 ***
## ETA    11.628405   2.363182  4.9207 1.675e-06 ***
## COVID   0.277678   0.171922  1.6151   0.10769    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    237.5
## Residual Sum of Squares: 135.74
## R-Squared:      0.42847
## Adj. R-Squared: 0.36952
## F-statistic: 33.4358 on 5 and 223 DF, p-value: < 2.22e-16
# hồi quy FGLS
fgls <- pggls(congthuc, data=data_p, effect = c("time"), model = c("within"))
summary(fgls)
## Oneway (time) effect Within FGLS model
## 
## Call:
## pggls(formula = congthuc, data = data_p, effect = c("time"), 
##     model = c("within"))
## 
## Balanced Panel: n = 19, T = 13, N = 247
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -3.7479718 -0.8270558 -0.1485583  0.7851150  3.8563080 
## 
## Coefficients:
##         Estimate Std. Error z-value  Pr(>|z|)    
## HHI    0.0082940  0.0023958  3.4619 0.0005364 ***
## LTA    0.5351352  0.0285982 18.7122 < 2.2e-16 ***
## LnSIZE 0.7094991  0.0126883 55.9177 < 2.2e-16 ***
## ETA    0.2049909  0.2594170  0.7902 0.4294120    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Total Sum of Squares: 635.13
## Residual Sum of Squares: 447.13
## Multiple R-squared: 0.29599

Vài kiểm định lựa chọn model

# ols với rem
plmtest(ols, effect = "individual", type = "bp")
## 
##  Lagrange Multiplier Test - (Breusch-Pagan)
## 
## data:  congthuc
## chisq = 620.76, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
# fem với ols
pFtest(fem, ols)
## 
##  F test for individual effects
## 
## data:  congthuc
## F = 30.377, df1 = 18, df2 = 223, p-value < 2.2e-16
## alternative hypothesis: significant effects
# fem với rem
phtest(fem, rem)
## 
##  Hausman Test
## 
## data:  congthuc
## chisq = 16.36, df = 5, p-value = 0.005888
## alternative hypothesis: one model is inconsistent

1. Kiểm định sụ phục thuộc chéo

# test cho fem sự phụ thuộc chéo
pcdtest(fem, type ="cd")
## 
##  Pesaran CD test for cross-sectional dependence in panels
## 
## data:  Zcore ~ HHI + LTA + LnSIZE + ETA + COVID
## z = 2.3775, p-value = 0.01743
## alternative hypothesis: cross-sectional dependence
# Tính toán sai số chuẩn cụm theo cá nhân
#clustered_se <- vcovHC(fem, type = "HC1", cluster = "group")

# Hoặc tính toán sai số chuẩn cụm theo thời gian
 clustered_se <- vcovHC(fem, type = "HC1", cluster = "time")

library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Sử dụng hàm coeftest để kiểm tra hệ số với sai số chuẩn cụm
coeftest_result <- coeftest(fem, vcov = clustered_se)

# In kết quả
print(coeftest_result)
## 
## t test of coefficients:
## 
##         Estimate Std. Error t value  Pr(>|t|)    
## HHI     0.029251   0.012068  2.4238   0.01615 *  
## LTA    -0.542136   0.230011 -2.3570   0.01929 *  
## LnSIZE  0.665543   0.122683  5.4249 1.506e-07 ***
## ETA    11.628405   2.240943  5.1891 4.746e-07 ***
## COVID   0.277678   0.187943  1.4775   0.14096    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2. Kiểm tra tự tương quan chuỗi

# Kiểm tra tự tương quan chuỗi bậc nhất
bgtest_result <- pbgtest(fem)

# In kết quả kiểm định
print(bgtest_result)
## 
##  Breusch-Godfrey/Wooldridge test for serial correlation in panel models
## 
## data:  congthuc
## chisq = 87.871, df = 13, p-value = 3.564e-13
## alternative hypothesis: serial correlation in idiosyncratic errors
# Dùng GLS hay Driscoll-Kraay
# Tính toán sai số chuẩn Driscoll-Kraay
dk_se <- vcovSCC(fem, type = "HC1", maxlag = 3)

# Sử dụng coeftest để kiểm tra hệ số với Driscoll-Kraay Standard Errors
coeftest_result <- coeftest(fem, vcov = dk_se)

# In kết quả
print(coeftest_result)
## 
## t test of coefficients:
## 
##         Estimate Std. Error t value  Pr(>|t|)    
## HHI     0.029251   0.016454  1.7777   0.07681 .  
## LTA    -0.542136   0.254161 -2.1330   0.03401 *  
## LnSIZE  0.665543   0.112731  5.9038 1.312e-08 ***
## ETA    11.628405   2.487475  4.6748 5.094e-06 ***
## COVID   0.277678   0.206590  1.3441   0.18028    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3. Kiểm tra phương sai không đồng nhất

# Kiểm tra phương sai không đồng nhất với Breusch-Pagan test
bp_test <- bptest(fem)

# In kết quả kiểm định
print(bp_test)
## 
##  studentized Breusch-Pagan test
## 
## data:  fem
## BP = 10.66, df = 5, p-value = 0.05856
# Tính toán sai số chuẩn điều chỉnh White (HC0)
robust_se <- vcovHC(fem, type = "HC1")

# Sử dụng coeftest để kiểm tra hệ số với sai số chuẩn điều chỉnh
coeftest_result <- coeftest(fem, vcov = robust_se)

# In kết quả
print(coeftest_result)
## 
## t test of coefficients:
## 
##         Estimate Std. Error t value  Pr(>|t|)    
## HHI     0.029251   0.015872  1.8430 0.0666596 .  
## LTA    -0.542136   0.340465 -1.5923 0.1127250    
## LnSIZE  0.665543   0.184225  3.6127 0.0003745 ***
## ETA    11.628405   3.157887  3.6823 0.0002900 ***
## COVID   0.277678   0.216934  1.2800 0.2018703    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4. Kiểm định phụ thuộc chéo & yếu tố chung không quan sát được

# Kiểm định Breusch-Pagan LM
bp_test <- pcdtest(fem, test = "lm")

# In kết quả kiểm định
print(bp_test)
## 
##  Breusch-Pagan LM test for cross-sectional dependence in panels
## 
## data:  Zcore ~ HHI + LTA + LnSIZE + ETA + COVID
## chisq = 409.17, df = 171, p-value < 2.2e-16
## alternative hypothesis: cross-sectional dependence
# Kiểm định LM cho yếu tố chung
lm_test <- plmtest(fem, type = "honda")

# In kết quả kiểm định
print(lm_test)
## 
##  Lagrange Multiplier Test - (Honda)
## 
## data:  congthuc
## normal = 24.915, p-value < 2.2e-16
## alternative hypothesis: significant effects
# khắc phục
pcce <- pcce(congthuc, data=data_p, model = "mg")
summary(pcce, vcoh = robust_se )
## Common Correlated Effects Mean Groups model
## 
## Call:
## pcce(formula = congthuc, data = data_p, model = "mg")
## 
## Balanced Panel: n = 19, T = 13, N = 247
## 
## Residuals:
##          Min.       1st Qu.        Median       3rd Qu.          Max. 
## -0.3516239424 -0.0254027672  0.0002011638  0.0280472602  0.3604132439 
## 
## Coefficients:
##          Estimate Std. Error z-value Pr(>|z|)   
## HHI    1.0447e+01 3.6672e+00  2.8488 0.004388 **
## LTA    1.4391e+00 2.2955e+00  0.6269 0.530717   
## LnSIZE 3.0958e+00 2.2967e+00  1.3479 0.177681   
## ETA    4.0431e+01 2.6118e+01  1.5480 0.121624   
## COVID  1.4483e-06 1.1549e-06  1.2540 0.209849   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Total Sum of Squares: 635.13
## Residual Sum of Squares: 1.7462
## HPY R-squared: 0.91177

Granger test

pgrangertest(Zcore ~ HHI ,data = data_p, test = c("Ztilde"))
## 
##  Panel Granger (Non-)Causality Test (Dumitrescu/Hurlin (2012))
## 
## data:  Zcore ~ HHI
## Ztilde = 2.6478, p-value = 0.008102
## alternative hypothesis: Granger causality for at least one individual

Tìm hiểu về hiệu ứng cố định

# Hiệu ứng cố định theo cá nhân
# Mô hình hiệu ứng cố định cá nhân
fe_model_individual <- plm(congthuc, data = data_p, model = "within", effect = "individual")

# F-test cho hiệu ứng cá nhân
f_test_individual <- pFtest(fe_model_individual, ols)

# In kết quả kiểm định
print(f_test_individual)
## 
##  F test for individual effects
## 
## data:  congthuc
## F = 30.377, df1 = 18, df2 = 223, p-value < 2.2e-16
## alternative hypothesis: significant effects
# Mô hình hiệu ứng cố định thời gian
fe_model_time <- plm(congthuc, data = data_p, model = "within", effect = "time")

# F-test cho hiệu ứng thời gian
f_test_time <- pFtest(fe_model_time, ols)

# In kết quả kiểm định
print(f_test_time)
## 
##  F test for time effects
## 
## data:  congthuc
## F = 1.003, df1 = 11, df2 = 230, p-value = 0.4444
## alternative hypothesis: significant effects