D1

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

library(tidyverse)      # 数据处理与绘图
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.2.0
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.2     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)         # 读取Excel文件
library(plm)            # 面板数据模型

Attaching package: 'plm'

The following objects are masked from 'package:dplyr':

    between, lag, lead
library(lmtest)         # 异方差稳健标准误
Loading required package: zoo

Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric
library(sandwich)       # 聚类稳健标准误
library(stargazer)      # 输出回归表格

Please cite as: 

 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
library(corrplot)       # 相关性图
corrplot 0.95 loaded
library(DescTools)      # 缩尾处理
library(dplyr)

You can add options to executable code like this

income_raw <- read_excel("C:/Users/az214/Desktop/论文数据/核心控制变量:农村居民人均可支配收入.xlsx", sheet = "Sheet1")
income <- income_raw %>%
  pivot_longer(cols = -1, names_to = "year", values_to = "income_nominal") %>%
  rename(city = 1) %>%
  mutate(year = as.numeric(year))

index_raw <- read_excel("C:/Users/az214/Desktop/论文数据/核心解释变量:广东省数字普惠金融总指数.xlsx", sheet = "Sheet1")
index <- index_raw %>%
  select(year, city_name, index_aggregate, coverage_breadth, usage_depth, digitization_level) %>%
  rename(city = city_name) %>%
  mutate(year = as.numeric(year))

gdp_raw <- read_excel("C:/Users/az214/Desktop/论文数据/其他控制变量4:人均GDP_的对数.xlsx", sheet = "Sheet1")
gdp <- gdp_raw %>%
  select(city, year, ln_per_gdp) %>%
  mutate(year = as.numeric(year))

urban_raw <- read_excel("C:/Users/az214/Desktop/论文数据/其他控制变量2:城镇化率.xlsx", sheet = "Sheet1")
urban <- urban_raw %>%
  pivot_longer(cols = -1, names_to = "year", values_to = "urban") %>%
  rename(city = 1) %>%
  mutate(year = as.numeric(year))

fiscal_raw <- read_excel("C:/Users/az214/Desktop/论文数据/其他控制变量3:地方一般公共预算收入.xlsx", sheet = "Sheet1", skip = 2, col_names = FALSE)
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
# 将第一行作为列名
new_names <- as.character(fiscal_raw[1, ])
new_names[1] <- "city"   # 第一个单元格空,设为"city"
colnames(fiscal_raw) <- new_names
# 删除第一行(原年份行),并只保留城市和2011-2023列(去掉可能的空列)
fiscal <- fiscal_raw[-1, ] %>%
  select(city, `2011`:`2023`) %>%
  pivot_longer(cols = -city, names_to = "year", values_to = "gov_rev") %>%
  mutate(
    year = as.numeric(year),
    gov_rev = as.numeric(gov_rev)
  )

cpi_raw <- read_excel("C:/Users/az214/Desktop/论文数据/价格平减数据:CPI 居民消费价格指数 6.xlsx", sheet = "Sheet1", skip = 5)
New names:
• `` -> `...1`
• `` -> `...2`
colnames(cpi_raw) <- c("year", "cpi_total", "cpi_urban", "cpi_rural")
cpi <- cpi_raw %>%
  filter(!is.na(year)) %>%          # 去除空行
  mutate(across(-year, as.numeric))

cpi <- cpi %>%
  arrange(year) %>%
  mutate(
    # 将环比指数转为定基指数 (2011=100)
    cpi_rural_base = 100 * cumprod(cpi_rural / 100)
  )
select(cpi, year, cpi_rural, cpi_rural_base)
# A tibble: 13 × 3
   year  cpi_rural cpi_rural_base
   <chr>     <dbl>          <dbl>
 1 2011      106.            106.
 2 2012      103.            109.
 3 2013      103.            112.
 4 2014      102.            114.
 5 2015      101.            115.
 6 2016      102             118.
 7 2017      101.            119.
 8 2018      102.            121.
 9 2019      105.            126.
10 2020      103             130.
11 2021      100.            130.
12 2022      102.            133.
13 2023       99.6           133.
#合并所有数据为面板
df <- consump %>%
  left_join(income, by = c("city", "year")) %>%
  left_join(index, by = c("city", "year")) %>%
  left_join(gdp, by = c("city", "year")) %>%
  left_join(urban, by = c("city", "year")) %>%
  left_join(fiscal, by = c("city", "year"))

cpi <- cpi %>%
  mutate(year = as.numeric(year))
df <- df %>%
  left_join(select(cpi, year, cpi_rural_base), by = "year")
# 对名义变量进行平减(实际值 = 名义值 / 定基CPI * 100)
df <- df %>%
  mutate(
    consump_real = consump_nominal / cpi_rural_base * 100,
    income_real  = income_nominal / cpi_rural_base * 100,
    gov_rev_real = gov_rev / cpi_rural_base * 100      # 财政收入也平减
  )
# 检查缺失值
summary(df)
     city                year      consump_nominal income_nominal 
 Length:260         Min.   :2011   Min.   : 5189   Min.   : 6775  
 Class :character   1st Qu.:2014   1st Qu.: 9628   1st Qu.:12199  
 Mode  :character   Median :2017   Median :13059   Median :16870  
                    Mean   :2017   Mean   :13932   Mean   :18455  
                    3rd Qu.:2020   3rd Qu.:16865   3rd Qu.:22174  
                    Max.   :2023   Max.   :32111   Max.   :46865  
 index_aggregate  coverage_breadth  usage_depth     digitization_level
 Min.   : 41.63   Min.   : 22.51   Min.   : 56.11   Min.   : 44.63    
 1st Qu.:147.15   1st Qu.:146.47   1st Qu.:141.26   1st Qu.:174.82    
 Median :221.79   Median :210.30   Median :241.62   Median :261.12    
 Mean   :211.25   Mean   :206.19   Mean   :208.25   Mean   :233.40    
 3rd Qu.:281.60   3rd Qu.:280.48   3rd Qu.:271.33   3rd Qu.:295.23    
 Max.   :345.47   Max.   :374.82   Max.   :332.49   Max.   :342.37    
   ln_per_gdp         urban          gov_rev        cpi_rural_base 
 Min.   : 9.716   Min.   :35.92   Min.   :  27.27   Min.   :105.6  
 1st Qu.:10.425   1st Qu.:47.98   1st Qu.:  72.55   1st Qu.:113.9  
 Median :10.747   Median :55.73   Median : 114.31   Median :118.7  
 Mean   :10.825   Mean   :62.35   Mean   : 249.31   Mean   :120.5  
 3rd Qu.:11.191   3rd Qu.:76.01   3rd Qu.: 284.44   3rd Qu.:130.3  
 Max.   :12.042   Max.   :95.39   Max.   :1945.06   Max.   :133.4  
  consump_real    income_real     gov_rev_real    
 Min.   : 4914   Min.   : 6415   Min.   :  24.98  
 1st Qu.: 8462   1st Qu.:10568   1st Qu.:  59.14  
 Median :10786   Median :13720   Median :  95.75  
 Mean   :11362   Mean   :15052   Mean   : 204.73  
 3rd Qu.:13061   3rd Qu.:17398   3rd Qu.: 226.60  
 Max.   :24166   Max.   :35269   Max.   :1463.78  
#描述性统计
# 选择分析所用变量
vars <- c("consump_real", "income_real", "index_aggregate", 
          "coverage_breadth", "usage_depth", "digitization_level",
          "ln_per_gdp", "urban", "gov_rev_real")
desc_df <- df[, vars]

summary(desc_df)
  consump_real    income_real    index_aggregate  coverage_breadth
 Min.   : 4914   Min.   : 6415   Min.   : 41.63   Min.   : 22.51  
 1st Qu.: 8462   1st Qu.:10568   1st Qu.:147.15   1st Qu.:146.47  
 Median :10786   Median :13720   Median :221.79   Median :210.30  
 Mean   :11362   Mean   :15052   Mean   :211.25   Mean   :206.19  
 3rd Qu.:13061   3rd Qu.:17398   3rd Qu.:281.60   3rd Qu.:280.48  
 Max.   :24166   Max.   :35269   Max.   :345.47   Max.   :374.82  
  usage_depth     digitization_level   ln_per_gdp         urban      
 Min.   : 56.11   Min.   : 44.63     Min.   : 9.716   Min.   :35.92  
 1st Qu.:141.26   1st Qu.:174.82     1st Qu.:10.425   1st Qu.:47.98  
 Median :241.62   Median :261.12     Median :10.747   Median :55.73  
 Mean   :208.25   Mean   :233.40     Mean   :10.825   Mean   :62.35  
 3rd Qu.:271.33   3rd Qu.:295.23     3rd Qu.:11.191   3rd Qu.:76.01  
 Max.   :332.49   Max.   :342.37     Max.   :12.042   Max.   :95.39  
  gov_rev_real    
 Min.   :  24.98  
 1st Qu.:  59.14  
 Median :  95.75  
 Mean   : 204.73  
 3rd Qu.: 226.60  
 Max.   :1463.78  
calc_stats <- function(x) {
  c(样本量 = sum(!is.na(x)),
    均值   = mean(x, na.rm = TRUE),
    标准差 = sd(x, na.rm = TRUE),
    最小值 = min(x, na.rm = TRUE),
    中位数 = median(x, na.rm = TRUE),
    最大值 = max(x, na.rm = TRUE))
}
stats_matrix <- sapply(desc_df, calc_stats)
stats_df <- as.data.frame(t(stats_matrix))
knitr::kable(stats_df, caption = "描述性统计", digits = 2)
描述性统计
样本量 均值 标准差 最小值 中位数 最大值
consump_real 260 11362.32 4212.05 4913.83 10785.56 24165.74
income_real 260 15051.99 6100.59 6415.34 13719.80 35268.62
index_aggregate 260 211.25 79.14 41.63 221.79 345.47
coverage_breadth 260 206.19 85.60 22.51 210.30 374.82
usage_depth 260 208.25 75.08 56.11 241.62 332.49
digitization_level 260 233.40 78.10 44.63 261.12 342.37
ln_per_gdp 260 10.83 0.54 9.72 10.75 12.04
urban 260 62.35 17.97 35.92 55.73 95.39
gov_rev_real 260 204.73 277.14 24.98 95.75 1463.78
#相关性分析
cor_matrix <- cor(desc_df, use = "complete.obs")
corrplot(cor_matrix, method = "number", type = "upper", tl.cex = 0.8)

pdata <- pdata.frame(df, index = c("city", "year"))
form_base <- consump_real ~ index_aggregate + income_real + ln_per_gdp + urban + gov_rev_real
#模型选择检验
# 混合OLS
pool <- plm(form_base, data = pdata, model = "pooling")
# 固定效应
fe <- plm(form_base, data = pdata, model = "within")
# 随机效应
re <- plm(form_base, data = pdata, model = "random")


stargazer(pool, fe, re, type = "text", 
          column.labels = c("混合OLS", "固定效应", "随机效应"),
          keep.stat = c("n", "rsq", "adj.rsq"))

==================================================
                       Dependent variable:        
                ----------------------------------
                           consump_real           
                   混合OLS       固定效应       随机效应    
                    (1)        (2)         (3)    
--------------------------------------------------
index_aggregate  12.115***  17.933***   15.132*** 
                  (1.034)    (2.044)     (1.526)  
                                                  
income_real      0.532***    0.377***   0.437***  
                  (0.018)    (0.027)     (0.025)  
                                                  
ln_per_gdp      -553.854***  419.887    -189.045  
                 (208.536)  (459.777)   (362.824) 
                                                  
urban            26.828***  -93.130***    9.146   
                  (5.517)    (24.515)   (12.315)  
                                                  
gov_rev_real       0.413     3.892***   2.241***  
                  (0.251)    (0.845)     (0.526)  
                                                  
Constant        5,032.098**             2,612.564 
                (1,947.324)            (3,285.383)
                                                  
--------------------------------------------------
Observations        260        260         260    
R2                 0.967      0.971       0.965   
Adjusted R2        0.967      0.968       0.965   
==================================================
Note:                  *p<0.1; **p<0.05; ***p<0.01
#F检验:比较混合OLS与固定效应(原假设:个体效应不显著)
pFtest(fe, pool)   # p<0.05 则拒绝混合OLS,选择固定效应

    F test for individual effects

data:  form_base
F = 17.014, df1 = 19, df2 = 235, p-value < 2.2e-16
alternative hypothesis: significant effects
#LM检验(Breusch-Pagan):比较混合OLS与随机效应
plmtest(pool, type = "bp")   # p<0.05 则拒绝混合OLS,选择随机效应

    Lagrange Multiplier Test - (Breusch-Pagan)

data:  form_base
chisq = 247.23, df = 1, p-value < 2.2e-16
alternative hypothesis: significant effects
#Hausman检验:比较固定效应与随机效应
phtest(fe, re)     # p<0.05 则拒绝随机效应,选择固定效应

    Hausman Test

data:  form_base
chisq = 45.881, df = 5, p-value = 9.603e-09
alternative hypothesis: one model is inconsistent
# 按固定效应模型,聚类稳健标准误(按城市聚类)
fe_robust <- coeftest(fe, vcov = vcovHC(fe, type = "HC1", cluster = "group"))
print(fe_robust)

t test of coefficients:

                  Estimate Std. Error t value  Pr(>|t|)    
index_aggregate  17.933114   3.709177  4.8348 2.409e-06 ***
income_real       0.377216   0.076003  4.9632 1.332e-06 ***
ln_per_gdp      419.887152 628.017243  0.6686  0.504412    
urban           -93.129674  34.635538 -2.6888  0.007683 ** 
gov_rev_real      3.892290   1.592813  2.4437  0.015276 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 输出标准回归表
stargazer(fe, type = "text", 
          se = list(fe_robust[, 2]), 
          p = list(fe_robust[, 4]),
          title = "基准回归结果(固定效应,聚类稳健标准误)")

基准回归结果(固定效应,聚类稳健标准误)
===========================================
                    Dependent variable:    
                ---------------------------
                       consump_real        
-------------------------------------------
index_aggregate          17.933***         
                          (3.709)          
                                           
income_real              0.377***          
                          (0.076)          
                                           
ln_per_gdp                419.887          
                         (628.017)         
                                           
urban                   -93.130***         
                         (34.636)          
                                           
gov_rev_real              3.892**          
                          (1.593)          
                                           
-------------------------------------------
Observations                260            
R2                         0.971           
Adjusted R2                0.968           
F Statistic     1,548.796*** (df = 5; 235) 
===========================================
Note:           *p<0.1; **p<0.05; ***p<0.01

异质性分析

1 分维度回归(覆盖广度、使用深度、数字化程度)

# 分别替换核心解释变量
form_cov <- consump_real ~ coverage_breadth + income_real + ln_per_gdp + urban + gov_rev_real
form_use <- consump_real ~ usage_depth + income_real + ln_per_gdp + urban + gov_rev_real
form_dig <- consump_real ~ digitization_level + income_real + ln_per_gdp + urban + gov_rev_real

fe_cov <- plm(form_cov, data = pdata, model = "within")
fe_use <- plm(form_use, data = pdata, model = "within")
fe_dig <- plm(form_dig, data = pdata, model = "within")

# 输出结果
stargazer(fe_cov, fe_use, fe_dig, type = "text",
          column.labels = c("覆盖广度", "使用深度", "数字化程度"),
          keep.stat = c("n", "rsq"))

========================================================
                            Dependent variable:         
                   -------------------------------------
                               consump_real             
                      覆盖广度         使用深度        数字化程度    
                       (1)         (2)          (3)     
--------------------------------------------------------
coverage_breadth    15.915***                           
                     (2.481)                            
                                                        
usage_depth                      8.965***               
                                 (1.344)                
                                                        
digitization_level                            7.639***  
                                              (1.139)   
                                                        
income_real         0.380***     0.475***     0.484***  
                     (0.033)     (0.022)      (0.022)   
                                                        
ln_per_gdp          881.009*   1,827.748*** 2,092.269***
                    (513.345)   (402.999)    (380.114)  
                                                        
urban              -126.408***  -67.065**    -69.264*** 
                    (26.807)     (26.033)     (25.979)  
                                                        
gov_rev_real        4.829***     4.403***     3.967***  
                     (0.884)     (0.887)      (0.900)   
                                                        
--------------------------------------------------------
Observations           260         260          260     
R2                    0.967       0.967        0.967    
========================================================
Note:                        *p<0.1; **p<0.05; ***p<0.01

2 分地区回归(珠三角 vs 非珠三角)

珠三角城市(基于消费数据中的城市):广州、珠海、佛山、惠州、东莞、中山、江门、肇庆(共8个)。

# 定义珠三角城市
prd_cities <- c("广州市", "珠海市", "佛山市", "惠州市", "东莞市", "中山市", "江门市", "肇庆市")

df <- df %>%
  mutate(region = ifelse(city %in% prd_cities, "珠三角", "非珠三角"))

# 分别回归
pdata_prd <- pdata.frame(df[df$region == "珠三角", ], index = c("city", "year"))
pdata_nonprd <- pdata.frame(df[df$region == "非珠三角", ], index = c("city", "year"))

fe_prd <- plm(form_base, data = pdata_prd, model = "within")
fe_nonprd <- plm(form_base, data = pdata_nonprd, model = "within")

# 输出结果
stargazer(fe_prd, fe_nonprd, type = "text",
          column.labels = c("珠三角", "非珠三角"),
          keep.stat = c("n", "rsq"))

============================================
                    Dependent variable:     
                ----------------------------
                        consump_real        
                     珠三角           非珠三角     
                     (1)            (2)     
--------------------------------------------
index_aggregate   18.183***      15.731***  
                   (3.855)        (2.231)   
                                            
income_real        0.378***      0.430***   
                   (0.043)        (0.061)   
                                            
ln_per_gdp         485.060        233.583   
                 (1,016.062)     (527.118)  
                                            
urban            -178.380***    -65.347***  
                   (62.192)      (23.845)   
                                            
gov_rev_real       4.287***       9.812**   
                   (1.394)        (3.872)   
                                            
--------------------------------------------
Observations         104            156     
R2                  0.965          0.981    
============================================
Note:            *p<0.1; **p<0.05; ***p<0.01

稳健性检验

1 替换变量:使用对数形式的消费和收入

pdata$ln_consump <- log(pdata$consump_real)
pdata$ln_income  <- log(pdata$income_real)

form_log <- ln_consump ~ index_aggregate + ln_income + ln_per_gdp + urban + gov_rev_real
fe_log <- plm(form_log, data = pdata, model = "within")
coeftest(fe_log, vcov = vcovHC(fe_log, type = "HC1", cluster = "group"))

t test of coefficients:

                   Estimate  Std. Error t value  Pr(>|t|)    
index_aggregate  1.7826e-03  3.3434e-04  5.3316 2.283e-07 ***
ln_income        4.7601e-01  8.3522e-02  5.6992 3.588e-08 ***
ln_per_gdp       1.1319e-01  4.6965e-02  2.4100   0.01672 *  
urban           -7.3138e-03  2.9957e-03 -2.4415   0.01537 *  
gov_rev_real    -6.1567e-05  1.3705e-04 -0.4492   0.65368    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2 缩尾处理(1%和99%分位数)

winsorize <- function(x, lower = 0.01, upper = 0.99) {
  q <- quantile(x, probs = c(lower, upper), na.rm = TRUE)
  x[x < q[1]] <- q[1]
  x[x > q[2]] <- q[2]
  x
}
df_winsor <- df %>%
  mutate(
    consump_real_w   = winsorize(consump_real),
    income_real_w    = winsorize(income_real),
    index_aggregate_w = winsorize(index_aggregate)
  )
# 将 df_winsor 转换为面板数据
pdata_w <- pdata.frame(df_winsor, index = c("city", "year"))

# 估计缩尾后的模型
fe_w <- plm(consump_real_w ~ index_aggregate_w + income_real_w + ln_per_gdp + urban + gov_rev_real,
            data = pdata_w, model = "within")

# 查看模型摘要
summary(fe_w)
Oneway (individual) effect Within Model

Call:
plm(formula = consump_real_w ~ index_aggregate_w + income_real_w + 
    ln_per_gdp + urban + gov_rev_real, data = pdata_w, model = "within")

Balanced Panel: n = 20, T = 13, N = 260

Residuals:
      Min.    1st Qu.     Median    3rd Qu.       Max. 
-2181.1466  -277.8391     2.1383   288.2251  1502.1335 

Coefficients:
                    Estimate Std. Error t-value  Pr(>|t|)    
index_aggregate_w  17.744528   2.047704  8.6656 7.390e-16 ***
income_real_w       0.383155   0.027301 14.0346 < 2.2e-16 ***
ln_per_gdp        432.922105 459.700666  0.9417 0.3472888    
urban             -94.979589  24.568754 -3.8659 0.0001433 ***
gov_rev_real        3.837435   0.845601  4.5381 9.061e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Total Sum of Squares:    2139800000
Residual Sum of Squares: 63646000
R-Squared:      0.97026
Adj. R-Squared: 0.96722
F-statistic: 1533.19 on 5 and 235 DF, p-value: < 2.22e-16

3 滞后一期解释变量(缓解内生性)

#生成滞后项
df_lag <- df %>%
  arrange(city, year) %>%
  group_by(city) %>%
  mutate(
    index_aggregate_lag = lag(index_aggregate),
    income_real_lag = lag(income_real)
  ) %>%
  ungroup() %>%
  filter(!is.na(index_aggregate_lag))   # 删除缺失滞后的第一年

pdata_lag <- pdata.frame(df_lag, index = c("city", "year"))
fe_lag <- plm(consump_real ~ index_aggregate_lag + income_real_lag + ln_per_gdp + urban + gov_rev_real,
              data = pdata_lag, model = "within")
coeftest(fe_lag, vcov = vcovHC(fe_lag, type = "HC1", cluster = "group"))

t test of coefficients:

                      Estimate Std. Error t value  Pr(>|t|)    
index_aggregate_lag  17.933114   3.709177  4.8348 2.409e-06 ***
income_real_lag       0.377216   0.076003  4.9632 1.332e-06 ***
ln_per_gdp          419.887152 628.017243  0.6686  0.504412    
urban               -93.129674  34.635538 -2.6888  0.007683 ** 
gov_rev_real          3.892290   1.592813  2.4437  0.015276 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

将主要回归结果整理为表格

stargazer(fe, fe_log, fe_w, fe_lag, type = "text",
          column.labels = c("基准", "对数形式", "缩尾", "滞后一期"),
          keep.stat = c("n", "rsq"))

=======================================================================
                                    Dependent variable:                
                    ---------------------------------------------------
                    consump_real ln_consump consump_real_w consump_real
                         基准         对数形式          缩尾           滞后一期    
                        (1)         (2)          (3)           (4)     
-----------------------------------------------------------------------
index_aggregate      17.933***    0.002***                             
                      (2.044)     (0.0003)                             
                                                                       
income_real           0.377***                                         
                      (0.027)                                          
                                                                       
ln_income                         0.476***                             
                                  (0.073)                              
                                                                       
index_aggregate_w                             17.745***                
                                               (2.048)                 
                                                                       
income_real_w                                  0.383***                
                                               (0.027)                 
                                                                       
index_aggregate_lag                                         17.933***  
                                                             (2.044)   
                                                                       
income_real_lag                                              0.377***  
                                                             (0.027)   
                                                                       
ln_per_gdp            419.887     0.113***     432.922       419.887   
                     (459.777)    (0.043)     (459.701)     (459.777)  
                                                                       
urban                -93.130***  -0.007***    -94.980***    -93.130*** 
                      (24.515)    (0.002)      (24.569)      (24.515)  
                                                                       
gov_rev_real          3.892***    -0.0001      3.837***      3.892***  
                      (0.845)     (0.0001)     (0.846)       (0.845)   
                                                                       
-----------------------------------------------------------------------
Observations            260         260          260           260     
R2                     0.971       0.967        0.970         0.971    
=======================================================================
Note:                                       *p<0.1; **p<0.05; ***p<0.01

可视化

library(ggplot2)
library(dplyr)
library(tidyr)
library(corrplot)
library(broom)        # 用于提取模型系数
library(modelsummary) # 用于绘制系数图

Attaching package: 'modelsummary'
The following objects are masked from 'package:DescTools':

    Format, Mean, Median, N, SD, Var
library(gridExtra)    # 用于组合多个图形

Attaching package: 'gridExtra'
The following object is masked from 'package:dplyr':

    combine
  1. 时间趋势图:各变量均值随年份变化(分地区
# 计算分地区各年份的均值
trend_data <- df %>%
  group_by(region, year) %>%
  summarise(
    consump_mean = mean(consump_real, na.rm = TRUE),
    income_mean = mean(income_real, na.rm = TRUE),
    index_mean = mean(index_aggregate, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(cols = ends_with("_mean"), 
               names_to = "variable", 
               values_to = "value") %>%
  mutate(variable = recode(variable,
                           "consump_mean" = "农村居民实际消费支出",
                           "income_mean" = "农村居民实际可支配收入",
                           "index_mean" = "数字普惠金融总指数"))

2.绘制分面折线图

p_trend <- ggplot(trend_data, aes(x = year, y = value, color = region, group = region)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  facet_wrap(~ variable, scales = "free_y", ncol = 1) +
  labs(x = "年份", y = "均值", color = "地区",
       title = "关键变量时间趋势(按地区分组)") +
  scale_color_manual(values = c("珠三角" = "#E41A1C", "非珠三角" = "#377EB8")) +
  theme(legend.position = "bottom",
        strip.text = element_text(size = 12, face = "bold"))
print(p_trend)

3.相关性热图

cor_vars <- c("consump_real", "income_real", "index_aggregate", 
              "coverage_breadth", "usage_depth", "digitization_level",
              "ln_per_gdp", "urban", "gov_rev_real")

cor_data <- df[, cor_vars]
cor_matrix <- cor(cor_data, use = "complete.obs")

# 设置中文明细标签
colnames(cor_matrix) <- c("消费", "收入", "总指数", "覆盖广度", 
                           "使用深度", "数字化程度", "人均GDP(对数)", 
                           "城镇化率", "财政收入")
rownames(cor_matrix) <- colnames(cor_matrix)

# 自定义颜色梯度(之前成功使用的配色)
my_colors <- colorRampPalette(c("#053061", "#2166AC", "#4393C3", 
                                 "#92C5DE", "#D1E5F0", "#F7F7F7", 
                                 "#FDDBC7", "#F4A582", "#D6604D", 
                                 "#B2182B", "#67001F"))(200)

# 绘制热图
corrplot(cor_matrix, 
         method = "color", 
         type = "upper", 
         order = "hclust",        # 按聚类排序
         tl.col = "black", 
         tl.srt = 45,             # 旋转标签
         addCoef.col = "black",    # 显示相关系数
         number.cex = 0.8,
         col = my_colors,
         title = "图:变量相关性热图",
         mar = c(0, 0, 2, 0))     # 为标题留出边距

  1. 系数估计图(森林图):展示多个模型的系数对比
# 检查所需的模型对象是否存在
model_names <- c("fe", "fe_cov", "fe_use", "fe_dig", "fe_prd", "fe_nonprd")
missing_models <- model_names[!sapply(model_names, exists)]
if(length(missing_models) > 0) {
  stop("以下模型对象不存在,请先运行对应的回归代码:", 
       paste(missing_models, collapse = ", "))
}

# 准备模型列表和标签
models_list <- list(
  "基准"       = fe,
  "覆盖广度"   = fe_cov,
  "使用深度"   = fe_use,
  "数字化程度" = fe_dig,
  "珠三角"     = fe_prd,
  "非珠三角"   = fe_nonprd
)

# 指定需要绘制的系数及其显示名称
coef_map <- c(
  "index_aggregate"    = "数字普惠金融总指数",
  "coverage_breadth"   = "覆盖广度",
  "usage_depth"        = "使用深度",
  "digitization_level" = "数字化程度"
)

# 绘制系数图(带95%置信区间)
p_coef <- modelplot(models_list, 
                    coef_map = coef_map,
                    conf_level = 0.95) +
  labs(x = "系数估计值", y = "", 
       title = "核心解释变量系数对比(95%置信区间)") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +
  theme(legend.position = "bottom",
        legend.title = element_blank()) +
  scale_color_discrete(name = "模型")

print(p_coef)

  1. 系数稳定性图(展示核心变量系数随控制变量加入的变化)
# 逐步回归模型(如果尚未存在,则临时估计)
if(!exists("m1")) m1 <- plm(consump_real ~ index_aggregate, data = pdata, model = "within")
if(!exists("m2")) m2 <- plm(consump_real ~ index_aggregate + income_real, data = pdata, model = "within")
if(!exists("m3")) m3 <- plm(consump_real ~ index_aggregate + income_real + ln_per_gdp, data = pdata, model = "within")
if(!exists("m4")) m4 <- plm(consump_real ~ index_aggregate + income_real + ln_per_gdp + urban, data = pdata, model = "within")
# m5 就是 fe(基准模型)

step_models <- list(m1, m2, m3, m4, fe)
step_names <- c("仅数字金融", "+收入", "+人均GDP", "+城镇化率", "全部控制变量")

# 提取核心变量 index_aggregate 的系数和置信区间
coef_df <- bind_rows(lapply(seq_along(step_models), function(i) {
  tidy(step_models[[i]], conf.int = TRUE) %>%
    filter(term == "index_aggregate") %>%
    mutate(model = step_names[i])
}))

# 绘制系数稳定性图
p_stability <- ggplot(coef_df, aes(x = model, y = estimate, 
                                    ymin = conf.low, ymax = conf.high)) +
  geom_pointrange(size = 1, color = "#E41A1C") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  labs(x = "", y = "数字普惠金融总指数的系数",
       title = "核心解释变量系数稳定性检验") +
  coord_flip() +  # 横向展示
  theme(axis.text.y = element_text(size = 12))

print(p_stability)

grid.arrange(p_coef, p_stability, ncol = 2,
             top = "数字普惠金融对农村消费影响的实证结果")

The echo: false option disables the printing of code (only output is displayed).