Quarto
Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org .
Running Code
When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:
library (tidyverse) # 数据处理与绘图
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.2.0 ✔ readr 2.2.0
✔ forcats 1.0.1 ✔ stringr 1.6.0
✔ ggplot2 4.0.2 ✔ tibble 3.3.1
✔ lubridate 1.9.5 ✔ tidyr 1.3.2
✔ purrr 1.2.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (readxl) # 读取Excel文件
library (plm) # 面板数据模型
Attaching package: 'plm'
The following objects are masked from 'package:dplyr':
between, lag, lead
library (lmtest) # 异方差稳健标准误
Loading required package: zoo
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
library (sandwich) # 聚类稳健标准误
library (stargazer) # 输出回归表格
Please cite as:
Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library (DescTools) # 缩尾处理
library (dplyr)
You can add options to executable code like this
income_raw <- read_excel ("C:/Users/az214/Desktop/论文数据/核心控制变量:农村居民人均可支配收入.xlsx" , sheet = "Sheet1" )
income <- income_raw %>%
pivot_longer (cols = - 1 , names_to = "year" , values_to = "income_nominal" ) %>%
rename (city = 1 ) %>%
mutate (year = as.numeric (year))
index_raw <- read_excel ("C:/Users/az214/Desktop/论文数据/核心解释变量:广东省数字普惠金融总指数.xlsx" , sheet = "Sheet1" )
index <- index_raw %>%
select (year, city_name, index_aggregate, coverage_breadth, usage_depth, digitization_level) %>%
rename (city = city_name) %>%
mutate (year = as.numeric (year))
gdp_raw <- read_excel ("C:/Users/az214/Desktop/论文数据/其他控制变量4:人均GDP_的对数.xlsx" , sheet = "Sheet1" )
gdp <- gdp_raw %>%
select (city, year, ln_per_gdp) %>%
mutate (year = as.numeric (year))
urban_raw <- read_excel ("C:/Users/az214/Desktop/论文数据/其他控制变量2:城镇化率.xlsx" , sheet = "Sheet1" )
urban <- urban_raw %>%
pivot_longer (cols = - 1 , names_to = "year" , values_to = "urban" ) %>%
rename (city = 1 ) %>%
mutate (year = as.numeric (year))
fiscal_raw <- read_excel ("C:/Users/az214/Desktop/论文数据/其他控制变量3:地方一般公共预算收入.xlsx" , sheet = "Sheet1" , skip = 2 , col_names = FALSE )
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
# 将第一行作为列名
new_names <- as.character (fiscal_raw[1 , ])
new_names[1 ] <- "city" # 第一个单元格空,设为"city"
colnames (fiscal_raw) <- new_names
# 删除第一行(原年份行),并只保留城市和2011-2023列(去掉可能的空列)
fiscal <- fiscal_raw[- 1 , ] %>%
select (city, ` 2011 ` : ` 2023 ` ) %>%
pivot_longer (cols = - city, names_to = "year" , values_to = "gov_rev" ) %>%
mutate (
year = as.numeric (year),
gov_rev = as.numeric (gov_rev)
)
cpi_raw <- read_excel ("C:/Users/az214/Desktop/论文数据/价格平减数据:CPI 居民消费价格指数 6.xlsx" , sheet = "Sheet1" , skip = 5 )
New names:
• `` -> `...1`
• `` -> `...2`
colnames (cpi_raw) <- c ("year" , "cpi_total" , "cpi_urban" , "cpi_rural" )
cpi <- cpi_raw %>%
filter (! is.na (year)) %>% # 去除空行
mutate (across (- year, as.numeric))
cpi <- cpi %>%
arrange (year) %>%
mutate (
# 将环比指数转为定基指数 (2011=100)
cpi_rural_base = 100 * cumprod (cpi_rural / 100 )
)
select (cpi, year, cpi_rural, cpi_rural_base)
# A tibble: 13 × 3
year cpi_rural cpi_rural_base
<chr> <dbl> <dbl>
1 2011 106. 106.
2 2012 103. 109.
3 2013 103. 112.
4 2014 102. 114.
5 2015 101. 115.
6 2016 102 118.
7 2017 101. 119.
8 2018 102. 121.
9 2019 105. 126.
10 2020 103 130.
11 2021 100. 130.
12 2022 102. 133.
13 2023 99.6 133.
#合并所有数据为面板
df <- consump %>%
left_join (income, by = c ("city" , "year" )) %>%
left_join (index, by = c ("city" , "year" )) %>%
left_join (gdp, by = c ("city" , "year" )) %>%
left_join (urban, by = c ("city" , "year" )) %>%
left_join (fiscal, by = c ("city" , "year" ))
cpi <- cpi %>%
mutate (year = as.numeric (year))
df <- df %>%
left_join (select (cpi, year, cpi_rural_base), by = "year" )
# 对名义变量进行平减(实际值 = 名义值 / 定基CPI * 100)
df <- df %>%
mutate (
consump_real = consump_nominal / cpi_rural_base * 100 ,
income_real = income_nominal / cpi_rural_base * 100 ,
gov_rev_real = gov_rev / cpi_rural_base * 100 # 财政收入也平减
)
# 检查缺失值
summary (df)
city year consump_nominal income_nominal
Length:260 Min. :2011 Min. : 5189 Min. : 6775
Class :character 1st Qu.:2014 1st Qu.: 9628 1st Qu.:12199
Mode :character Median :2017 Median :13059 Median :16870
Mean :2017 Mean :13932 Mean :18455
3rd Qu.:2020 3rd Qu.:16865 3rd Qu.:22174
Max. :2023 Max. :32111 Max. :46865
index_aggregate coverage_breadth usage_depth digitization_level
Min. : 41.63 Min. : 22.51 Min. : 56.11 Min. : 44.63
1st Qu.:147.15 1st Qu.:146.47 1st Qu.:141.26 1st Qu.:174.82
Median :221.79 Median :210.30 Median :241.62 Median :261.12
Mean :211.25 Mean :206.19 Mean :208.25 Mean :233.40
3rd Qu.:281.60 3rd Qu.:280.48 3rd Qu.:271.33 3rd Qu.:295.23
Max. :345.47 Max. :374.82 Max. :332.49 Max. :342.37
ln_per_gdp urban gov_rev cpi_rural_base
Min. : 9.716 Min. :35.92 Min. : 27.27 Min. :105.6
1st Qu.:10.425 1st Qu.:47.98 1st Qu.: 72.55 1st Qu.:113.9
Median :10.747 Median :55.73 Median : 114.31 Median :118.7
Mean :10.825 Mean :62.35 Mean : 249.31 Mean :120.5
3rd Qu.:11.191 3rd Qu.:76.01 3rd Qu.: 284.44 3rd Qu.:130.3
Max. :12.042 Max. :95.39 Max. :1945.06 Max. :133.4
consump_real income_real gov_rev_real
Min. : 4914 Min. : 6415 Min. : 24.98
1st Qu.: 8462 1st Qu.:10568 1st Qu.: 59.14
Median :10786 Median :13720 Median : 95.75
Mean :11362 Mean :15052 Mean : 204.73
3rd Qu.:13061 3rd Qu.:17398 3rd Qu.: 226.60
Max. :24166 Max. :35269 Max. :1463.78
#描述性统计
# 选择分析所用变量
vars <- c ("consump_real" , "income_real" , "index_aggregate" ,
"coverage_breadth" , "usage_depth" , "digitization_level" ,
"ln_per_gdp" , "urban" , "gov_rev_real" )
desc_df <- df[, vars]
summary (desc_df)
consump_real income_real index_aggregate coverage_breadth
Min. : 4914 Min. : 6415 Min. : 41.63 Min. : 22.51
1st Qu.: 8462 1st Qu.:10568 1st Qu.:147.15 1st Qu.:146.47
Median :10786 Median :13720 Median :221.79 Median :210.30
Mean :11362 Mean :15052 Mean :211.25 Mean :206.19
3rd Qu.:13061 3rd Qu.:17398 3rd Qu.:281.60 3rd Qu.:280.48
Max. :24166 Max. :35269 Max. :345.47 Max. :374.82
usage_depth digitization_level ln_per_gdp urban
Min. : 56.11 Min. : 44.63 Min. : 9.716 Min. :35.92
1st Qu.:141.26 1st Qu.:174.82 1st Qu.:10.425 1st Qu.:47.98
Median :241.62 Median :261.12 Median :10.747 Median :55.73
Mean :208.25 Mean :233.40 Mean :10.825 Mean :62.35
3rd Qu.:271.33 3rd Qu.:295.23 3rd Qu.:11.191 3rd Qu.:76.01
Max. :332.49 Max. :342.37 Max. :12.042 Max. :95.39
gov_rev_real
Min. : 24.98
1st Qu.: 59.14
Median : 95.75
Mean : 204.73
3rd Qu.: 226.60
Max. :1463.78
calc_stats <- function (x) {
c (样本量 = sum (! is.na (x)),
均值 = mean (x, na.rm = TRUE ),
标准差 = sd (x, na.rm = TRUE ),
最小值 = min (x, na.rm = TRUE ),
中位数 = median (x, na.rm = TRUE ),
最大值 = max (x, na.rm = TRUE ))
}
stats_matrix <- sapply (desc_df, calc_stats)
stats_df <- as.data.frame (t (stats_matrix))
knitr:: kable (stats_df, caption = "描述性统计" , digits = 2 )
描述性统计
consump_real
260
11362.32
4212.05
4913.83
10785.56
24165.74
income_real
260
15051.99
6100.59
6415.34
13719.80
35268.62
index_aggregate
260
211.25
79.14
41.63
221.79
345.47
coverage_breadth
260
206.19
85.60
22.51
210.30
374.82
usage_depth
260
208.25
75.08
56.11
241.62
332.49
digitization_level
260
233.40
78.10
44.63
261.12
342.37
ln_per_gdp
260
10.83
0.54
9.72
10.75
12.04
urban
260
62.35
17.97
35.92
55.73
95.39
gov_rev_real
260
204.73
277.14
24.98
95.75
1463.78
#相关性分析
cor_matrix <- cor (desc_df, use = "complete.obs" )
corrplot (cor_matrix, method = "number" , type = "upper" , tl.cex = 0.8 )
pdata <- pdata.frame (df, index = c ("city" , "year" ))
form_base <- consump_real ~ index_aggregate + income_real + ln_per_gdp + urban + gov_rev_real
#模型选择检验
# 混合OLS
pool <- plm (form_base, data = pdata, model = "pooling" )
# 固定效应
fe <- plm (form_base, data = pdata, model = "within" )
# 随机效应
re <- plm (form_base, data = pdata, model = "random" )
stargazer (pool, fe, re, type = "text" ,
column.labels = c ("混合OLS" , "固定效应" , "随机效应" ),
keep.stat = c ("n" , "rsq" , "adj.rsq" ))
==================================================
Dependent variable:
----------------------------------
consump_real
混合OLS 固定效应 随机效应
(1) (2) (3)
--------------------------------------------------
index_aggregate 12.115*** 17.933*** 15.132***
(1.034) (2.044) (1.526)
income_real 0.532*** 0.377*** 0.437***
(0.018) (0.027) (0.025)
ln_per_gdp -553.854*** 419.887 -189.045
(208.536) (459.777) (362.824)
urban 26.828*** -93.130*** 9.146
(5.517) (24.515) (12.315)
gov_rev_real 0.413 3.892*** 2.241***
(0.251) (0.845) (0.526)
Constant 5,032.098** 2,612.564
(1,947.324) (3,285.383)
--------------------------------------------------
Observations 260 260 260
R2 0.967 0.971 0.965
Adjusted R2 0.967 0.968 0.965
==================================================
Note: *p<0.1; **p<0.05; ***p<0.01
#F检验:比较混合OLS与固定效应(原假设:个体效应不显著)
pFtest (fe, pool) # p<0.05 则拒绝混合OLS,选择固定效应
F test for individual effects
data: form_base
F = 17.014, df1 = 19, df2 = 235, p-value < 2.2e-16
alternative hypothesis: significant effects
#LM检验(Breusch-Pagan):比较混合OLS与随机效应
plmtest (pool, type = "bp" ) # p<0.05 则拒绝混合OLS,选择随机效应
Lagrange Multiplier Test - (Breusch-Pagan)
data: form_base
chisq = 247.23, df = 1, p-value < 2.2e-16
alternative hypothesis: significant effects
#Hausman检验:比较固定效应与随机效应
phtest (fe, re) # p<0.05 则拒绝随机效应,选择固定效应
Hausman Test
data: form_base
chisq = 45.881, df = 5, p-value = 9.603e-09
alternative hypothesis: one model is inconsistent
# 按固定效应模型,聚类稳健标准误(按城市聚类)
fe_robust <- coeftest (fe, vcov = vcovHC (fe, type = "HC1" , cluster = "group" ))
print (fe_robust)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
index_aggregate 17.933114 3.709177 4.8348 2.409e-06 ***
income_real 0.377216 0.076003 4.9632 1.332e-06 ***
ln_per_gdp 419.887152 628.017243 0.6686 0.504412
urban -93.129674 34.635538 -2.6888 0.007683 **
gov_rev_real 3.892290 1.592813 2.4437 0.015276 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 输出标准回归表
stargazer (fe, type = "text" ,
se = list (fe_robust[, 2 ]),
p = list (fe_robust[, 4 ]),
title = "基准回归结果(固定效应,聚类稳健标准误)" )
基准回归结果(固定效应,聚类稳健标准误)
===========================================
Dependent variable:
---------------------------
consump_real
-------------------------------------------
index_aggregate 17.933***
(3.709)
income_real 0.377***
(0.076)
ln_per_gdp 419.887
(628.017)
urban -93.130***
(34.636)
gov_rev_real 3.892**
(1.593)
-------------------------------------------
Observations 260
R2 0.971
Adjusted R2 0.968
F Statistic 1,548.796*** (df = 5; 235)
===========================================
Note: *p<0.1; **p<0.05; ***p<0.01
异质性分析
1 分维度回归(覆盖广度、使用深度、数字化程度)
# 分别替换核心解释变量
form_cov <- consump_real ~ coverage_breadth + income_real + ln_per_gdp + urban + gov_rev_real
form_use <- consump_real ~ usage_depth + income_real + ln_per_gdp + urban + gov_rev_real
form_dig <- consump_real ~ digitization_level + income_real + ln_per_gdp + urban + gov_rev_real
fe_cov <- plm (form_cov, data = pdata, model = "within" )
fe_use <- plm (form_use, data = pdata, model = "within" )
fe_dig <- plm (form_dig, data = pdata, model = "within" )
# 输出结果
stargazer (fe_cov, fe_use, fe_dig, type = "text" ,
column.labels = c ("覆盖广度" , "使用深度" , "数字化程度" ),
keep.stat = c ("n" , "rsq" ))
========================================================
Dependent variable:
-------------------------------------
consump_real
覆盖广度 使用深度 数字化程度
(1) (2) (3)
--------------------------------------------------------
coverage_breadth 15.915***
(2.481)
usage_depth 8.965***
(1.344)
digitization_level 7.639***
(1.139)
income_real 0.380*** 0.475*** 0.484***
(0.033) (0.022) (0.022)
ln_per_gdp 881.009* 1,827.748*** 2,092.269***
(513.345) (402.999) (380.114)
urban -126.408*** -67.065** -69.264***
(26.807) (26.033) (25.979)
gov_rev_real 4.829*** 4.403*** 3.967***
(0.884) (0.887) (0.900)
--------------------------------------------------------
Observations 260 260 260
R2 0.967 0.967 0.967
========================================================
Note: *p<0.1; **p<0.05; ***p<0.01
2 分地区回归(珠三角 vs 非珠三角)
珠三角城市(基于消费数据中的城市):广州、珠海、佛山、惠州、东莞、中山、江门、肇庆(共8个)。
# 定义珠三角城市
prd_cities <- c ("广州市" , "珠海市" , "佛山市" , "惠州市" , "东莞市" , "中山市" , "江门市" , "肇庆市" )
df <- df %>%
mutate (region = ifelse (city %in% prd_cities, "珠三角" , "非珠三角" ))
# 分别回归
pdata_prd <- pdata.frame (df[df$ region == "珠三角" , ], index = c ("city" , "year" ))
pdata_nonprd <- pdata.frame (df[df$ region == "非珠三角" , ], index = c ("city" , "year" ))
fe_prd <- plm (form_base, data = pdata_prd, model = "within" )
fe_nonprd <- plm (form_base, data = pdata_nonprd, model = "within" )
# 输出结果
stargazer (fe_prd, fe_nonprd, type = "text" ,
column.labels = c ("珠三角" , "非珠三角" ),
keep.stat = c ("n" , "rsq" ))
============================================
Dependent variable:
----------------------------
consump_real
珠三角 非珠三角
(1) (2)
--------------------------------------------
index_aggregate 18.183*** 15.731***
(3.855) (2.231)
income_real 0.378*** 0.430***
(0.043) (0.061)
ln_per_gdp 485.060 233.583
(1,016.062) (527.118)
urban -178.380*** -65.347***
(62.192) (23.845)
gov_rev_real 4.287*** 9.812**
(1.394) (3.872)
--------------------------------------------
Observations 104 156
R2 0.965 0.981
============================================
Note: *p<0.1; **p<0.05; ***p<0.01
稳健性检验
1 替换变量:使用对数形式的消费和收入
pdata$ ln_consump <- log (pdata$ consump_real)
pdata$ ln_income <- log (pdata$ income_real)
form_log <- ln_consump ~ index_aggregate + ln_income + ln_per_gdp + urban + gov_rev_real
fe_log <- plm (form_log, data = pdata, model = "within" )
coeftest (fe_log, vcov = vcovHC (fe_log, type = "HC1" , cluster = "group" ))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
index_aggregate 1.7826e-03 3.3434e-04 5.3316 2.283e-07 ***
ln_income 4.7601e-01 8.3522e-02 5.6992 3.588e-08 ***
ln_per_gdp 1.1319e-01 4.6965e-02 2.4100 0.01672 *
urban -7.3138e-03 2.9957e-03 -2.4415 0.01537 *
gov_rev_real -6.1567e-05 1.3705e-04 -0.4492 0.65368
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
2 缩尾处理(1%和99%分位数)
winsorize <- function (x, lower = 0.01 , upper = 0.99 ) {
q <- quantile (x, probs = c (lower, upper), na.rm = TRUE )
x[x < q[1 ]] <- q[1 ]
x[x > q[2 ]] <- q[2 ]
x
}
df_winsor <- df %>%
mutate (
consump_real_w = winsorize (consump_real),
income_real_w = winsorize (income_real),
index_aggregate_w = winsorize (index_aggregate)
)
# 将 df_winsor 转换为面板数据
pdata_w <- pdata.frame (df_winsor, index = c ("city" , "year" ))
# 估计缩尾后的模型
fe_w <- plm (consump_real_w ~ index_aggregate_w + income_real_w + ln_per_gdp + urban + gov_rev_real,
data = pdata_w, model = "within" )
# 查看模型摘要
summary (fe_w)
Oneway (individual) effect Within Model
Call:
plm(formula = consump_real_w ~ index_aggregate_w + income_real_w +
ln_per_gdp + urban + gov_rev_real, data = pdata_w, model = "within")
Balanced Panel: n = 20, T = 13, N = 260
Residuals:
Min. 1st Qu. Median 3rd Qu. Max.
-2181.1466 -277.8391 2.1383 288.2251 1502.1335
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
index_aggregate_w 17.744528 2.047704 8.6656 7.390e-16 ***
income_real_w 0.383155 0.027301 14.0346 < 2.2e-16 ***
ln_per_gdp 432.922105 459.700666 0.9417 0.3472888
urban -94.979589 24.568754 -3.8659 0.0001433 ***
gov_rev_real 3.837435 0.845601 4.5381 9.061e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Total Sum of Squares: 2139800000
Residual Sum of Squares: 63646000
R-Squared: 0.97026
Adj. R-Squared: 0.96722
F-statistic: 1533.19 on 5 and 235 DF, p-value: < 2.22e-16
3 滞后一期解释变量(缓解内生性)
#生成滞后项
df_lag <- df %>%
arrange (city, year) %>%
group_by (city) %>%
mutate (
index_aggregate_lag = lag (index_aggregate),
income_real_lag = lag (income_real)
) %>%
ungroup () %>%
filter (! is.na (index_aggregate_lag)) # 删除缺失滞后的第一年
pdata_lag <- pdata.frame (df_lag, index = c ("city" , "year" ))
fe_lag <- plm (consump_real ~ index_aggregate_lag + income_real_lag + ln_per_gdp + urban + gov_rev_real,
data = pdata_lag, model = "within" )
coeftest (fe_lag, vcov = vcovHC (fe_lag, type = "HC1" , cluster = "group" ))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
index_aggregate_lag 17.933114 3.709177 4.8348 2.409e-06 ***
income_real_lag 0.377216 0.076003 4.9632 1.332e-06 ***
ln_per_gdp 419.887152 628.017243 0.6686 0.504412
urban -93.129674 34.635538 -2.6888 0.007683 **
gov_rev_real 3.892290 1.592813 2.4437 0.015276 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
将主要回归结果整理为表格
stargazer (fe, fe_log, fe_w, fe_lag, type = "text" ,
column.labels = c ("基准" , "对数形式" , "缩尾" , "滞后一期" ),
keep.stat = c ("n" , "rsq" ))
=======================================================================
Dependent variable:
---------------------------------------------------
consump_real ln_consump consump_real_w consump_real
基准 对数形式 缩尾 滞后一期
(1) (2) (3) (4)
-----------------------------------------------------------------------
index_aggregate 17.933*** 0.002***
(2.044) (0.0003)
income_real 0.377***
(0.027)
ln_income 0.476***
(0.073)
index_aggregate_w 17.745***
(2.048)
income_real_w 0.383***
(0.027)
index_aggregate_lag 17.933***
(2.044)
income_real_lag 0.377***
(0.027)
ln_per_gdp 419.887 0.113*** 432.922 419.887
(459.777) (0.043) (459.701) (459.777)
urban -93.130*** -0.007*** -94.980*** -93.130***
(24.515) (0.002) (24.569) (24.515)
gov_rev_real 3.892*** -0.0001 3.837*** 3.892***
(0.845) (0.0001) (0.846) (0.845)
-----------------------------------------------------------------------
Observations 260 260 260 260
R2 0.971 0.967 0.970 0.971
=======================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
可视化
library (ggplot2)
library (dplyr)
library (tidyr)
library (corrplot)
library (broom) # 用于提取模型系数
library (modelsummary) # 用于绘制系数图
Attaching package: 'modelsummary'
The following objects are masked from 'package:DescTools':
Format, Mean, Median, N, SD, Var
library (gridExtra) # 用于组合多个图形
Attaching package: 'gridExtra'
The following object is masked from 'package:dplyr':
combine
时间趋势图:各变量均值随年份变化(分地区
# 计算分地区各年份的均值
trend_data <- df %>%
group_by (region, year) %>%
summarise (
consump_mean = mean (consump_real, na.rm = TRUE ),
income_mean = mean (income_real, na.rm = TRUE ),
index_mean = mean (index_aggregate, na.rm = TRUE ),
.groups = "drop"
) %>%
pivot_longer (cols = ends_with ("_mean" ),
names_to = "variable" ,
values_to = "value" ) %>%
mutate (variable = recode (variable,
"consump_mean" = "农村居民实际消费支出" ,
"income_mean" = "农村居民实际可支配收入" ,
"index_mean" = "数字普惠金融总指数" ))
2.绘制分面折线图
p_trend <- ggplot (trend_data, aes (x = year, y = value, color = region, group = region)) +
geom_line (linewidth = 1.2 ) +
geom_point (size = 2 ) +
facet_wrap (~ variable, scales = "free_y" , ncol = 1 ) +
labs (x = "年份" , y = "均值" , color = "地区" ,
title = "关键变量时间趋势(按地区分组)" ) +
scale_color_manual (values = c ("珠三角" = "#E41A1C" , "非珠三角" = "#377EB8" )) +
theme (legend.position = "bottom" ,
strip.text = element_text (size = 12 , face = "bold" ))
print (p_trend)
3.相关性热图
cor_vars <- c ("consump_real" , "income_real" , "index_aggregate" ,
"coverage_breadth" , "usage_depth" , "digitization_level" ,
"ln_per_gdp" , "urban" , "gov_rev_real" )
cor_data <- df[, cor_vars]
cor_matrix <- cor (cor_data, use = "complete.obs" )
# 设置中文明细标签
colnames (cor_matrix) <- c ("消费" , "收入" , "总指数" , "覆盖广度" ,
"使用深度" , "数字化程度" , "人均GDP(对数)" ,
"城镇化率" , "财政收入" )
rownames (cor_matrix) <- colnames (cor_matrix)
# 自定义颜色梯度(之前成功使用的配色)
my_colors <- colorRampPalette (c ("#053061" , "#2166AC" , "#4393C3" ,
"#92C5DE" , "#D1E5F0" , "#F7F7F7" ,
"#FDDBC7" , "#F4A582" , "#D6604D" ,
"#B2182B" , "#67001F" ))(200 )
# 绘制热图
corrplot (cor_matrix,
method = "color" ,
type = "upper" ,
order = "hclust" , # 按聚类排序
tl.col = "black" ,
tl.srt = 45 , # 旋转标签
addCoef.col = "black" , # 显示相关系数
number.cex = 0.8 ,
col = my_colors,
title = "图:变量相关性热图" ,
mar = c (0 , 0 , 2 , 0 )) # 为标题留出边距
系数估计图(森林图):展示多个模型的系数对比
# 检查所需的模型对象是否存在
model_names <- c ("fe" , "fe_cov" , "fe_use" , "fe_dig" , "fe_prd" , "fe_nonprd" )
missing_models <- model_names[! sapply (model_names, exists)]
if (length (missing_models) > 0 ) {
stop ("以下模型对象不存在,请先运行对应的回归代码:" ,
paste (missing_models, collapse = ", " ))
}
# 准备模型列表和标签
models_list <- list (
"基准" = fe,
"覆盖广度" = fe_cov,
"使用深度" = fe_use,
"数字化程度" = fe_dig,
"珠三角" = fe_prd,
"非珠三角" = fe_nonprd
)
# 指定需要绘制的系数及其显示名称
coef_map <- c (
"index_aggregate" = "数字普惠金融总指数" ,
"coverage_breadth" = "覆盖广度" ,
"usage_depth" = "使用深度" ,
"digitization_level" = "数字化程度"
)
# 绘制系数图(带95%置信区间)
p_coef <- modelplot (models_list,
coef_map = coef_map,
conf_level = 0.95 ) +
labs (x = "系数估计值" , y = "" ,
title = "核心解释变量系数对比(95%置信区间)" ) +
geom_vline (xintercept = 0 , linetype = "dashed" , color = "gray50" ) +
theme (legend.position = "bottom" ,
legend.title = element_blank ()) +
scale_color_discrete (name = "模型" )
print (p_coef)
系数稳定性图(展示核心变量系数随控制变量加入的变化)
# 逐步回归模型(如果尚未存在,则临时估计)
if (! exists ("m1" )) m1 <- plm (consump_real ~ index_aggregate, data = pdata, model = "within" )
if (! exists ("m2" )) m2 <- plm (consump_real ~ index_aggregate + income_real, data = pdata, model = "within" )
if (! exists ("m3" )) m3 <- plm (consump_real ~ index_aggregate + income_real + ln_per_gdp, data = pdata, model = "within" )
if (! exists ("m4" )) m4 <- plm (consump_real ~ index_aggregate + income_real + ln_per_gdp + urban, data = pdata, model = "within" )
# m5 就是 fe(基准模型)
step_models <- list (m1, m2, m3, m4, fe)
step_names <- c ("仅数字金融" , "+收入" , "+人均GDP" , "+城镇化率" , "全部控制变量" )
# 提取核心变量 index_aggregate 的系数和置信区间
coef_df <- bind_rows (lapply (seq_along (step_models), function (i) {
tidy (step_models[[i]], conf.int = TRUE ) %>%
filter (term == "index_aggregate" ) %>%
mutate (model = step_names[i])
}))
# 绘制系数稳定性图
p_stability <- ggplot (coef_df, aes (x = model, y = estimate,
ymin = conf.low, ymax = conf.high)) +
geom_pointrange (size = 1 , color = "#E41A1C" ) +
geom_hline (yintercept = 0 , linetype = "dashed" , color = "gray50" ) +
labs (x = "" , y = "数字普惠金融总指数的系数" ,
title = "核心解释变量系数稳定性检验" ) +
coord_flip () + # 横向展示
theme (axis.text.y = element_text (size = 12 ))
print (p_stability)
grid.arrange (p_coef, p_stability, ncol = 2 ,
top = "数字普惠金融对农村消费影响的实证结果" )
The echo: false option disables the printing of code (only output is displayed).