# environment setup to run ordered logit properly
options(contrasts = rep("contr.treatment", 2))
library(tidyverse)
library(lubridate) # For working with dates
library(scales)
library(ggridges) # For ridge plots
library(gghalves)
library(mapchina)
library(modelsummary)
library(ggrepel)
library(nnet)
trust_raw <- read_csv("data/trust_2023_coded.csv")
trust_data <- trust_raw %>%
mutate(term_limit_raw = parse_number(term)) %>%
mutate(term_limit = case_when(
str_detect(term, "月") ~ term_limit_raw / 12,
TRUE ~ term_limit_raw)) %>%
mutate(asset_num = parse_number(asset)*10000, # convert to 1 RMB unit
asset_size_log = log(asset_num),
asset_size_1000 = asset_num/1000) %>%
mutate(file_year = year(mdy(file_time)),
Year = as.factor(file_year)) %>%
mutate(term_category = case_when(
term_limit <= 5 ~ "1-5 years",
term_limit >= 6 ~ "6+ years",
TRUE ~ "no limit")) %>%
mutate(term_category = ordered(term_category,
levels = c("1-5 years", "6+ years", "no limit")),
collaboration_type = case_when(
structure == 0 ~ "loose/no",
structure == 1 ~ "same role",
structure == 2 ~ "different roles"),
collaboration_type = fct_rev(collaboration_type),
collaborate = ifelse(structure == 0, FALSE, TRUE),
league = ifelse(str_detect(settlor, "慈善会|慈善协会|慈善总会|慈善联合会") |
str_detect(trustee, "慈善会|慈善协会|慈善总会|慈善联合会"), TRUE, FALSE),
league_rev = !league,
charity_league = fct_rev(factor(league)))
#self reference:distribution of trusts with terms of <= 10 years
trust_data %>%
filter(!is.na(term_limit) & term_limit <= 10) %>%
ggplot() +
geom_histogram(aes(x = term_limit))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

trust_data %>%
count(collaboration_type)
## # A tibble: 4 × 2
## collaboration_type n
## <fct> <int>
## 1 same role 96
## 2 loose/no 568
## 3 different roles 515
## 4 <NA> 1
descriptive analysis
table1 <- datasummary((asset_size_1000 * (Mean + SD) + Year + charity_league + league_rev + term_category) ~
(collaboration_type + 1) ,
data = trust_data, fmt = 0)
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
table1
|
|
|
same role
|
loose/no
|
different roles
|
All
|
|
asset_size_1000
|
Mean
|
9539
|
3269
|
4648
|
4378
|
|
|
SD
|
54767
|
25825
|
28918
|
30493
|
|
Year
|
2016
|
1
|
13
|
8
|
22
|
|
|
2017
|
6
|
25
|
12
|
43
|
|
|
2018
|
9
|
49
|
29
|
87
|
|
|
2019
|
14
|
79
|
33
|
126
|
|
|
2020
|
11
|
164
|
93
|
268
|
|
|
2021
|
18
|
126
|
101
|
245
|
|
|
2022
|
37
|
112
|
239
|
389
|
|
charity_league
|
TRUE
|
54
|
6
|
287
|
347
|
|
|
FALSE
|
40
|
473
|
228
|
741
|
|
league_rev
|
FALSE
|
54
|
6
|
287
|
347
|
|
|
TRUE
|
40
|
473
|
228
|
741
|
|
term_category
|
1-5 years
|
38
|
215
|
167
|
421
|
|
|
6+ years
|
13
|
70
|
80
|
163
|
|
|
no limit
|
45
|
283
|
268
|
596
|
descriptive analysis
#benchmark model
model_logit1 <- glm(collaborate ~ asset_size_log + file_year + term_category + charity_league,
data = trust_data, family = "binomial")
summary(model_logit1)
##
## Call:
## glm(formula = collaborate ~ asset_size_log + file_year + term_category +
## charity_league, family = "binomial", data = trust_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.9437 -0.8224 0.1597 0.2817 2.0297
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -367.20821 108.47712 -3.385 0.000711 ***
## asset_size_log 0.16850 0.03997 4.215 2.49e-05 ***
## file_year 0.18270 0.05363 3.407 0.000658 ***
## term_category6+ years 1.08246 0.22411 4.830 1.36e-06 ***
## term_categoryno limit -0.21869 0.17608 -1.242 0.214236
## charity_leagueFALSE -4.73626 0.42389 -11.173 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1492.72 on 1087 degrees of freedom
## Residual deviance: 970.98 on 1082 degrees of freedom
## (92 observations deleted due to missingness)
## AIC: 982.98
##
## Number of Fisher Scoring iterations: 6
logit regression
#add province fixed effect
model_logit2 <- glm(collaborate ~ asset_size_log + file_year + term_category + charity_league + province,
data = trust_data, family = "binomial")
summary(model_logit2)
##
## Call:
## glm(formula = collaborate ~ asset_size_log + file_year + term_category +
## charity_league + province, family = "binomial", data = trust_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0030 -0.7113 0.1487 0.2645 2.4255
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -460.17309 118.59285 -3.880 0.000104 ***
## asset_size_log 0.17179 0.04496 3.821 0.000133 ***
## file_year 0.22771 0.05864 3.883 0.000103 ***
## term_category6+ years 1.23237 0.27505 4.480 7.45e-06 ***
## term_categoryno limit 0.08822 0.21790 0.405 0.685565
## charity_leagueFALSE -4.55634 0.44113 -10.329 < 2e-16 ***
## provinceBeijing 1.77734 0.84122 2.113 0.034617 *
## provinceChongqing 0.78126 1.09395 0.714 0.475127
## provinceFujian -15.09876 1373.62413 -0.011 0.991230
## provinceGansu 1.04803 0.86862 1.207 0.227603
## provinceGuangdong 1.56956 0.87424 1.795 0.072598 .
## provinceHainan -15.74327 3956.18041 -0.004 0.996825
## provinceHebei 2.60638 1.51357 1.722 0.085068 .
## provinceHeilongjiang -16.39681 2281.75282 -0.007 0.994266
## provinceHenan 1.42694 0.95124 1.500 0.133593
## provinceHubei 1.46566 1.61267 0.909 0.363436
## provinceHunan 0.41442 1.38532 0.299 0.764823
## provinceInnerMogolia -14.68020 2796.53176 -0.005 0.995812
## provinceJiangsu 18.68683 776.83894 0.024 0.980809
## provinceJilin -15.07440 1592.59094 -0.009 0.992448
## provinceLiaoning -14.96041 2268.30013 -0.007 0.994738
## provinceNanjing 1.95252 0.89357 2.185 0.028883 *
## provinceQinghai 1.43176 0.89886 1.593 0.111191
## provinceShaanxi 1.12868 0.87649 1.288 0.197840
## provinceShandong 3.26043 1.11429 2.926 0.003433 **
## provinceShanghai 2.21388 0.91745 2.413 0.015818 *
## provinceSichuan 1.96041 1.04404 1.878 0.060419 .
## provinceTianjin 4.17696 0.95805 4.360 1.30e-05 ***
## provinceXinjiang -15.07959 2283.12193 -0.007 0.994730
## provinceYunnan 2.47884 1.47953 1.675 0.093851 .
## provinceZhejiang 1.76986 0.85879 2.061 0.039315 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1492.72 on 1087 degrees of freedom
## Residual deviance: 860.61 on 1057 degrees of freedom
## (92 observations deleted due to missingness)
## AIC: 922.61
##
## Number of Fisher Scoring iterations: 16
# use year fixed effect instead of linear trend
model_logit2Y <- glm(collaborate ~ asset_size_log +Year + term_category + charity_league + province,
data = trust_data, family = "binomial")
summary(model_logit2Y)
##
## Call:
## glm(formula = collaborate ~ asset_size_log + Year + term_category +
## charity_league + province, family = "binomial", data = trust_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0919 -0.7071 0.1299 0.2944 2.0536
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.09878 1.27583 -0.077 0.938284
## asset_size_log 0.16526 0.04539 3.641 0.000272 ***
## Year2017 0.01891 0.69298 0.027 0.978225
## Year2018 -0.06847 0.61208 -0.112 0.910934
## Year2019 -0.19547 0.59795 -0.327 0.743742
## Year2020 -0.08794 0.58077 -0.151 0.879651
## Year2021 0.26045 0.58318 0.447 0.655156
## Year2022 1.00013 0.57379 1.743 0.081328 .
## term_category6+ years 1.09201 0.27897 3.914 9.06e-05 ***
## term_categoryno limit 0.03734 0.22238 0.168 0.866662
## charity_leagueFALSE -4.51572 0.44266 -10.201 < 2e-16 ***
## provinceBeijing 1.40955 0.83876 1.681 0.092857 .
## provinceChongqing 0.68999 1.09238 0.632 0.527624
## provinceFujian -15.18347 1388.44866 -0.011 0.991275
## provinceGansu 0.99523 0.85915 1.158 0.246705
## provinceGuangdong 1.33725 0.86672 1.543 0.122860
## provinceHainan -16.34561 3956.18043 -0.004 0.996703
## provinceHebei 2.29923 1.55629 1.477 0.139574
## provinceHeilongjiang -16.25833 2278.53078 -0.007 0.994307
## provinceHenan 1.18224 0.94567 1.250 0.211238
## provinceHubei 1.55110 1.61332 0.961 0.336336
## provinceHunan 0.26113 1.39290 0.187 0.851292
## provinceInnerMogolia -15.11000 2796.62335 -0.005 0.995689
## provinceJiangsu 18.52995 782.40203 0.024 0.981105
## provinceJilin -15.48232 1557.69938 -0.010 0.992070
## provinceLiaoning -15.02753 2279.69477 -0.007 0.994740
## provinceNanjing 1.68919 0.88653 1.905 0.056727 .
## provinceQinghai 1.21292 0.89083 1.362 0.173333
## provinceShaanxi 0.85277 0.87010 0.980 0.327049
## provinceShandong 3.16567 1.11855 2.830 0.004652 **
## provinceShanghai 1.98842 0.90928 2.187 0.028757 *
## provinceSichuan 1.80442 1.03901 1.737 0.082445 .
## provinceTianjin 4.02437 0.94759 4.247 2.17e-05 ***
## provinceXinjiang -15.10362 2278.07814 -0.007 0.994710
## provinceYunnan 2.34230 1.47194 1.591 0.111541
## provinceZhejiang 1.54960 0.85022 1.823 0.068366 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1492.72 on 1087 degrees of freedom
## Residual deviance: 850.41 on 1052 degrees of freedom
## (92 observations deleted due to missingness)
## AIC: 922.41
##
## Number of Fisher Scoring iterations: 16
trust_data %>%
group_by(collaboration_type, Year) %>%
summarise(count = n()) %>%
ggplot(aes(x=Year, color = collaboration_type, group = collaboration_type, y = count))+
geom_line(size = 1)+
geom_text_repel(aes(label = count, x = Year), color = "grey20", size = 3)+
scale_color_viridis_d(option = "plasma", begin = 0.1, end = 0.8) +
guides(color=guide_legend(title="Charities' Roles"),
label = "none") +
labs(x = NULL, y = NULL) +
theme_minimal(base_size = 10) +
theme(legend.position="bottom")

models_table <- modelsummary(list("Model 1" = model_logit1, "Model 2" = model_logit2,
"Model 3" = model_logit2Y),
coef_omit = "province",
statistic = "({p.value}) {stars}")
models_table
|
|
Model 1
|
Model 2
|
Model 3
|
|
(Intercept)
|
−367.208***
|
−460.173***
|
−0.099
|
|
|
(0.001) ***
|
(0.000) ***
|
(0.938)
|
|
asset_size_log
|
0.168***
|
0.172***
|
0.165***
|
|
|
(0.000) ***
|
(0.000) ***
|
(0.000) ***
|
|
file_year
|
0.183***
|
0.228***
|
|
|
|
(0.001) ***
|
(0.000) ***
|
|
|
term_category6+ years
|
1.082***
|
1.232***
|
1.092***
|
|
|
(0.000) ***
|
(0.000) ***
|
(0.000) ***
|
|
term_categoryno limit
|
−0.219
|
0.088
|
0.037
|
|
|
(0.214)
|
(0.686)
|
(0.867)
|
|
charity_leagueFALSE
|
−4.736***
|
−4.556***
|
−4.516***
|
|
|
(0.000) ***
|
(0.000) ***
|
(0.000) ***
|
|
Year2017
|
|
|
0.019
|
|
|
|
|
(0.978)
|
|
Year2018
|
|
|
−0.068
|
|
|
|
|
(0.911)
|
|
Year2019
|
|
|
−0.195
|
|
|
|
|
(0.744)
|
|
Year2020
|
|
|
−0.088
|
|
|
|
|
(0.880)
|
|
Year2021
|
|
|
0.260
|
|
|
|
|
(0.655)
|
|
Year2022
|
|
|
1.000+
|
|
|
|
|
(0.081) +
|
|
Num.Obs.
|
1088
|
1088
|
1088
|
|
AIC
|
983.0
|
922.6
|
922.4
|
|
BIC
|
1012.9
|
1077.4
|
1102.1
|
|
Log.Lik.
|
−485.491
|
−430.303
|
−425.205
|
|
F
|
33.987
|
6.996
|
6.108
|
|
RMSE
|
0.39
|
0.36
|
0.36
|