# environment setup to run ordered logit properly
options(contrasts = rep("contr.treatment", 2))
library(tidyverse)
library(lubridate)  # For working with dates
library(scales)
library(ggridges) # For ridge plots
library(gghalves)
library(mapchina)
library(modelsummary)
library(ggrepel)
library(nnet)
trust_raw <- read_csv("data/trust_2023_coded.csv")
trust_data <- trust_raw %>% 
  mutate(term_limit_raw = parse_number(term)) %>% 
  mutate(term_limit = case_when(
    str_detect(term, "月") ~ term_limit_raw / 12,
    TRUE ~ term_limit_raw))  %>% 
  mutate(asset_num = parse_number(asset)*10000, # convert to 1 RMB unit
         asset_size_log = log(asset_num),
         asset_size_1000 = asset_num/1000) %>% 
  mutate(file_year = year(mdy(file_time)),
         Year = as.factor(file_year))  %>% 
  mutate(term_category = case_when(
    term_limit <= 5 ~ "1-5 years",
    term_limit >= 6 ~ "6+ years",
    TRUE ~ "no limit")) %>% 
  mutate(term_category = ordered(term_category, 
                                 levels = c("1-5 years", "6+ years", "no limit")),
         collaboration_type = case_when(
           structure == 0 ~ "loose/no",
           structure == 1 ~ "same role",
           structure == 2 ~ "different roles"),
         collaboration_type = fct_rev(collaboration_type),
         collaborate = ifelse(structure == 0, FALSE, TRUE),
         league = ifelse(str_detect(settlor, "慈善会|慈善协会|慈善总会|慈善联合会") |
                                 str_detect(trustee, "慈善会|慈善协会|慈善总会|慈善联合会"), TRUE, FALSE),
         league_rev = !league,
         charity_league = fct_rev(factor(league)))
#self reference:distribution of trusts with terms of <= 10 years
trust_data %>% 
  filter(!is.na(term_limit) & term_limit <= 10) %>% 
  ggplot() +
  geom_histogram(aes(x = term_limit))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

trust_data %>% 
  count(collaboration_type)
## # A tibble: 4 × 2
##   collaboration_type     n
##   <fct>              <int>
## 1 same role             96
## 2 loose/no             568
## 3 different roles      515
## 4 <NA>                   1

descriptive analysis

table1 <- datasummary((asset_size_1000 * (Mean + SD) + Year + charity_league + league_rev + term_category) ~
                      (collaboration_type + 1) ,
                      data = trust_data, fmt = 0)
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
table1
same role loose/no different roles All
asset_size_1000 Mean 9539 3269 4648 4378
SD 54767 25825 28918 30493
Year 2016 1 13 8 22
2017 6 25 12 43
2018 9 49 29 87
2019 14 79 33 126
2020 11 164 93 268
2021 18 126 101 245
2022 37 112 239 389
charity_league TRUE 54 6 287 347
FALSE 40 473 228 741
league_rev FALSE 54 6 287 347
TRUE 40 473 228 741
term_category 1-5 years 38 215 167 421
6+ years 13 70 80 163
no limit 45 283 268 596

descriptive analysis

#benchmark model
model_logit1 <- glm(collaborate ~ asset_size_log + file_year + term_category + charity_league,
                   data = trust_data, family = "binomial")
summary(model_logit1)
## 
## Call:
## glm(formula = collaborate ~ asset_size_log + file_year + term_category + 
##     charity_league, family = "binomial", data = trust_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9437  -0.8224   0.1597   0.2817   2.0297  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -367.20821  108.47712  -3.385 0.000711 ***
## asset_size_log           0.16850    0.03997   4.215 2.49e-05 ***
## file_year                0.18270    0.05363   3.407 0.000658 ***
## term_category6+ years    1.08246    0.22411   4.830 1.36e-06 ***
## term_categoryno limit   -0.21869    0.17608  -1.242 0.214236    
## charity_leagueFALSE     -4.73626    0.42389 -11.173  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1492.72  on 1087  degrees of freedom
## Residual deviance:  970.98  on 1082  degrees of freedom
##   (92 observations deleted due to missingness)
## AIC: 982.98
## 
## Number of Fisher Scoring iterations: 6

logit regression

#add province fixed effect
model_logit2 <- glm(collaborate ~ asset_size_log + file_year + term_category + charity_league + province,
                   data = trust_data, family = "binomial")
summary(model_logit2)
## 
## Call:
## glm(formula = collaborate ~ asset_size_log + file_year + term_category + 
##     charity_league + province, family = "binomial", data = trust_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0030  -0.7113   0.1487   0.2645   2.4255  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -460.17309  118.59285  -3.880 0.000104 ***
## asset_size_log           0.17179    0.04496   3.821 0.000133 ***
## file_year                0.22771    0.05864   3.883 0.000103 ***
## term_category6+ years    1.23237    0.27505   4.480 7.45e-06 ***
## term_categoryno limit    0.08822    0.21790   0.405 0.685565    
## charity_leagueFALSE     -4.55634    0.44113 -10.329  < 2e-16 ***
## provinceBeijing          1.77734    0.84122   2.113 0.034617 *  
## provinceChongqing        0.78126    1.09395   0.714 0.475127    
## provinceFujian         -15.09876 1373.62413  -0.011 0.991230    
## provinceGansu            1.04803    0.86862   1.207 0.227603    
## provinceGuangdong        1.56956    0.87424   1.795 0.072598 .  
## provinceHainan         -15.74327 3956.18041  -0.004 0.996825    
## provinceHebei            2.60638    1.51357   1.722 0.085068 .  
## provinceHeilongjiang   -16.39681 2281.75282  -0.007 0.994266    
## provinceHenan            1.42694    0.95124   1.500 0.133593    
## provinceHubei            1.46566    1.61267   0.909 0.363436    
## provinceHunan            0.41442    1.38532   0.299 0.764823    
## provinceInnerMogolia   -14.68020 2796.53176  -0.005 0.995812    
## provinceJiangsu         18.68683  776.83894   0.024 0.980809    
## provinceJilin          -15.07440 1592.59094  -0.009 0.992448    
## provinceLiaoning       -14.96041 2268.30013  -0.007 0.994738    
## provinceNanjing          1.95252    0.89357   2.185 0.028883 *  
## provinceQinghai          1.43176    0.89886   1.593 0.111191    
## provinceShaanxi          1.12868    0.87649   1.288 0.197840    
## provinceShandong         3.26043    1.11429   2.926 0.003433 ** 
## provinceShanghai         2.21388    0.91745   2.413 0.015818 *  
## provinceSichuan          1.96041    1.04404   1.878 0.060419 .  
## provinceTianjin          4.17696    0.95805   4.360 1.30e-05 ***
## provinceXinjiang       -15.07959 2283.12193  -0.007 0.994730    
## provinceYunnan           2.47884    1.47953   1.675 0.093851 .  
## provinceZhejiang         1.76986    0.85879   2.061 0.039315 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1492.72  on 1087  degrees of freedom
## Residual deviance:  860.61  on 1057  degrees of freedom
##   (92 observations deleted due to missingness)
## AIC: 922.61
## 
## Number of Fisher Scoring iterations: 16
# use year fixed effect instead of linear trend
model_logit2Y <- glm(collaborate ~ asset_size_log +Year + term_category + charity_league + province,
                   data = trust_data, family = "binomial")
summary(model_logit2Y)
## 
## Call:
## glm(formula = collaborate ~ asset_size_log + Year + term_category + 
##     charity_league + province, family = "binomial", data = trust_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0919  -0.7071   0.1299   0.2944   2.0536  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -0.09878    1.27583  -0.077 0.938284    
## asset_size_log           0.16526    0.04539   3.641 0.000272 ***
## Year2017                 0.01891    0.69298   0.027 0.978225    
## Year2018                -0.06847    0.61208  -0.112 0.910934    
## Year2019                -0.19547    0.59795  -0.327 0.743742    
## Year2020                -0.08794    0.58077  -0.151 0.879651    
## Year2021                 0.26045    0.58318   0.447 0.655156    
## Year2022                 1.00013    0.57379   1.743 0.081328 .  
## term_category6+ years    1.09201    0.27897   3.914 9.06e-05 ***
## term_categoryno limit    0.03734    0.22238   0.168 0.866662    
## charity_leagueFALSE     -4.51572    0.44266 -10.201  < 2e-16 ***
## provinceBeijing          1.40955    0.83876   1.681 0.092857 .  
## provinceChongqing        0.68999    1.09238   0.632 0.527624    
## provinceFujian         -15.18347 1388.44866  -0.011 0.991275    
## provinceGansu            0.99523    0.85915   1.158 0.246705    
## provinceGuangdong        1.33725    0.86672   1.543 0.122860    
## provinceHainan         -16.34561 3956.18043  -0.004 0.996703    
## provinceHebei            2.29923    1.55629   1.477 0.139574    
## provinceHeilongjiang   -16.25833 2278.53078  -0.007 0.994307    
## provinceHenan            1.18224    0.94567   1.250 0.211238    
## provinceHubei            1.55110    1.61332   0.961 0.336336    
## provinceHunan            0.26113    1.39290   0.187 0.851292    
## provinceInnerMogolia   -15.11000 2796.62335  -0.005 0.995689    
## provinceJiangsu         18.52995  782.40203   0.024 0.981105    
## provinceJilin          -15.48232 1557.69938  -0.010 0.992070    
## provinceLiaoning       -15.02753 2279.69477  -0.007 0.994740    
## provinceNanjing          1.68919    0.88653   1.905 0.056727 .  
## provinceQinghai          1.21292    0.89083   1.362 0.173333    
## provinceShaanxi          0.85277    0.87010   0.980 0.327049    
## provinceShandong         3.16567    1.11855   2.830 0.004652 ** 
## provinceShanghai         1.98842    0.90928   2.187 0.028757 *  
## provinceSichuan          1.80442    1.03901   1.737 0.082445 .  
## provinceTianjin          4.02437    0.94759   4.247 2.17e-05 ***
## provinceXinjiang       -15.10362 2278.07814  -0.007 0.994710    
## provinceYunnan           2.34230    1.47194   1.591 0.111541    
## provinceZhejiang         1.54960    0.85022   1.823 0.068366 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1492.72  on 1087  degrees of freedom
## Residual deviance:  850.41  on 1052  degrees of freedom
##   (92 observations deleted due to missingness)
## AIC: 922.41
## 
## Number of Fisher Scoring iterations: 16
trust_data %>% 
  group_by(collaboration_type, Year) %>% 
  summarise(count = n()) %>% 
 ggplot(aes(x=Year, color = collaboration_type, group = collaboration_type, y = count))+
  geom_line(size = 1)+
  geom_text_repel(aes(label = count, x = Year), color = "grey20", size = 3)+
  scale_color_viridis_d(option = "plasma", begin = 0.1, end = 0.8) +
  guides(color=guide_legend(title="Charities' Roles"),
         label = "none") +
  labs(x = NULL, y = NULL) +
  theme_minimal(base_size = 10) +
  theme(legend.position="bottom")

models_table <- modelsummary(list("Model 1" = model_logit1, "Model 2" = model_logit2, 
                                    "Model 3" = model_logit2Y),
                               coef_omit = "province", 
                               statistic = "({p.value}) {stars}")
models_table
Model 1 Model 2 Model 3
(Intercept) −367.208*** −460.173*** −0.099
(0.001) *** (0.000) *** (0.938)
asset_size_log 0.168*** 0.172*** 0.165***
(0.000) *** (0.000) *** (0.000) ***
file_year 0.183*** 0.228***
(0.001) *** (0.000) ***
term_category6+ years 1.082*** 1.232*** 1.092***
(0.000) *** (0.000) *** (0.000) ***
term_categoryno limit −0.219 0.088 0.037
(0.214) (0.686) (0.867)
charity_leagueFALSE −4.736*** −4.556*** −4.516***
(0.000) *** (0.000) *** (0.000) ***
Year2017 0.019
(0.978)
Year2018 −0.068
(0.911)
Year2019 −0.195
(0.744)
Year2020 −0.088
(0.880)
Year2021 0.260
(0.655)
Year2022 1.000+
(0.081) +
Num.Obs. 1088 1088 1088
AIC 983.0 922.6 922.4
BIC 1012.9 1077.4 1102.1
Log.Lik. −485.491 −430.303 −425.205
F 33.987 6.996 6.108
RMSE 0.39 0.36 0.36