Introduction

The current version re-run the codes of the data analysis.

# environment setup to run ordered logit properly
options(contrasts = rep("contr.treatment", 2))

Load packages

library(tidyverse) # package for data cleaning and plotting
library(readxl)
library(modelsummary)
library(ordinal) # package for ordinal logit regression
library(brant) # brant test for the parallel assumption for ordered logit
library(MASS) # models that work with the brant test
library(broom) # extracting model summary as data frame
library(modelsummary) # deriving model tables
library(scales) # label percent
library(lubridate) # working with dates
library(marginaleffects) #to calculate marginal effects
library(gt) # to format tables
library(here) # work with directory
set.seed(5432)

Merging CQC and financial data

# import location level full data
rating<- read_csv(here("cleaned_data","cic_all_ratings_2019.csv"))
finance <- read_csv(here("cleaned_data","cls_finance.csv"))
finance1 <- finance %>% 
  mutate(id_digit = as.numeric(str_extract(project_id, "\\d+"))) %>%
  arrange(id_digit)
# checking the largest number for the project_id in the finance data set 
summary(finance1$id_digit)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0   228.0   460.0   610.1   913.2  5181.0
#merging the data
cic2019 <- rating %>% 
  left_join(finance1, by = "project_id")

data cleanning

#select relevant columns, rename and relabel 
cic_cleaned <- cic2019 %>% 
  # recode legal form types to be more readable / easier to present
  mutate(# inherited = ifelse(inherited == "Y", TRUE, FALSE),
         rating = recode(rating, 
                         "Insufficient evidence to rate" = "NA",
                         "Requires improvement" = "Req improv"),
         date = ymd(publication_date)) %>% 
  
  # assign order in the rating levels
  mutate(rating = ordered(rating, levels = c("Inadequate","Req improv", "Good", "Outstanding")),
         social_care = ifelse(type == "Social Care Org", "social care", "healthcare")) %>% 
  
  
  # creating a new dummy variable for facility category
  mutate(founded = as.numeric(founded),
         year = year(date),
         age = year - founded, 
         Year = factor(year)) %>% 
  mutate(cls = ifelse(CLS == 1, "CLS", "CLG"),
         totalequity = as.numeric(totalequity),
         totalequity_std = scale(totalequity, center = TRUE, scale = TRUE)) %>% 
  
   mutate(share_size = case_when(
    totalissueshares <= 20 ~ "small",
    totalissueshares >= 100000 ~ "large",
    TRUE ~ NA_character_
  ))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `founded = as.numeric(founded)`.
## Caused by warning:
## ! NAs introduced by coercion

Check the distribution of the data for share_size

cic_cleaned %>% 
  count(share_size)
## # A tibble: 3 × 2
##   share_size     n
##   <chr>      <int>
## 1 large         23
## 2 small        464
## 3 <NA>        1409

Total counts in full data set

count_full <- cic_cleaned %>% 
  mutate(overall = ifelse(domain == "Overall", 1, 0)) %>% 
  summarize(count_provider = n_distinct(provider_name),
            count_location = n_distinct(location_name)-1,
            count_overall_rating = sum(overall),
            count_rating = n()) 

count_full
## # A tibble: 1 × 4
##   count_provider count_location count_overall_rating count_rating
##            <int>          <dbl>                <dbl>        <int>
## 1            103            170                  551         1896
count_by_form <- cic_cleaned %>% 
  mutate(overall = ifelse(domain == "Overall", 1, 0)) %>% 
  group_by(cls) %>% 
  summarize(count_provider = n_distinct(provider_name),
            count_location = n_distinct(location_name)-1,
            count_overall_rating = sum(overall),
            count_rating = n()) 

count_by_form
## # A tibble: 3 × 5
##   cls   count_provider count_location count_overall_rating count_rating
##   <chr>          <int>          <dbl>                <dbl>        <int>
## 1 CLG               47             56                  104          527
## 2 CLS               56            114                  447         1354
## 3 <NA>               3              2                    0           15
count_by_level_form <- cic_cleaned %>% 
  mutate(overall = ifelse(domain == "Overall", 1, 0)) %>% 
  group_by(cls, level) %>% 
  summarize(count_provider = n_distinct(provider_name),
            count_location = n_distinct(location_name), 
 # the count_location for provider should be manually adjusted to 0
            count_overall_rating = sum(overall),
            count_rating = n()) 
## `summarise()` has grouped output by 'cls'. You can override using the `.groups`
## argument.
count_by_level_form
## # A tibble: 5 × 6
## # Groups:   cls [3]
##   cls   level    count_provider count_location count_overall_rating count_rating
##   <chr> <chr>             <int>          <int>                <dbl>        <int>
## 1 CLG   location             46             56                   75          353
## 2 CLG   provider              3              1                   29          174
## 3 CLS   location             51            114                  395         1042
## 4 CLS   provider             10              1                   52          312
## 5 <NA>  location              3              3                    0           15
count_by_level <- cic_cleaned %>% 
  mutate(overall = ifelse(domain == "Overall", 1, 0)) %>% 
  group_by(level) %>% 
  summarize(count_provider = n_distinct(provider_name),
            count_location = n_distinct(location_name), 
 # the count_location for provider should be manually adjusted to 0
            count_overall_rating = sum(overall),
            count_rating = n()) 

count_by_level
## # A tibble: 2 × 5
##   level    count_provider count_location count_overall_rating count_rating
##   <chr>             <int>          <int>                <dbl>        <int>
## 1 location             97            170                  470         1410
## 2 provider             13              1                   81          486
count_by_level_type <- cic_cleaned %>% 
  mutate(overall = ifelse(domain == "Overall", 1, 0)) %>% 
  group_by(social_care, level) %>% 
  summarize(count_provider = n_distinct(provider_name),
            count_location = n_distinct(location_name), 
 # the count_location for provider should be manually adjusted to 0
            count_overall_rating = sum(overall),
            count_rating = n()) 
## `summarise()` has grouped output by 'social_care'. You can override using the
## `.groups` argument.
count_by_level_type
## # A tibble: 3 × 6
## # Groups:   social_care [2]
##   social_care level    count_provider count_location count_overall_rating
##   <chr>       <chr>             <int>          <int>                <dbl>
## 1 healthcare  location             38             82                  374
## 2 healthcare  provider             13              1                   81
## 3 social care location             61             88                   96
## # ℹ 1 more variable: count_rating <int>
datasummary_crosstab(
  formula = cls ~ rating,
  data = cic_cleaned
)
tinytable_teri27jmghitw0ljwdgq
cls Inadequate Req improv Good Outstanding All
CLG N 13 50 412 52 527
% row 2.5 9.5 78.2 9.9 100.0
CLS N 44 98 1078 134 1354
% row 3.2 7.2 79.6 9.9 100.0
All N 57 151 1502 186 1896
% row 3.0 8.0 79.2 9.8 100.0
datasummary_crosstab(
  formula = cls * spinout ~ rating,
  data = cic_cleaned
)
tinytable_d30yies06ylidy0es72y
cls spinout Inadequate Req improv Good Outstanding All
CLG 0 N 10 27 254 32 323
% row 3.1 8.4 78.6 9.9 100.0
1 N 3 23 158 20 204
% row 1.5 11.3 77.5 9.8 100.0
CLS 0 N 25 46 643 92 806
% row 3.1 5.7 79.8 11.4 100.0
1 N 19 52 435 42 548
% row 3.5 9.5 79.4 7.7 100.0
All N 57 151 1502 186 1896
% row 3.0 8.0 79.2 9.8 100.0

regression analysis

models without equity variable

model_order_overall <- clm(rating ~ cls  + spinout + social_care + age + dissolved,
                data = filter(cic_cleaned, domain == "Overall"),
                link = "logit")

model_order_safe <- clm(rating ~ cls  + spinout + social_care + age + dissolved,
                data = filter(cic_cleaned, domain == "Safe"),
                link = "logit")
model_order_effective <- clm(rating ~ cls  + spinout + social_care + age + dissolved,
                data = filter(cic_cleaned, domain == "Effective"),
                link = "logit")
model_order_caring <- clm(rating ~ cls  + spinout + social_care + age + dissolved,
                data = filter(cic_cleaned, domain == "Caring"),
                link = "logit")
model_order_well_led <- clm(rating ~ cls  + spinout + social_care + age + dissolved,
                data = filter(cic_cleaned, domain == "Well-led"),
                link = "logit")
model_order_responsive <- clm(rating ~ cls  + spinout + social_care + age + dissolved,
                data = filter(cic_cleaned, domain == "Responsive"),
                link = "logit")
ordinal_models <-
  modelsummary(
    list(
      "overall" = model_order_overall,
      "safe" = model_order_safe,
      "effective" = model_order_effective,
      "caring" = model_order_caring,
      "well-led" = model_order_well_led,
      "responsive" = model_order_responsive
    ),
    coef_omit = "region",
    exponentiate = F,
    statistic = "({p.value}) {stars}")
ordinal_models
tinytable_k502zsxz8pkka343ympg
overall safe effective caring well-led responsive
Inadequate|Req improv -3.485 -3.363 -3.775 -3.510
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Req improv|Good -2.496 -1.536 -2.316 -5.337 -2.021 -3.910
(<0.001) *** (0.003) ** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Good|Outstanding 1.435 4.103 2.803 0.674 1.885 1.485
(<0.001) *** (<0.001) *** (<0.001) *** (0.202) (<0.001) *** (0.007) **
clsCLS 0.483 0.564 0.273 -0.329 -0.056 -0.135
(0.072) + (0.102) (0.451) (0.364) (0.857) (0.730)
spinout -0.386 -0.305 -0.446 -0.189 -0.325 -0.681
(0.064) + (0.370) (0.214) (0.588) (0.277) (0.082) +
social_caresocial care 0.096 0.947 0.153 -0.817 0.043 -0.093
(0.716) (0.016) * (0.677) (0.040) * (0.888) (0.810)
age -0.119 -0.045 -0.050 -0.070 -0.023 -0.064
(<0.001) *** (0.354) (0.275) (0.207) (0.587) (0.247)
dissolved 0.212 -0.972 0.370 0.129 0.740 0.439
(0.607) (0.032) * (0.530) (0.817) (0.131) (0.458)
Num.Obs. 540 261 261 261 261 261
AIC 906.6 340.0 332.5 274.7 445.5 269.3
BIC 940.9 368.5 361.0 299.6 474.1 294.3
RMSE 2.45 2.17 2.23 1.55 2.42 1.40

models with equity variables

Due to the large range/dispersion of the fiancial data. I standardize the totalequity variable to enable the models to run.

summary(cic_cleaned$totalequity)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##   -23436    53934   653284  2892894  3613364 36881694       57
eq_order_overall <- clm(rating ~ cls  + spinout + social_care + age + dissolved + totalequity_std,
                data = filter(cic_cleaned, domain == "Overall"),
                link = "logit")

eq_order_safe <- clm(rating ~ cls  + spinout + social_care + age + dissolved + totalequity_std,
                data = filter(cic_cleaned, domain == "Safe"),
                link = "logit")
eq_order_effective <- clm(rating ~ cls  + spinout + social_care + age + dissolved + totalequity_std,
                data = filter(cic_cleaned, domain == "Effective"),
                link = "logit")
eq_order_caring <- clm(rating ~ cls  + spinout + social_care + age + dissolved + totalequity_std,
                data = filter(cic_cleaned, domain == "Caring"),
                link = "logit")
eq_order_well_led <- clm(rating ~ cls  + spinout + social_care + age + dissolved + totalequity_std,
                data = filter(cic_cleaned, domain == "Well-led"),
                link = "logit")
eq_order_responsive <- clm(rating ~ cls  + spinout + social_care + age + dissolved + totalequity_std,
                data = filter(cic_cleaned, domain == "Responsive"),
                link = "logit")
eq_models <-
  modelsummary(
    list(
      "overall" = eq_order_overall,
      "safe" = eq_order_safe,
      "effective" = eq_order_effective,
      "caring" = eq_order_caring,
      "well-led" = eq_order_well_led,
      "responsive" = eq_order_responsive
    ),
    coef_omit = "region",
    exponentiate = F,
    statistic = "({p.value}) {stars}")
eq_models
tinytable_n48qs45djadyft9jwtr4
overall safe effective caring well-led responsive
Inadequate|Req improv -3.632 -3.792 -3.868 -3.763
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Req improv|Good -2.670 -1.992 -2.404 -5.353 -2.328 -4.106
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Good|Outstanding 1.308 3.868 2.713 0.651 1.652 1.316
(<0.001) *** (<0.001) *** (<0.001) *** (0.232) (<0.001) *** (0.019) *
clsCLS 0.495 0.611 0.246 -0.311 -0.028 -0.151
(0.066) + (0.083) + (0.496) (0.392) (0.929) (0.698)
spinout -0.507 -0.682 -0.545 -0.191 -0.530 -0.858
(0.026) * (0.066) + (0.150) (0.602) (0.099) + (0.041) *
social_caresocial care 0.081 0.960 0.184 -0.842 -0.004 -0.074
(0.760) (0.017) * (0.619) (0.035) * (0.989) (0.849)
age -0.132 -0.078 -0.057 -0.072 -0.047 -0.079
(<0.001) *** (0.094) + (0.219) (0.204) (0.294) (0.154)
dissolved 0.240 -0.857 0.449 0.104 0.812 0.541
(0.566) (0.066) + (0.450) (0.853) (0.104) (0.364)
totalequity_std 0.092 0.446 0.163 -0.038 0.174 0.238
(0.356) (0.027) * (0.382) (0.851) (0.274) (0.219)
Num.Obs. 538 259 259 259 259 259
AIC 895.3 327.6 333.0 275.6 436.8 269.2
BIC 933.8 359.6 365.1 304.1 468.8 297.7
RMSE 2.45 2.16 2.23 1.55 2.41 1.40

regression analysis comparing CIC with big vs. small shares

Check the distribution of the data for share_size

cic_cleaned %>% 
  count(share_size)
## # A tibble: 3 × 2
##   share_size     n
##   <chr>      <int>
## 1 large         23
## 2 small        464
## 3 <NA>        1409

Only CLS has large vs. small value as expected

# Crosstab between `cls` and `share_size`
datasummary_crosstab(cls ~ share_size, data = cic_cleaned)
tinytable_49luib1np3wiy4e4ku8n
cls large small All
CLG N 0 0 527
% row 0.0 0.0 100.0
CLS N 23 464 1354
% row 1.7 34.3 100.0
All N 23 464 1896
% row 1.2 24.5 100.0

Spint out can perfectly predict whether one is small or not, so droping that in the model

# Crosstab between `spinout` and `social_care`
datasummary_crosstab(spinout ~ share_size, data = cic_cleaned)
tinytable_rhb0nohc18bz8kowqwbk
spinout large small All
0 N 23 390 1144
% row 2.0 34.1 100.0
1 N 0 74 752
% row 0.0 9.8 100.0
All N 23 464 1896
% row 1.2 24.5 100.0

healthcare perfectly predicts share_size as well

# Crosstab between `dissolved` and `share_size`
datasummary_crosstab(healthcare ~ share_size, data = cic_cleaned)
tinytable_uirc000rjp2pxqdab8m8
healthcare large small All
0 N 0 138 594
% row 0.0 23.2 100.0
1 N 23 326 1257
% row 1.8 25.9 100.0
999 N 0 0 24
% row 0.0 0.0 100.0
All N 23 464 1896
% row 1.2 24.5 100.0

To sum, all large share CICs are cls, non-spinout and healthcare category

models without equity variable

model_order_overall_size <- clm(rating ~  share_size + age,
                data = filter(cic_cleaned, domain == "Overall"),
                link = "logit")

model_order_safe_size <- clm(rating ~  share_size + age ,
                data = filter(cic_cleaned, domain == "Safe"),
                link = "logit")
model_order_effective_size <- clm(rating ~  share_size + age ,
                data = filter(cic_cleaned, domain == "Effective"),
                link = "logit")
model_order_caring_size <- clm(rating ~  share_size + age ,
                data = filter(cic_cleaned, domain == "Caring"),
                link = "logit")
model_order_well_led_size <- clm(rating ~  share_size + age ,
                data = filter(cic_cleaned, domain == "Well-led"),
                link = "logit")
model_order_responsive_size <- clm(rating ~  share_size + age ,
                data = filter(cic_cleaned, domain == "Responsive"),
                link = "logit")
ordinal_models_size <-
  modelsummary(
    list(
      "overall" = model_order_overall_size,
      "safe" = model_order_safe_size,
      "effective" = model_order_effective_size,
      "caring" = model_order_caring_size,
      "well-led" = model_order_well_led_size,
      "responsive" = model_order_responsive_size
    ),
    coef_omit = "region",
    exponentiate = F,
    statistic = "({p.value}) {stars}")
ordinal_models_size
tinytable_qe86kq61p3dzox81e50l
overall safe effective caring well-led responsive
Inadequate|Req improv -5.006 -5.618 -2.414
(<0.001) *** (<0.001) *** (0.100)
Req improv|Good -3.639 -0.066 -3.723 -0.100 -4.167
(<0.001) *** (0.963) (0.005) ** (0.937) (0.003) **
Good|Outstanding -0.242 4.229 0.453 0.922 3.311 0.870
(0.765) (0.006) ** (0.704) (0.461) (0.014) * (0.477)
share_sizesmall -1.325 1.898 -1.285 -1.196 1.560 -1.355
(0.111) (0.201) (0.309) (0.368) (0.238) (0.298)
age -0.043 -0.023 -0.040 0.057 0.029 0.054
(0.365) (0.715) (0.523) (0.363) (0.614) (0.393)
Num.Obs. 207 56 56 56 56 56
AIC 384.6 83.2 92.4 57.4 109.3 72.6
BIC 401.2 91.3 102.5 63.4 119.4 80.7
RMSE 2.62 1.36 2.45 0.78 2.58 1.57