Notes for edits on 7/2/2024

Add the model with health/social care categorization rather than the eight service types

Notes for editting 1/4/2024

  1. I only kept the model we chose to use for ARNOVA 2023
  2. The data used in the analysis is the version MERGING in provider sheet data. We keep 2 levels for both government and CICs.
  3. Re-ran descriptive statistics to better suit the need for the AOM draft

Notes for updates 10/17/2023

  1. I have checked the false positive CIC “Community Integrated Care (CIC) - 2 Seafarers Walk” found in the 2019 dataset. It is NOT in the 2017 dataset
  2. Added data cleaning for dropping overlapping provider rating
  3. Drop certain rating categories as discussed

Setting up the R session and Loading packages

# environment setup to run ordered logit properly
options(contrasts = rep("contr.treatment", 2))

This chunk loads all the packages to use

#packages for ordered logit
library(ordinal) # package for ordinal logit regression
library(brant) # brant test for the parallel assumption for ordered logit
library(MASS) # models that work with the brant test
library(tidyverse) # package for data cleaning and plotting
library(readxl) # package for reading excel file
library(broom) # extracting model summary as data frame
library(modelsummary) # deriving model tables
library(scales) # label percent
library(lubridate) # working with dates
library(marginaleffects) #to calculate marginal effects
library(gt) # to format tables
library(here) # work with directory

Import the cleaned data

merged <- read_rds(here("data", "spinout_2017.rds"))

Prepare the data for ordinal regression

#select relevant columns, rename and relabel 
merged_cleaned <- merged %>% 
  # recode legal form types to be more readable / easier to present
  mutate(# inherited = ifelse(inherited == "Y", TRUE, FALSE),
         rating = recode(rating, 
                         "Insufficient evidence to rate" = "NA",
                         "Requires improvement" = "Req improv"),
         date = ymd(publication_date)) %>% 
  
  # assign order in the rating levels
  mutate(rating = ordered(rating, levels = c("Inadequate","Req improv", "Good", "Outstanding")),
         form_spinout = case_when(form == "GOV" ~ "GOV",
                           form == "CIC" & spin_out == "TRUE" ~ "SP_CIC",
                           form == "CIC" & spin_out == "FALSE" ~ "Ind_CIC"),
         form_spinout = fct_relevel(form_spinout, "GOV"),
         social_care = ifelse(type == "Social Care Org", "social care", "healthcare")) %>% 
  
  
  # creating a new dummy variable for facility category
  mutate(year = year(date),
         year2 = year-2013,
         Year = factor(year)) %>%
  
  # regroup care type

  mutate(service_type = case_when(
      str_detect(primary_cat, "Acute hospital") ~ "Acute hospital",
      str_detect(primary_cat, "Mental health") ~ "Mental health",
      TRUE ~ primary_cat  # Keep the original value if none of the conditions match
    )) %>%
  
  # drop unmatched service types
  filter(!service_type %in% c("Acute hospital", "Ambulance service", "Dentists", 
                              "Independent consulting doctors", "Prison Healthcare")) %>% 
  
  # drop overlapping provider data 
  # (correcting the issues with counting twice when both provider and service location are available)
  group_by(provider_name) %>% 
  mutate(overlap = ifelse(
    n() > 1 & level == "provider" & report_type == "Provider", "drop", "keep"
  )) %>%
  ungroup()  %>% 
  filter(overlap != "drop") %>% 
  # 11784 rolws of data down to 11310 rows
  
  # converting the ordinal variable to numerical 
  mutate(rating_num = case_when(rating == "Inadequate" ~ 1,
                                rating == "Req improv" ~ 2,
                                rating == "Good" ~ 3,
                                rating == "Outstanding" ~ 4)) 
nrow(merged_cleaned)
## [1] 11310
merged_cleaned %>% 
  mutate(overall = ifelse(domain == "Overall", 1, 0)) %>% 
  group_by(form_spinout) %>% 
  summarize(count_provider = n_distinct(provider_name),
            count_location = n_distinct(location_name),
            count_overall_rating = sum(overall),
            count_rating = n()) %>% 
  gt()
form_spinout count_provider count_location count_overall_rating count_rating
GOV 218 808 1719 9785
Ind_CIC 62 94 247 777
SP_CIC 21 52 233 748

Note The count of unique location name does not include provider level data lines.

Overall distribution of key variables - two groups

datasummary(form_spinout + social_care + service_type  + region + Year ~ 1, data = merged_cleaned, fmt = 0)
All
form_spinout GOV 9785
Ind_CIC 777
SP_CIC 748
social_care healthcare 6258
social care 5052
service_type Community based adult social care services 2298
Community health - NHS & Independent 1110
GP Practices 750
Hospice services 18
Mental health 4074
Out of hours 12
Residential social care 3018
Urgent care services & mobile doctors 30
region East Midlands 1120
East of England 1026
London 1338
North East 519
North West 1823
South East 1757
South West 1072
West Midlands 1176
Yorkshire and The Humber 1479
Year 2013 0
2014 492
2015 1447
2016 5085
2017 4286

Overall distribution of key variables - three groups

datasummary(1 + rating + social_care + service_type + region ~ form_spinout, data = merged_cleaned, fmt = 0)
GOV Ind_CIC SP_CIC
All 9785 777 748
rating Inadequate 80 1 11
Req improv 1441 98 74
Good 7932 607 627
Outstanding 332 71 36
social_care healthcare 5249 429 580
social care 4536 348 168
service_type Community based adult social care services 1944 252 102
Community health - NHS & Independent 846 96 168
GP Practices 203 285 262
Hospice services 6 6 6
Mental health 3930 24 120
Out of hours 6 6 0
Residential social care 2832 102 84
Urgent care services & mobile doctors 18 6 6
region East Midlands 1056 54 10
East of England 732 60 234
London 1152 108 78
North East 507 12 0
North West 1556 231 36
South East 1649 102 6
South West 796 78 198
West Midlands 1086 84 6
Yorkshire and The Humber 1251 48 180

whole models with continous year trend

Since service_type is inclusive of variation/information from socialcare and level, we drop this variable from the model

model_order_overall <- clm(rating ~ form_spinout  + service_type + region + year2,
                data = filter(merged_cleaned, domain == "Overall"),
                link = "logit")

model_order_safe <- clm(rating ~ form_spinout  + service_type+ region + year2,
                data = filter(merged_cleaned, domain == "Safe"),
                link = "logit")
model_order_effective <- clm(rating ~ form_spinout  + service_type + region + year2,
                data = filter(merged_cleaned, domain == "Effective"),
                link = "logit")
model_order_caring <- clm(rating ~ form_spinout  + service_type + region + year2,
                data = filter(merged_cleaned, domain == "Caring"),
                link = "logit")
model_order_well_led <- clm(rating ~ form_spinout + service_type + region + year2,
                data = filter(merged_cleaned, domain == "Well-led"),
                link = "logit")
model_order_responsive <- clm(rating ~ form_spinout + service_type + region + year2,
                data = filter(merged_cleaned, domain == "Responsive"),
                link = "logit")
ordinal_models_exp <-
  modelsummary(
    list(
      "overall" = model_order_overall,
      "safe" = model_order_safe,
      "effective" = model_order_effective,
      "caring" = model_order_caring,
      "well-led" = model_order_well_led,
      "responsive" = model_order_responsive
    ),
    coef_omit = "region",
    exponentiate = F,
    statistic = "({p.value}) {stars}")
ordinal_models_exp
overall safe effective caring well-led responsive
Inadequate|Req improv -6.503 -6.365 -6.484 -5.711 -5.687
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Req improv|Good -3.573 -3.018 -2.684 -3.690 -2.779 -2.640
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Good|Outstanding 1.244 3.821 2.982 3.384 2.251 2.695
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
form_spinoutInd_CIC 0.540 0.432 0.303 0.358 0.079 0.655
(0.010) ** (0.146) (0.314) (0.350) (0.763) (0.033) *
form_spinoutSP_CIC 0.384 0.469 0.264 0.149 0.715 0.407
(0.071) + (0.105) (0.390) (0.708) (0.015) * (0.197)
service_typeCommunity health - NHS & Independent -1.105 -1.652 -0.907 1.262 -0.887 -1.011
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
service_typeGP Practices 0.143 -0.282 1.028 0.508 0.854 -0.114
(0.483) (0.511) (0.017) * (0.337) (0.026) * (0.787)
service_typeHospice services 0.103 0.924 0.345 -0.027 0.519 -0.091
(0.945) (0.692) (0.848) (0.991) (0.737) (0.961)
service_typeMental health -0.683 -1.538 -0.762 0.866 -0.304 -0.598
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (0.065) + (0.001) **
service_typeOut of hours -1.489 -1.792 -3.836 -0.498 -1.431 0.137
(0.271) (0.196) (0.064) + (0.868) (0.291) (0.946)
service_typeResidential social care -0.363 -0.552 -0.398 -0.372 -0.422 -0.243
(0.031) * (0.005) ** (0.039) * (0.187) (0.013) * (0.210)
service_typeUrgent care services & mobile doctors 0.760 1.585 0.869 -0.436 0.960 0.253
(0.509) (0.378) (0.531) (0.816) (0.425) (0.844)
year2 -0.585 -0.370 -0.331 0.086 -0.385 -0.162
(<0.001) *** (<0.001) *** (<0.001) *** (0.426) (<0.001) *** (0.048) *
Num.Obs. 2199 1823 1820 1820 1824 1824
AIC 2940.3 2233.6 2014.0 1187.3 2409.7 2030.3
BIC 3059.9 2349.3 2129.7 1297.5 2525.4 2145.9
RMSE 2.28 2.12 2.18 1.28 2.22 2.23

whole models with continous year trend by social/health care

Since service_type is inclusive of variation/information from socialcare and level, we drop this variable from the model

model_order_overall2 <- clm(rating ~ form_spinout + social_care + region + year2,
                data = filter(merged_cleaned, domain == "Overall"),
                link = "logit")

model_order_safe2 <- clm(rating ~ form_spinout  + social_care + region + year2,
                data = filter(merged_cleaned, domain == "Safe"),
                link = "logit")
model_order_effective2 <- clm(rating ~ form_spinout  + social_care + region + year2,
                data = filter(merged_cleaned, domain == "Effective"),
                link = "logit")
model_order_caring2 <- clm(rating ~ form_spinout  + social_care + region + year2,
                data = filter(merged_cleaned, domain == "Caring"),
                link = "logit")
model_order_well_led2 <- clm(rating ~ form_spinout + social_care + region + year2,
                data = filter(merged_cleaned, domain == "Well-led"),
                link = "logit")
model_order_responsive2 <- clm(rating ~ form_spinout + social_care + region + year2,
                data = filter(merged_cleaned, domain == "Responsive"),
                link = "logit")
ordinal_models_exp2 <-
  modelsummary(
    list(
      "overall" = model_order_overall2,
      "safe" = model_order_safe2,
      "effective" = model_order_effective2,
      "caring" = model_order_caring2,
      "well-led" = model_order_well_led2,
      "responsive" = model_order_responsive2
    ),
    coef_omit = "region",
    exponentiate = F,
    statistic = "({p.value}) {stars}")
ordinal_models_exp2
overall safe effective caring well-led responsive
Inadequate|Req improv -5.727 -4.767 -5.657 -5.199 -4.941
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Req improv|Good -2.816 -1.445 -1.886 -4.658 -2.283 -1.901
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
Good|Outstanding 1.917 5.275 3.652 2.365 2.656 3.395
(<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) *** (<0.001) ***
form_spinoutInd_CIC 1.001 0.772 0.622 0.375 0.274 0.750
(<0.001) *** (0.005) ** (0.032) * (0.286) (0.274) (0.011) *
form_spinoutSP_CIC 0.739 0.665 0.529 0.163 0.816 0.411
(<0.001) *** (0.013) * (0.077) + (0.664) (0.004) ** (0.172)
social_caresocial care 0.312 1.067 0.380 -1.080 0.052 0.447
(0.005) ** (<0.001) *** (0.003) ** (<0.001) *** (0.652) (<0.001) ***
year2 -0.534 -0.342 -0.307 0.053 -0.347 -0.130
(<0.001) *** (<0.001) *** (<0.001) *** (0.624) (<0.001) *** (0.108)
Num.Obs. 2199 1823 1820 1820 1824 1824
AIC 2966.7 2261.8 2033.4 1183.1 2428.1 2029.2
BIC 3052.2 2344.4 2116.0 1260.2 2510.7 2111.9
RMSE 2.28 2.13 2.19 1.28 2.23 2.23