Read Me

This is the data analysis RMarkdown file for 1) merging in spin-out data, and 2)initial data analysis with the additional spin-out variable.

Please read the subtitles and notes added as normal text in this document. Blocks with darker backgrounds are code chunks, mostly with their outputs. I tried to add #comments within the chunck just before the code line to explain the purpose of the code line. Meanwhile I explain the purpose of the section in the texts, so that is where to find information to give you fuller pictures.

# environment setup to run ordered logit properly
options(contrasts = rep("contr.treatment", 2))

This chunk loads all the packages to use

#packages for ordered logit
library(ordinal) # package for ordinal logit regression

library(tidyverse) # package for data cleaning and plotting
library(readxl) # package for reading excel file
library(broom) # extracting model summary as data frame
library(modelsummary) # deriving model tables
library(scales) # label percent
library(lubridate) # working with dates
library(marginaleffects) #to calculate marginal effects

Data Preparation

First, import raw data document with spin-out column

#import the raw data file
cic_spinout <- read_csv("data/CIC_spinout.csv")

Create the subset with provider_id and spin_out indicator

#Derive the spin-out indicator sub-set
spinout <- cic_spinout %>% 
    filter(form == "CIC") %>% 
    select(provider_id, spin_out) %>% 
  group_by(provider_id) %>% 
  summarise(spin_out = first(spin_out)) %>% 
  ungroup() %>% 
  mutate(spin_out = ifelse(spin_out == "no", "Non-spin-out", "Spin-out"))
  

head(spinout)
## # A tibble: 6 × 2
##   provider_id spin_out    
##   <chr>       <chr>       
## 1 1-101641089 Non-spin-out
## 2 1-101652758 Non-spin-out
## 3 1-101657333 Spin-out    
## 4 1-101691942 Non-spin-out
## 5 1-119229201 Non-spin-out
## 6 1-125892596 Non-spin-out

Import the main CQC data set

#import the raw data file
socialcare_raw <- read_csv("data/sample_new_cleaned.csv")

Assign orders to the ordinal level variables and name the organizational form in a reader-friendly way.

# (self-note: do not replicate this chunk, dataframe name changed)



#select relevant columns, rename and relabel 
socialcare_sorted <- socialcare_raw %>% 
  # recode legal form types to be more readable / easier to present
  mutate(location_id = factor(location_id),
         form = case_when(form_num == 1 ~ "FPO",
                          form_num == 2 ~ "NPO",
                          form_num == 3 ~ "GOV",
                          form_num == 4 ~ "CIC",
                          form_num == 5 ~ "IND",
                          TRUE ~ NA_character_),
         inherited = ifelse(inherited == "Y", TRUE, FALSE),
         rating = recode(rating, 
                         "Insufficient evidence to rate" = "NA",
                         "Requires improvement" = "Req improv"),
         publication_date = dmy(publication_date)) %>% 
  # set the order of the values in the factors 
  mutate(form = fct_relevel(form, levels = c("FPO", "NPO", "GOV", "CIC", "IND")),
         
         # assume the order of the ratings as follows but need to double check with the source 
         rating = ordered(rating, levels = c("Inadequate","Req improv", "Good", "Outstanding"))) %>% 
  
  # creating a new dummy variable for facility category
  mutate(category = case_when(primary_cat == "Community based adult social care services" ~ "community",
                              primary_cat == "Residential social care" ~ "residential",
                              TRUE ~ as.character(NA)),
         # deriving year column and dummy variable for before_covid
         year = year(publication_date),
         after_covid = ifelse(year >= 2020, TRUE, FALSE),
         before_covid = ifelse(year <= 2019, TRUE, FALSE)) %>%

  # converting the ordinal variable to numerical 
  mutate(rating_num = case_when(rating == "Inadequate" ~ 1,
                                rating == "Req improv" ~ 2,
                                rating == "Good" ~ 3,
                                rating == "Outstanding" ~ 4)) %>% 
  # derive the rating dummy
  mutate(rating_higher = ifelse(rating_num > 2, 1, 0))
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
# show first several rows of the data set derived 
head(socialcare_sorted)
## # A tibble: 6 × 40
##    ...1 locati…¹ Locat…² Locat…³ Care …⁴ prima…⁵ Locat…⁶ Locat…⁷ Locat…⁸ Locat…⁹
##   <dbl> <fct>    <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>  
## 1     1 1-10000… VNH4P   69 Ten… N       Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 2     2 1-10000… VNH4P   69 Ten… N       Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 3     3 1-10000… VNH4P   69 Ten… N       Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 4     4 1-10000… VNH4P   69 Ten… N       Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 5     5 1-10000… VNH4P   69 Ten… N       Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 6     6 1-10000… VNH4P   69 Ten… N       Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## # … with 30 more variables: `Location Local Authority` <chr>, region <chr>,
## #   `Location NHS Region` <chr>, `Location ONSPD CCG Code` <chr>,
## #   `Location ONSPD CCG` <chr>, `Location Commissioning CCG Code` <lgl>,
## #   `Location Commissioning CCG Name` <lgl>,
## #   `Service / Population Group` <chr>, domain <chr>, rating <ord>,
## #   publication_date <date>, `Report Type` <chr>, inherited <lgl>, URL <chr>,
## #   `Provider ID` <chr>, location_type <chr>, form_num <dbl>, …

Merge

## merge in CIC spin-out indicator
socialcare <- socialcare_sorted %>% 
  rename(provider_id = `Provider ID`)  %>%
  left_join(spinout, by = "provider_id") %>%
  mutate(cic_other = ifelse(form_num == 4, spin_out, as.character(form)),
         cic_other = fct_relevel(cic_other, levels = c("FPO", "NPO", "GOV", "Spin-out", "Non-spin-out", "IND")),
         cic_gov = ifelse(form_num == 4 | form_num == 3, as.character(cic_other), NA),
         cic_gov = fct_relevel(cic_gov, "Non-spin-out", after = Inf))
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
head(socialcare)
## # A tibble: 6 × 43
##    ...1 locati…¹ Locat…² Locat…³ Care …⁴ prima…⁵ Locat…⁶ Locat…⁷ Locat…⁸ Locat…⁹
##   <dbl> <fct>    <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>  
## 1     1 1-10000… VNH4P   69 Ten… N       Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 2     2 1-10000… VNH4P   69 Ten… N       Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 3     3 1-10000… VNH4P   69 Ten… N       Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 4     4 1-10000… VNH4P   69 Ten… N       Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 5     5 1-10000… VNH4P   69 Ten… N       Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 6     6 1-10000… VNH4P   69 Ten… N       Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## # … with 33 more variables: `Location Local Authority` <chr>, region <chr>,
## #   `Location NHS Region` <chr>, `Location ONSPD CCG Code` <chr>,
## #   `Location ONSPD CCG` <chr>, `Location Commissioning CCG Code` <lgl>,
## #   `Location Commissioning CCG Name` <lgl>,
## #   `Service / Population Group` <chr>, domain <chr>, rating <ord>,
## #   publication_date <date>, `Report Type` <chr>, inherited <lgl>, URL <chr>,
## #   provider_id <chr>, location_type <chr>, form_num <dbl>, …

Check the distribution by spin-out and CLS/CLG

spinout_table <- socialcare %>%  
  group_by(location_id)  %>%
  summarise(cic_type = first(cic_type),
            spin_out = first(spin_out))

table(spinout_table$cic_type, spinout_table$spin_out)
##        
##         Non-spin-out Spin-out
##   CIC_G           32        5
##   CIC_S           26       19

OLS with the sub-domain ratings one by one

Model with COVID control compare with For-profit (CIC split into Spin-out, Non-spin-out)

# run the model loops with nest() method
# add after_covid as control
models_ols_covid <- socialcare %>% 
  group_by(domain) %>% 
  nest()%>% 
  mutate(ols_models = map(data, 
                          ~lm(rating_num ~ cic_other + after_covid + 
                              category + region + inherited, 
                              data = .x))) %>% 
  mutate(results = map(ols_models, ~tidy(.x, conf.int = TRUE))) %>% 
  pull(ols_models, name = domain) 
table_ols_covid <- modelsummary(models_ols_covid, statistic = "({p.value}) {stars}")
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 3 > 1' in coercion to 'logical(1)'
table_ols_covid
Safe Effective Caring Responsive Well-led Overall
(Intercept) 2.879*** 2.925*** 3.036*** 2.986*** 2.923*** 2.982***
(0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) ***
cic_otherNPO 0.056*** 0.032*** 0.023** 0.058*** 0.069*** 0.050***
(0.000) *** (0.000) *** (0.002) ** (0.000) *** (0.000) *** (0.000) ***
cic_otherGOV 0.069** 0.032+ 0.025 0.048* 0.065* 0.050*
(0.001) ** (0.058) + (0.101) (0.013) * (0.011) * (0.028) *
cic_otherSpin-out 0.038 0.026 0.017 −0.026 0.026 −0.013
(0.642) (0.698) (0.779) (0.721) (0.792) (0.881)
cic_otherNon-spin-out 0.104* 0.012 0.105** 0.064 0.109+ 0.119*
(0.049) * (0.785) (0.006) ** (0.182) (0.090) + (0.039) *
cic_otherIND −0.038* −0.039** −0.008 −0.012 −0.062*** −0.059***
(0.011) * (0.001) ** (0.454) (0.387) (0.001) *** (0.000) ***
after_covidTRUE −0.271*** −0.135*** −0.083*** −0.107*** −0.329*** −0.333***
(0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) ***
categoryresidential −0.060*** −0.037*** −0.048*** −0.031*** −0.061*** −0.063***
(0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) ***
regionEast of England 0.038* 0.039** 0.015 0.035* 0.019 0.033+
(0.014) * (0.002) ** (0.193) (0.014) * (0.311) (0.052) +
regionLondon 0.045** 0.055*** 0.003 −0.001 0.024 0.018
(0.004) ** (0.000) *** (0.827) (0.925) (0.207) (0.299)
regionNorth East 0.133*** 0.104*** 0.074*** 0.089*** 0.169*** 0.150***
(0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) ***
regionNorth West 0.062*** 0.063*** 0.020+ 0.039** 0.047* 0.046**
(0.000) *** (0.000) *** (0.069) + (0.006) ** (0.012) * (0.006) **
regionSouth East 0.086*** 0.047*** 0.038*** 0.050*** 0.042* 0.047**
(0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.014) * (0.002) **
regionSouth West 0.110*** 0.092*** 0.082*** 0.088*** 0.128*** 0.126***
(0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) ***
regionWest Midlands 0.007 0.018 −0.017 −0.002 −0.090*** −0.045**
(0.664) (0.142) (0.125) (0.890) (0.000) *** (0.008) **
regionYorkshire and The Humber −0.016 0.031* 0.036** 0.008 −0.036+ −0.029
(0.337) (0.018) * (0.003) ** (0.590) (0.067) + (0.107)
inheritedTRUE −0.045** −0.042*** −0.037*** −0.038** −0.051** −0.053**
(0.003) ** (0.001) *** (0.001) *** (0.005) ** (0.006) ** (0.001) **
Num.Obs. 12966 12931 12927 12953 12964 12942
R2 0.104 0.041 0.030 0.027 0.107 0.124
R2 Adj. 0.103 0.039 0.029 0.026 0.106 0.123
AIC 13273.1 7817.4 4933.6 10858.2 18400.7 15587.8
BIC 13407.6 7951.8 5068.0 10992.6 18535.1 15722.2
Log.Lik. −6618.558 −3890.688 −2448.819 −5411.087 −9182.340 −7775.882
RMSE 0.40 0.33 0.29 0.37 0.49 0.44

Model with COVID control compare with GOV only (CIC split into Spin-out, Non-spin-out)

# run the model loops with nest() method
# add after_covid as control
models_ols_covid_gov <- socialcare %>% 
  group_by(domain) %>% 
  nest()%>% 
  mutate(ols_models = map(data, 
                          ~lm(rating_num ~ cic_gov + after_covid + 
                              category + region + inherited, 
                              data = .x))) %>% 
  mutate(results = map(ols_models, ~tidy(.x, conf.int = TRUE))) %>% 
  pull(ols_models, name = domain) 
table_ols_covid_gov <- modelsummary(models_ols_covid_gov, statistic = "({p.value}) {stars}")
table_ols_covid_gov
Safe Effective Caring Responsive Well-led Overall
(Intercept) 2.975*** 2.921*** 2.984*** 2.972*** 2.849*** 2.960***
(0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) ***
cic_govSpin-out −0.060 −0.001 −0.028 −0.063 −0.025 −0.105
(0.393) (0.985) (0.661) (0.347) (0.810) (0.216)
cic_govNon-spin-out 0.001 −0.036 0.068 −0.004 −0.004 0.023
(0.978) (0.435) (0.102) (0.933) (0.951) (0.681)
after_covidTRUE −0.127*** −0.033 0.024 −0.064+ −0.208*** −0.180***
(0.000) *** (0.378) (0.492) (0.080) + (0.000) *** (0.000) ***
categoryresidential −0.105*** −0.087** −0.018 −0.043 −0.126** −0.132***
(0.001) *** (0.004) ** (0.519) (0.145) (0.005) ** (0.000) ***
regionEast of England 0.048 0.052 0.084 0.056 0.102 0.157+
(0.480) (0.444) (0.179) (0.396) (0.309) (0.056) +
regionLondon −0.069 0.031 0.054 0.036 0.115 0.043
(0.286) (0.627) (0.358) (0.566) (0.223) (0.573)
regionNorth East 0.040 0.123+ 0.143* 0.220** 0.366*** 0.279**
(0.586) (0.086) + (0.031) * (0.002) ** (0.001) *** (0.001) **
regionNorth West 0.058 0.117* 0.143** 0.174** 0.290*** 0.199**
(0.321) (0.047) * (0.008) ** (0.002) ** (0.001) *** (0.005) **
regionSouth East −0.001 0.090 0.059 0.093+ 0.190* 0.076
(0.993) (0.121) (0.261) (0.097) + (0.027) * (0.276)
regionSouth West 0.051 0.099 0.094 0.089 0.167+ 0.105
(0.461) (0.148) (0.132) (0.176) (0.099) + (0.204)
regionWest Midlands 0.070 0.070 −0.014 0.087 0.229* 0.087
(0.281) (0.274) (0.815) (0.159) (0.015) * (0.256)
regionYorkshire and The Humber 0.001 0.142* 0.066 0.078 0.134 0.092
(0.992) (0.022) * (0.243) (0.190) (0.143) (0.218)
inheritedTRUE −0.106* 0.025 −0.004 −0.029 −0.068 −0.061
(0.047) * (0.643) (0.930) (0.565) (0.386) (0.340)
Num.Obs. 471 470 469 469 471 472
R2 0.073 0.039 0.043 0.054 0.092 0.103
R2 Adj. 0.046 0.011 0.016 0.027 0.066 0.077
AIC 266.0 253.5 168.4 221.7 625.2 439.2
BIC 328.3 315.8 230.6 284.0 687.5 501.6
Log.Lik. −117.986 −111.766 −69.183 −95.873 −297.605 −204.618
RMSE 0.31 0.31 0.28 0.30 0.46 0.37

The results changed in a wierld way. I think this is due to all the information we lose for the control variables observed in other sectors.

ordered logit models

whole model with control

model_order_overall_covid <- clm(rating ~ cic_other + after_covid + 
                                   category + region + inherited,
                data = filter(socialcare, domain == "Overall"),
                link = "logit")
model_order_safe_covid <- clm(rating ~ cic_other + after_covid +
                                category + region + inherited,
                data = filter(socialcare, domain == "Safe"),
                link = "logit")
model_order_effective_covid <- clm(rating ~ cic_other + after_covid +
                                     category + region + inherited,
                data = filter(socialcare, domain == "Effective"),
                link = "logit")
model_order_caring_covid <- clm(rating ~ cic_other + after_covid + 
                                  category + region + inherited,
                data = filter(socialcare, domain == "Caring"),
                link = "logit")
model_order_well_led_covid <- clm(rating ~ cic_other + after_covid +
                                    category + region + inherited,
                data = filter(socialcare, domain == "Well-led"),
                link = "logit")
model_order_responsive_covid <- clm(rating ~ cic_other + after_covid +
                                      category + region + inherited,
                data = filter(socialcare, domain == "Responsive"),
                link = "logit")
ordinal_models_covid <- modelsummary(list("overall" = model_order_overall_covid, "safe" = model_order_safe_covid, 
                                    "effective" = model_order_effective_covid, "caring"= model_order_caring_covid, 
                                    "well-led" = model_order_well_led_covid, "responsive" = model_order_responsive_covid),
                               coef_omit = "region", exponentiate = F,
                               statistic = "({p.value}) {stars}")
ordinal_models_covid
overall safe effective caring well-led responsive
Inadequate|Req improv −5.397*** −4.999*** −6.333*** −7.445*** −4.795*** −7.122***
(0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) ***
Req improv|Good −2.497*** −2.077*** −2.443*** −3.764*** −1.905*** −2.610***
(0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) ***
Good|Outstanding 2.796*** 5.548*** 4.214*** 2.849*** 2.867*** 2.834***
(0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) ***
cic_otherNPO 0.307*** 0.450*** 0.350*** 0.275** 0.357*** 0.467***
(0.000) *** (0.000) *** (0.000) *** (0.002) ** (0.000) *** (0.000) ***
cic_otherGOV 0.290* 0.562** 0.337+ 0.291 0.316* 0.378*
(0.036) * (0.001) ** (0.065) + (0.106) (0.014) * (0.012) *
cic_otherSpin-out −0.079 0.325 0.316 0.164 0.120 −0.208
(0.884) (0.603) (0.658) (0.819) (0.812) (0.721)
cic_otherNon-spin-out 0.728* 0.994+ 0.128 1.007** 0.532 0.501
(0.037) * (0.051) + (0.777) (0.006) ** (0.113) (0.178)
cic_otherIND −0.339*** −0.248** −0.344** −0.097 −0.282*** −0.097
(0.000) *** (0.009) ** (0.002) ** (0.468) (0.001) *** (0.363)
after_covidTRUE −1.777*** −1.496*** −1.132*** −1.043*** −1.412*** −0.844***
(0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) ***
categoryresidential −0.376*** −0.419*** −0.386*** −0.586*** −0.293*** −0.243***
(0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) *** (0.000) ***
inheritedTRUE −0.320*** −0.334*** −0.398*** −0.460*** −0.237** −0.305**
(0.001) *** (0.001) *** (0.000) *** (0.001) *** (0.007) ** (0.004) **
Num.Obs. 12942 12966 12931 12927 12964 12953
AIC 15297.1 12508.6 10047.5 8808.8 17892.2 12549.2
BIC 15439.0 12650.6 10189.4 8950.7 18034.1 12691.1
RMSE 2.24 2.10 2.13 2.21 2.25 2.27