data-analysis

Introduction

The current version re-runs the code for the data analysis.

# Use treatment (dummy) contrasts for both entries of the `contrasts` option
# (unordered factors first, ordered factors second) so that the ordered logit
# models below are set up properly.
options(contrasts = c("contr.treatment", "contr.treatment"))

Load packages

library(tidyverse) # package for data cleaning and plotting
library(readxl)
library(modelsummary)
library(ordinal) # package for ordinal logit regression
library(brant) # brant test for the parallel assumption for ordered logit
library(MASS) # models that work with the brant test
library(broom) # extracting model summary as data frame
library(modelsummary) # deriving model tables
library(scales) # label percent
library(lubridate) # working with dates
library(marginaleffects) #to calculate marginal effects
library(gt) # to format tables
library(here) # work with directory
set.seed(5432)

Merging CQC and financial data

# Read the location-level ratings and the finance records from the
# cleaned_data folder of the project.
rating <- here("cleaned_data", "cic_all_ratings_2019.csv") %>%
  read_csv()
finance <- here("cleaned_data", "cls_finance.csv") %>%
  read_csv()

# Extract the first run of digits in project_id as a number and sort the
# finance records by it.
finance1 <- arrange(
  mutate(finance, id_digit = as.numeric(str_extract(project_id, "\\d+"))),
  id_digit
)

# Inspect the distribution of the numeric project ids in the finance data
# set, in particular the largest one.
finance1 %>%
  pull(id_digit) %>%
  summary()

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0   228.0   460.0   610.1   913.2  5181.0

# Attach the finance columns to every rating row, matching on project_id.
# Rating rows without a matching finance record are kept, with NA in the
# finance columns.
cic2019 <- left_join(rating, finance1, by = "project_id")

data cleaning

# Build the analysis data set: tidy the rating labels, derive the sector, age
# and cls variables, and standardize total equity.
cic_cleaned <- cic2019 %>%
  mutate(
    # Shorten the rating labels; "Insufficient evidence to rate" is treated
    # as missing.
    rating = recode(rating,
                    "Insufficient evidence to rate" = NA_character_,
                    "Requires improvement" = "Req improv"),
    date = ymd(publication_date),
    # Order the ratings from worst to best; any label not listed becomes NA.
    rating = ordered(
      rating,
      levels = c("Inadequate", "Req improv", "Good", "Outstanding")
    ),
    social_care = ifelse(type == "Social Care Org", "social care", "healthcare")
  ) %>%
  mutate(
    # Non-numeric entries in `founded` become NA (with a coercion warning).
    founded = as.numeric(founded),
    year = year(date),
    # Years between founding and the year of the rating publication date.
    age = year - founded,
    Year = factor(year),
    cls = ifelse(CLS == 1, "CLS", "CLG"),
    totalequity = as.numeric(totalequity),
    # scale() returns a one-column matrix; as.numeric() stores the z-scores
    # as a plain numeric vector instead of a matrix column.
    totalequity_std = as.numeric(
      scale(totalequity, center = TRUE, scale = TRUE)
    )
  )

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `founded = as.numeric(founded)`.
## Caused by warning:
## ! NAs introduced by coercion

# Cross-tabulate cls (rows) against rating (columns).
datasummary_crosstab(cls ~ rating, data = cic_cleaned)

cls		Inadequate	Req improv	Good	Outstanding	All
CLG	N	13	50	412	52	527
	% row	2.5	9.5	78.2	9.9	100.0
CLS	N	44	98	1078	134	1354
	% row	3.2	7.2	79.6	9.9	100.0
All	N	57	151	1502	186	1896
	% row	3.0	8.0	79.2	9.8	100.0

# Cross-tabulate each cls-by-spinout combination (rows) against rating
# (columns).
datasummary_crosstab(cls * spinout ~ rating, data = cic_cleaned)

cls	spinout		Inadequate	Req improv	Good	Outstanding	All
CLG	0	N	10	27	254	32	323
		% row	3.1	8.4	78.6	9.9	100.0
	1	N	3	23	158	20	204
		% row	1.5	11.3	77.5	9.8	100.0
CLS	0	N	25	46	643	92	806
		% row	3.1	5.7	79.8	11.4	100.0
	1	N	19	52	435	42	548
		% row	3.5	9.5	79.4	7.7	100.0
	All	N	57	151	1502	186	1896
		% row	3.0	8.0	79.2	9.8	100.0

regression analysis

models without equity variable

# Ordered logit of the rating on the same set of predictors, fitted
# separately for each rating domain.
rating_formula_base <- rating ~ cls + spinout + social_care + age + dissolved

model_order_overall <- clm(
  rating_formula_base,
  data = filter(cic_cleaned, domain == "Overall"),
  link = "logit"
)

model_order_safe <- clm(
  rating_formula_base,
  data = filter(cic_cleaned, domain == "Safe"),
  link = "logit"
)

model_order_effective <- clm(
  rating_formula_base,
  data = filter(cic_cleaned, domain == "Effective"),
  link = "logit"
)

model_order_caring <- clm(
  rating_formula_base,
  data = filter(cic_cleaned, domain == "Caring"),
  link = "logit"
)

model_order_well_led <- clm(
  rating_formula_base,
  data = filter(cic_cleaned, domain == "Well-led"),
  link = "logit"
)

model_order_responsive <- clm(
  rating_formula_base,
  data = filter(cic_cleaned, domain == "Responsive"),
  link = "logit"
)

# Coefficient table for the six domain models without the equity variable.
# Estimates are left on the log-odds scale (not exponentiated); the line
# under each estimate shows the p-value followed by significance stars.
ordinal_models <- modelsummary(
  list(
    "overall" = model_order_overall,
    "safe" = model_order_safe,
    "effective" = model_order_effective,
    "caring" = model_order_caring,
    "well-led" = model_order_well_led,
    "responsive" = model_order_responsive
  ),
  # Hide any coefficient whose name matches "region".
  coef_omit = "region",
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  exponentiate = FALSE,
  statistic = "({p.value}) {stars}"
)
ordinal_models

	overall	safe	effective	caring	well-led	responsive
Inadequate\|Req improv	−3.485	−3.363	−3.775		−3.510
	(<0.001) ***	(<0.001) ***	(<0.001) ***		(<0.001) ***
Req improv\|Good	−2.496	−1.536	−2.316	−5.337	−2.021	−3.910
	(<0.001) ***	(0.003) **	(<0.001) ***	(<0.001) ***	(<0.001) ***	(<0.001) ***
Good\|Outstanding	1.435	4.103	2.803	0.674	1.885	1.485
	(<0.001) ***	(<0.001) ***	(<0.001) ***	(0.202)	(<0.001) ***	(0.007) **
clsCLS	0.483	0.564	0.273	−0.329	−0.056	−0.135
	(0.072) +	(0.102)	(0.451)	(0.364)	(0.857)	(0.730)
spinout	−0.386	−0.305	−0.446	−0.189	−0.325	−0.681
	(0.064) +	(0.370)	(0.214)	(0.588)	(0.277)	(0.082) +
social_caresocial care	0.096	0.947	0.153	−0.817	0.043	−0.093
	(0.716)	(0.016) *	(0.677)	(0.040) *	(0.888)	(0.810)
age	−0.119	−0.045	−0.050	−0.070	−0.023	−0.064
	(<0.001) ***	(0.354)	(0.275)	(0.207)	(0.587)	(0.247)
dissolved	0.212	−0.972	0.370	0.129	0.740	0.439
	(0.607)	(0.032) *	(0.530)	(0.817)	(0.131)	(0.458)
Num.Obs.	540	261	261	261	261	261
AIC	906.6	340.0	332.5	274.7	445.5	269.3
BIC	940.9	368.5	361.0	299.6	474.1	294.3
RMSE	2.45	2.17	2.23	1.55	2.42	1.40

models with equity variables

Due to the large range/dispersion of the financial data, I standardize the totalequity variable to enable the models to run.

# Inspect the range and missingness of the raw (unstandardized) total equity.
cic_cleaned %>%
  pull(totalequity) %>%
  summary()

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##   -23436    53934   653284  2892894  3613364 36881694       57

# Ordered logit of the rating on the baseline predictors plus standardized
# total equity, fitted separately for each rating domain.
rating_formula_equity <- rating ~ cls + spinout + social_care + age +
  dissolved + totalequity_std

eq_order_overall <- clm(
  rating_formula_equity,
  data = filter(cic_cleaned, domain == "Overall"),
  link = "logit"
)

eq_order_safe <- clm(
  rating_formula_equity,
  data = filter(cic_cleaned, domain == "Safe"),
  link = "logit"
)

eq_order_effective <- clm(
  rating_formula_equity,
  data = filter(cic_cleaned, domain == "Effective"),
  link = "logit"
)

eq_order_caring <- clm(
  rating_formula_equity,
  data = filter(cic_cleaned, domain == "Caring"),
  link = "logit"
)

eq_order_well_led <- clm(
  rating_formula_equity,
  data = filter(cic_cleaned, domain == "Well-led"),
  link = "logit"
)

eq_order_responsive <- clm(
  rating_formula_equity,
  data = filter(cic_cleaned, domain == "Responsive"),
  link = "logit"
)

# Coefficient table for the six domain models that include standardized
# total equity. Estimates are left on the log-odds scale (not exponentiated);
# the line under each estimate shows the p-value followed by significance
# stars.
eq_models <- modelsummary(
  list(
    "overall" = eq_order_overall,
    "safe" = eq_order_safe,
    "effective" = eq_order_effective,
    "caring" = eq_order_caring,
    "well-led" = eq_order_well_led,
    "responsive" = eq_order_responsive
  ),
  # Hide any coefficient whose name matches "region".
  coef_omit = "region",
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  exponentiate = FALSE,
  statistic = "({p.value}) {stars}"
)
eq_models

	overall	safe	effective	caring	well-led	responsive
Inadequate\|Req improv	−3.632	−3.792	−3.868		−3.763
	(<0.001) ***	(<0.001) ***	(<0.001) ***		(<0.001) ***
Req improv\|Good	−2.670	−1.992	−2.404	−5.353	−2.328	−4.106
	(<0.001) ***	(<0.001) ***	(<0.001) ***	(<0.001) ***	(<0.001) ***	(<0.001) ***
Good\|Outstanding	1.308	3.868	2.713	0.651	1.652	1.316
	(<0.001) ***	(<0.001) ***	(<0.001) ***	(0.232)	(<0.001) ***	(0.019) *
clsCLS	0.495	0.611	0.246	−0.311	−0.028	−0.151
	(0.066) +	(0.083) +	(0.496)	(0.392)	(0.929)	(0.698)
spinout	−0.507	−0.682	−0.545	−0.191	−0.530	−0.858
	(0.026) *	(0.066) +	(0.150)	(0.602)	(0.099) +	(0.041) *
social_caresocial care	0.081	0.960	0.184	−0.842	−0.004	−0.074
	(0.760)	(0.017) *	(0.619)	(0.035) *	(0.989)	(0.849)
age	−0.132	−0.078	−0.057	−0.072	−0.047	−0.079
	(<0.001) ***	(0.094) +	(0.219)	(0.204)	(0.294)	(0.154)
dissolved	0.240	−0.857	0.449	0.104	0.812	0.541
	(0.566)	(0.066) +	(0.450)	(0.853)	(0.104)	(0.364)
totalequity_std	0.092	0.446	0.163	−0.038	0.174	0.238
	(0.356)	(0.027) *	(0.382)	(0.851)	(0.274)	(0.219)
Num.Obs.	538	259	259	259	259	259
AIC	895.3	327.6	333.0	275.6	436.8	269.2
BIC	933.8	359.6	365.1	304.1	468.8	297.7
RMSE	2.45	2.16	2.23	1.55	2.41	1.40

data-analysis

Meng Ye

2024-08-24

Introduction

Load packages

Merging CQC and financial data

data cleaning

regression analysis

models without equity variable

models with equity variables