Addtional Analysis with SfC and CQC data

# environment setup to run ordered logit properly
options(contrasts = rep("contr.treatment", 2))

This chunk loads all the packages to use

#packages for ordered logit
library(tidyverse) # package for data cleaning and plotting
library(readxl) # package for reading excel file
library(broom) # extracting model summary as data frame
library(modelsummary) # deriving model tables
library(scales) # label percent
library(lubridate) # working with dates
library(marginaleffects) #to calculate marginal effects

library(ordinal) # package for ordinal logit regression

Data Preparation

First, import the sampled and coded data set

#import the raw data file
socialcare_sk <- read_csv("data/cqc_sfc_cleaned.csv")

socialcare <- socialcare_sk %>% 
# recode legal form types to be more readable / easier to present
  mutate(inherited = ifelse(inherited == "Y", TRUE, FALSE),
         rating = recode(rating, 
                         "Insufficient evidence to rate" = "NA",
                         "Requires improvement" = "Req improv")) %>% 
  # set the order of the values in the factors 
  mutate(form = ordered(form, levels = c("FPO", "NPO", "GOV", "CIC", "IND")),
         
  # assume the order of the ratings as follows but need to double check with the source 
         rating = ordered(rating, levels = c("Inadequate","Req improv", "Good", "Outstanding"))) %>%
  
  # adding the rating data coded as numerical
  mutate(rating_num = case_when(rating == "Inadequate" ~ 1,
                                rating == "Req improv" ~ 2,
                                rating == "Good" ~ 3,
                                rating == "Outstanding" ~ 4)) %>% 
  
  mutate(category = case_when(primary_cat == "Community based adult social care services" ~ "community",
                              primary_cat == "Residential social care" ~ "residential",
                              TRUE ~ as.character(NA)),

         # deriving year column and dummy variable for before_covid
         year = year(publication_date),
         after_covid = ifelse(year >= 2020, TRUE, FALSE),
         before_covid = ifelse(year <= 2019, TRUE, FALSE)) %>%

  # converting the ordinal variable to numerical 
  mutate(rating_num = case_when(rating == "Inadequate" ~ 1,
                                rating == "Req improv" ~ 2,
                                rating == "Good" ~ 3,
                                rating == "Outstanding" ~ 4)) %>% 
  # derive the rating dummy
  mutate(rating_higher = ifelse(rating_num > 2, 1, 0))

Discriptive statstics

Mean and Standard Deviation for numerical variables

table1 <- datasummary((turnover + staff_level 
                       + totalstaff + age) * (Mean + SD) ~ (form + 1),
            data = socialcare, fmt = 1)

## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 3 > 1' in coercion to 'logical(1)'

table1

		FPO	NPO	GOV	CIC	IND	All
turnover	Mean	32.6	24.6	12.6	29.5	20.1	29.2
	SD	42.9	33.0	11.9	28.0	27.4	39.6
staff_level	Mean	148.9	210.7	222.0	189.2	140.3	164.7
	SD	175.1	254.7	247.2	179.2	290.5	204.6
totalstaff	Mean	42.4	44.4	52.1	54.9	22.3	42.5
	SD	46.3	58.6	60.1	93.2	17.3	49.6
age	Mean	9.0	10.3	10.5	8.2	9.5	9.4
	SD	5.0	5.3	3.6	4.2	5.2	5.0

table2 <- datasummary(service_type_selected ~ (form + 1),
            data = socialcare, fmt = 0)
table2

service_type_selected	FPO	NPO	GOV	CIC	IND	All
care home w/ nursing	6636	1134	192	30	210	8202
care home w/o nursing	13074	4992	1002	48	1260	20400
Domicilary Care	9006	1104	564	180	294	11148
Supported Living	1242	762	180	36	0	2220

table4 <- datasummary(size_type ~ (form + 1),
            data = socialcare, fmt = 0)
table4

size_type	FPO	NPO	GOV	CIC	IND	All
large	9552	5760	2238	102	0	17652
medium	7104	1524	18	90	138	8874
micro	702	84	0	6	168	960
small	11094	834	6	78	1284	13320

```

Addtional Analysis with SfC and CQC data

Meng Ye

2/19/2023

Data Preparation

Discriptive statstics