Exploring Skills for Care Data

Setting up

#packages for ordered logit
library(ordinal) # package for ordinal logit regression
library(brant) # brant test for the parallel assumption for ordered logit
library(MASS) # models that work with the brant test

library(tidyverse) # package for data cleaning and plotting
library(readxl) # package for reading excel file
library(broom) # extracting model summary as data frame
library(modelsummary) # deriving model tables
library(scales) # label percent

# environment setup to run ordered logit properly
options(contrasts = rep("contr.treatment", 2))

Import data and exploration

#import the raw data file
skills_cqc <- read_excel("data/SfC_CQC.xlsx")
form_full <- read_csv("data/form_full.csv")

Check the distribution of different forms

form_sub <- skills_cqc %>% 
  group_by(establishmentid) %>% 
  summarise(form = first(form)) %>% 
  count(form)
form_sub

## # A tibble: 6 × 2
##   form      n
##   <chr> <int>
## 1 CIC      50
## 2 FPO    4763
## 3 GOV     368
## 4 IND     279
## 5 NA        3
## 6 NPO    1337

Derive the table to compare the distributions

form_full <- form_full %>% 
  select(form, 
         count_full = n)

form_compare <- form_sub %>% 
  filter(form != "NA") %>% 
  select(form, 
         count_sub = n) %>% 
  left_join(form_full, by = "form") %>% 
  mutate(ratio = count_sub/count_full) 

form_compare

## # A tibble: 5 × 4
##   form  count_sub count_full ratio
##   <chr>     <int>      <dbl> <dbl>
## 1 CIC          50         82 0.610
## 2 FPO        4763      10219 0.466
## 3 GOV         368        398 0.925
## 4 IND         279        833 0.335
## 5 NPO        1337       2028 0.659

Run Toy Models

Prepare data for analysis

#select relevant columns, rename and relabel 
skills_cqc <- skills_cqc %>% 
# recode legal form types to be more readable / easier to present
  mutate(inherited = ifelse(inherited == "Y", TRUE, FALSE),
         rating = recode(rating, 
                         "Insufficient evidence to rate" = "NA",
                         "Requires improvement" = "Req improv")) %>% 
  # set the order of the values in the factors 
  mutate(form = ordered(form, levels = c("FPO", "NPO", "GOV", "CIC", "IND")),
         
  # assume the order of the ratings as follows but need to double check with the source 
         rating = ordered(rating, levels = c("Inadequate","Req improv", "Good", "Outstanding"))) %>%
  
  # adding the rating data coded as numerical
  mutate(rating_num = case_when(rating == "Inadequate" ~ 1,
                                rating == "Req improv" ~ 2,
                                rating == "Good" ~ 3,
                                rating == "Outstanding" ~ 4)) %>% 
  
  mutate(category = case_when(primary_cat == "Community based adult social care services" ~ "community",
                              primary_cat == "Residential social care" ~ "residential",
                              TRUE ~ as.character(NA)))

# show first several rows of the data set derived 
head(skills_cqc)

## # A tibble: 6 × 13
##   form  cic_t…¹ care_…² prima…³ region servi…⁴ domain rating publication_date   
##   <ord> <chr>   <chr>   <chr>   <chr>  <chr>   <chr>  <ord>  <dttm>             
## 1 FPO   NA      Y       Reside… East … Overall Safe   Good   2020-03-03 00:00:00
## 2 FPO   NA      Y       Reside… East … Overall Effec… Good   2020-03-03 00:00:00
## 3 FPO   NA      Y       Reside… East … Overall Caring Good   2020-03-03 00:00:00
## 4 FPO   NA      Y       Reside… East … Overall Respo… Good   2020-03-03 00:00:00
## 5 FPO   NA      Y       Reside… East … Overall Well-… Good   2020-03-03 00:00:00
## 6 FPO   NA      Y       Reside… East … Overall Overa… Good   2020-03-03 00:00:00
## # … with 4 more variables: inherited <lgl>, establishmentid <dbl>,
## #   rating_num <dbl>, category <chr>, and abbreviated variable names ¹cic_type,
## #   ²care_home, ³primary_cat, ⁴service_group

Check simple differences in means: seems consistent with our earlier findings

simple_model <- lm(rating_num ~ form, data = skills_cqc)
summary(simple_model)

## 
## Call:
## lm(formula = rating_num ~ form, data = skills_cqc)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.96515  0.03677  0.10505  0.10505  1.13277 
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  2.894954   0.001729 1674.610  < 2e-16 ***
## formNPO      0.068274   0.004196   16.271  < 2e-16 ***
## formGOV      0.070192   0.008817    7.961 1.73e-15 ***
## formCIC      0.097109   0.018646    5.208 1.91e-07 ***
## formIND     -0.027727   0.006216   -4.461 8.18e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4168 on 77707 degrees of freedom
##   (6999 observations deleted due to missingness)
## Multiple R-squared:  0.004806,   Adjusted R-squared:  0.004755 
## F-statistic: 93.82 on 4 and 77707 DF,  p-value: < 2.2e-16

Running simple difference of means between CIC sub-types

simple_cic <- lm(rating_num ~ cic_type, data = skills_cqc)
summary(simple_cic)

## 
## Call:
## lm(formula = rating_num ~ cic_type, data = skills_cqc)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.90582  0.09418  0.09418  0.09418  1.09418 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.00855    0.02731 110.175  < 2e-16 ***
## cic_typeCIC_S -0.03077    0.03731  -0.825 0.409530    
## cic_typeNA    -0.10273    0.02735  -3.756 0.000173 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4177 on 77746 degrees of freedom
##   (6962 observations deleted due to missingness)
## Multiple R-squared:  0.0002832,  Adjusted R-squared:  0.0002575 
## F-statistic: 11.01 on 2 and 77746 DF,  p-value: 1.652e-05

Again CIC as for-profit with shares (S type) has lower rating, but not significant. The NA group (those are not CICs) difference is significant, as expected.

Run the same models as in the full CQC data set

# run the model loops with nest() method
model_list <- skills_cqc %>% 
  group_by(domain) %>% 
  nest()%>% 
  mutate(ols_models = map(data, 
                          ~lm(rating_num ~ form + category + region + inherited, 
                          data = .x)))

Derive the OLS results table

I first print out the order of the domain names to match with summary table.

print(model_list$domain) %>% 
  kableExtra::kable()

## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 3 > 1' in coercion to 'logical(1)'

## [1] "Safe"       "Effective"  "Caring"     "Responsive" "Well-led"  
## [6] "Overall"

x
Safe
Effective
Caring
Responsive
Well-led
Overall

table_ols <- modelsummary(model_list$ols_models,
             statistic = "({p.value}) {stars}")
table_ols

	Model 1	Model 2	Model 3	Model 4	Model 5	Model 6
(Intercept)	2.789***	2.892***	3.016***	2.961***	2.814***	2.873***
	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***
formNPO	0.082***	0.040***	0.027***	0.064***	0.100***	0.082***
	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***
formGOV	0.097***	0.039*	0.029+	0.053**	0.097***	0.081***
	(0.000) ***	(0.023) *	(0.061) +	(0.006) **	(0.000) ***	(0.001) ***
formCIC	0.100*	0.019	0.079*	0.039	0.109+	0.104*
	(0.030) *	(0.604)	(0.014) *	(0.339)	(0.055) +	(0.043) *
formIND	−0.019	−0.033**	−0.004	−0.007	−0.040*	−0.036*
	(0.214)	(0.007) **	(0.712)	(0.621)	(0.033) *	(0.035) *
categoryresidential	−0.064***	−0.037***	−0.047***	−0.030***	−0.067***	−0.070***
	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***
regionEast of England	0.047**	0.041**	0.015	0.035*	0.030	0.043*
	(0.004) **	(0.002) **	(0.189)	(0.016) *	(0.132)	(0.017) *
regionLondon	0.048**	0.054***	0.003	−0.003	0.025	0.019
	(0.004) **	(0.000) ***	(0.807)	(0.820)	(0.206)	(0.308)
regionNorth East	0.123***	0.095***	0.069***	0.082***	0.155***	0.133***
	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***
regionNorth West	0.067***	0.064***	0.022*	0.040**	0.052**	0.052**
	(0.000) ***	(0.000) ***	(0.050) *	(0.004) **	(0.007) **	(0.003) **
regionSouth East	0.098***	0.049***	0.040***	0.051***	0.057**	0.062***
	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***	(0.002) **	(0.000) ***
regionSouth West	0.124***	0.094***	0.085***	0.091***	0.144***	0.142***
	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***	(0.000) ***
regionWest Midlands	0.019	0.020	−0.015	0.000	−0.075***	−0.030+
	(0.233)	(0.114)	(0.173)	(0.982)	(0.000) ***	(0.092) +
regionYorkshire and The Humber	−0.012	0.031*	0.036**	0.008	−0.033	−0.025
	(0.467)	(0.020) *	(0.002) **	(0.576)	(0.115)	(0.186)
inheritedTRUE	−0.018	−0.032**	−0.032**	−0.032*	−0.018	−0.020
	(0.248)	(0.008) **	(0.003) **	(0.020) *	(0.346)	(0.260)
Num.Obs.	12966	12931	12927	12953	12964	12942
R2	0.023	0.013	0.017	0.013	0.025	0.023
R2 Adj.	0.021	0.012	0.016	0.012	0.024	0.022
AIC	14402.5	8181.6	5099.3	11037.1	19530.8	16998.2
BIC	14522.0	8301.1	5218.8	11156.6	19650.3	17117.7
Log.Lik.	−7185.242	−4074.798	−2533.650	−5502.563	−9749.405	−8483.091
RMSE	0.42	0.33	0.29	0.37	0.51	0.47

Exploring Skills for Care Data

Meng Ye

9/29/2022

Setting up

Import data and exploration

Run Toy Models

Run the same models as in the full CQC data set