#packages for ordered logit
library(ordinal) # package for ordinal logit regression
library(brant) # brant test for the parallel assumption for ordered logit
library(MASS) # models that work with the brant test
library(tidyverse) # package for data cleaning and plotting
library(readxl) # package for reading excel file
library(broom) # extracting model summary as data frame
library(modelsummary) # deriving model tables
library(scales) # label percent
# environment setup to run ordered logit properly
options(contrasts = rep("contr.treatment", 2))
#import the raw data file
skills_cqc <- read_excel("data/SfC_CQC.xlsx")
form_full <- read_csv("data/form_full.csv")
Check the distribution of different forms
form_sub <- skills_cqc %>%
group_by(establishmentid) %>%
summarise(form = first(form)) %>%
count(form)
form_sub
## # A tibble: 6 × 2
## form n
## <chr> <int>
## 1 CIC 50
## 2 FPO 4763
## 3 GOV 368
## 4 IND 279
## 5 NA 3
## 6 NPO 1337
Derive the table to compare the distributions
form_full <- form_full %>%
select(form,
count_full = n)
form_compare <- form_sub %>%
filter(form != "NA") %>%
select(form,
count_sub = n) %>%
left_join(form_full, by = "form") %>%
mutate(ratio = count_sub/count_full)
form_compare
## # A tibble: 5 × 4
## form count_sub count_full ratio
## <chr> <int> <dbl> <dbl>
## 1 CIC 50 82 0.610
## 2 FPO 4763 10219 0.466
## 3 GOV 368 398 0.925
## 4 IND 279 833 0.335
## 5 NPO 1337 2028 0.659
Prepare data for analysis
#select relevant columns, rename and relabel
skills_cqc <- skills_cqc %>%
# recode legal form types to be more readable / easier to present
mutate(inherited = ifelse(inherited == "Y", TRUE, FALSE),
rating = recode(rating,
"Insufficient evidence to rate" = "NA",
"Requires improvement" = "Req improv")) %>%
# set the order of the values in the factors
mutate(form = ordered(form, levels = c("FPO", "NPO", "GOV", "CIC", "IND")),
# assume the order of the ratings as follows but need to double check with the source
rating = ordered(rating, levels = c("Inadequate","Req improv", "Good", "Outstanding"))) %>%
# adding the rating data coded as numerical
mutate(rating_num = case_when(rating == "Inadequate" ~ 1,
rating == "Req improv" ~ 2,
rating == "Good" ~ 3,
rating == "Outstanding" ~ 4)) %>%
mutate(category = case_when(primary_cat == "Community based adult social care services" ~ "community",
primary_cat == "Residential social care" ~ "residential",
TRUE ~ as.character(NA)))
# show first several rows of the data set derived
head(skills_cqc)
## # A tibble: 6 × 13
## form cic_t…¹ care_…² prima…³ region servi…⁴ domain rating publication_date
## <ord> <chr> <chr> <chr> <chr> <chr> <chr> <ord> <dttm>
## 1 FPO NA Y Reside… East … Overall Safe Good 2020-03-03 00:00:00
## 2 FPO NA Y Reside… East … Overall Effec… Good 2020-03-03 00:00:00
## 3 FPO NA Y Reside… East … Overall Caring Good 2020-03-03 00:00:00
## 4 FPO NA Y Reside… East … Overall Respo… Good 2020-03-03 00:00:00
## 5 FPO NA Y Reside… East … Overall Well-… Good 2020-03-03 00:00:00
## 6 FPO NA Y Reside… East … Overall Overa… Good 2020-03-03 00:00:00
## # … with 4 more variables: inherited <lgl>, establishmentid <dbl>,
## # rating_num <dbl>, category <chr>, and abbreviated variable names ¹cic_type,
## # ²care_home, ³primary_cat, ⁴service_group
Check simple differences in means: seems consistent with our earlier findings
simple_model <- lm(rating_num ~ form, data = skills_cqc)
summary(simple_model)
##
## Call:
## lm(formula = rating_num ~ form, data = skills_cqc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.96515 0.03677 0.10505 0.10505 1.13277
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.894954 0.001729 1674.610 < 2e-16 ***
## formNPO 0.068274 0.004196 16.271 < 2e-16 ***
## formGOV 0.070192 0.008817 7.961 1.73e-15 ***
## formCIC 0.097109 0.018646 5.208 1.91e-07 ***
## formIND -0.027727 0.006216 -4.461 8.18e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4168 on 77707 degrees of freedom
## (6999 observations deleted due to missingness)
## Multiple R-squared: 0.004806, Adjusted R-squared: 0.004755
## F-statistic: 93.82 on 4 and 77707 DF, p-value: < 2.2e-16
Running simple difference of means between CIC sub-types
simple_cic <- lm(rating_num ~ cic_type, data = skills_cqc)
summary(simple_cic)
##
## Call:
## lm(formula = rating_num ~ cic_type, data = skills_cqc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.90582 0.09418 0.09418 0.09418 1.09418
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.00855 0.02731 110.175 < 2e-16 ***
## cic_typeCIC_S -0.03077 0.03731 -0.825 0.409530
## cic_typeNA -0.10273 0.02735 -3.756 0.000173 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4177 on 77746 degrees of freedom
## (6962 observations deleted due to missingness)
## Multiple R-squared: 0.0002832, Adjusted R-squared: 0.0002575
## F-statistic: 11.01 on 2 and 77746 DF, p-value: 1.652e-05
Again CIC as for-profit with shares (S type) has lower rating, but not significant. The NA group (those are not CICs) difference is significant, as expected.
# run the model loops with nest() method
model_list <- skills_cqc %>%
group_by(domain) %>%
nest()%>%
mutate(ols_models = map(data,
~lm(rating_num ~ form + category + region + inherited,
data = .x)))
Derive the OLS results table
I first print out the order of the domain names to match with summary table.
print(model_list$domain) %>%
kableExtra::kable()
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 3 > 1' in coercion to 'logical(1)'
## [1] "Safe" "Effective" "Caring" "Responsive" "Well-led"
## [6] "Overall"
| x |
|---|
| Safe |
| Effective |
| Caring |
| Responsive |
| Well-led |
| Overall |
table_ols <- modelsummary(model_list$ols_models,
statistic = "({p.value}) {stars}")
table_ols
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | Model 6 | |
|---|---|---|---|---|---|---|
| (Intercept) | 2.789*** | 2.892*** | 3.016*** | 2.961*** | 2.814*** | 2.873*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| formNPO | 0.082*** | 0.040*** | 0.027*** | 0.064*** | 0.100*** | 0.082*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| formGOV | 0.097*** | 0.039* | 0.029+ | 0.053** | 0.097*** | 0.081*** |
| (0.000) *** | (0.023) * | (0.061) + | (0.006) ** | (0.000) *** | (0.001) *** | |
| formCIC | 0.100* | 0.019 | 0.079* | 0.039 | 0.109+ | 0.104* |
| (0.030) * | (0.604) | (0.014) * | (0.339) | (0.055) + | (0.043) * | |
| formIND | −0.019 | −0.033** | −0.004 | −0.007 | −0.040* | −0.036* |
| (0.214) | (0.007) ** | (0.712) | (0.621) | (0.033) * | (0.035) * | |
| categoryresidential | −0.064*** | −0.037*** | −0.047*** | −0.030*** | −0.067*** | −0.070*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| regionEast of England | 0.047** | 0.041** | 0.015 | 0.035* | 0.030 | 0.043* |
| (0.004) ** | (0.002) ** | (0.189) | (0.016) * | (0.132) | (0.017) * | |
| regionLondon | 0.048** | 0.054*** | 0.003 | −0.003 | 0.025 | 0.019 |
| (0.004) ** | (0.000) *** | (0.807) | (0.820) | (0.206) | (0.308) | |
| regionNorth East | 0.123*** | 0.095*** | 0.069*** | 0.082*** | 0.155*** | 0.133*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| regionNorth West | 0.067*** | 0.064*** | 0.022* | 0.040** | 0.052** | 0.052** |
| (0.000) *** | (0.000) *** | (0.050) * | (0.004) ** | (0.007) ** | (0.003) ** | |
| regionSouth East | 0.098*** | 0.049*** | 0.040*** | 0.051*** | 0.057** | 0.062*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.002) ** | (0.000) *** | |
| regionSouth West | 0.124*** | 0.094*** | 0.085*** | 0.091*** | 0.144*** | 0.142*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| regionWest Midlands | 0.019 | 0.020 | −0.015 | 0.000 | −0.075*** | −0.030+ |
| (0.233) | (0.114) | (0.173) | (0.982) | (0.000) *** | (0.092) + | |
| regionYorkshire and The Humber | −0.012 | 0.031* | 0.036** | 0.008 | −0.033 | −0.025 |
| (0.467) | (0.020) * | (0.002) ** | (0.576) | (0.115) | (0.186) | |
| inheritedTRUE | −0.018 | −0.032** | −0.032** | −0.032* | −0.018 | −0.020 |
| (0.248) | (0.008) ** | (0.003) ** | (0.020) * | (0.346) | (0.260) | |
| Num.Obs. | 12966 | 12931 | 12927 | 12953 | 12964 | 12942 |
| R2 | 0.023 | 0.013 | 0.017 | 0.013 | 0.025 | 0.023 |
| R2 Adj. | 0.021 | 0.012 | 0.016 | 0.012 | 0.024 | 0.022 |
| AIC | 14402.5 | 8181.6 | 5099.3 | 11037.1 | 19530.8 | 16998.2 |
| BIC | 14522.0 | 8301.1 | 5218.8 | 11156.6 | 19650.3 | 17117.7 |
| Log.Lik. | −7185.242 | −4074.798 | −2533.650 | −5502.563 | −9749.405 | −8483.091 |
| RMSE | 0.42 | 0.33 | 0.29 | 0.37 | 0.51 | 0.47 |