# environment setup to run ordered logit properly
options(contrasts = rep("contr.treatment", 2))
This chunk loads all the packages to use
#packages for ordered logit
library(tidyverse) # package for data cleaning and plotting
library(readxl) # package for reading excel file
library(broom) # extracting model summary as data frame
library(modelsummary) # deriving model tables
library(scales) # label percent
library(lubridate) # working with dates
library(marginaleffects) #to calculate marginal effects
library(ordinal) # package for ordinal logit regression
First, import the sampled and coded data set
#import the raw data file
socialcare_sk <- read_csv("data/cqc_sfc_cleaned.csv")
socialcare <- socialcare_sk %>%
# recode legal form types to be more readable / easier to present
mutate(inherited = ifelse(inherited == "Y", TRUE, FALSE),
rating = recode(rating,
"Insufficient evidence to rate" = "NA",
"Requires improvement" = "Req improv")) %>%
# set the order of the values in the factors
mutate(form = ordered(form, levels = c("FPO", "NPO", "GOV", "CIC", "IND")),
# assume the order of the ratings as follows but need to double check with the source
rating = ordered(rating, levels = c("Inadequate","Req improv", "Good", "Outstanding"))) %>%
# adding the rating data coded as numerical
mutate(rating_num = case_when(rating == "Inadequate" ~ 1,
rating == "Req improv" ~ 2,
rating == "Good" ~ 3,
rating == "Outstanding" ~ 4)) %>%
mutate(category = case_when(primary_cat == "Community based adult social care services" ~ "community",
primary_cat == "Residential social care" ~ "residential",
TRUE ~ as.character(NA)),
# deriving year column and dummy variable for before_covid
year = year(publication_date),
after_covid = ifelse(year >= 2020, TRUE, FALSE),
before_covid = ifelse(year <= 2019, TRUE, FALSE)) %>%
# converting the ordinal variable to numerical
mutate(rating_num = case_when(rating == "Inadequate" ~ 1,
rating == "Req improv" ~ 2,
rating == "Good" ~ 3,
rating == "Outstanding" ~ 4)) %>%
# derive the rating dummy
mutate(rating_higher = ifelse(rating_num > 2, 1, 0))
Mean and Standard Deviation for numerical variables
table1 <- datasummary((turnover + staff_level
+ totalstaff + age) * (Mean + SD) ~ (form + 1),
data = socialcare, fmt = 1)
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 3 > 1' in coercion to 'logical(1)'
table1
| FPO | NPO | GOV | CIC | IND | All | ||
|---|---|---|---|---|---|---|---|
| turnover | Mean | 32.6 | 24.6 | 12.6 | 29.5 | 20.1 | 29.2 |
| SD | 42.9 | 33.0 | 11.9 | 28.0 | 27.4 | 39.6 | |
| staff_level | Mean | 148.9 | 210.7 | 222.0 | 189.2 | 140.3 | 164.7 |
| SD | 175.1 | 254.7 | 247.2 | 179.2 | 290.5 | 204.6 | |
| totalstaff | Mean | 42.4 | 44.4 | 52.1 | 54.9 | 22.3 | 42.5 |
| SD | 46.3 | 58.6 | 60.1 | 93.2 | 17.3 | 49.6 | |
| age | Mean | 9.0 | 10.3 | 10.5 | 8.2 | 9.5 | 9.4 |
| SD | 5.0 | 5.3 | 3.6 | 4.2 | 5.2 | 5.0 |
table2 <- datasummary(service_type_selected ~ (form + 1),
data = socialcare, fmt = 0)
table2
| service_type_selected | FPO | NPO | GOV | CIC | IND | All |
|---|---|---|---|---|---|---|
| care home w/ nursing | 6636 | 1134 | 192 | 30 | 210 | 8202 |
| care home w/o nursing | 13074 | 4992 | 1002 | 48 | 1260 | 20400 |
| Domicilary Care | 9006 | 1104 | 564 | 180 | 294 | 11148 |
| Supported Living | 1242 | 762 | 180 | 36 | 0 | 2220 |
table4 <- datasummary(size_type ~ (form + 1),
data = socialcare, fmt = 0)
table4
| size_type | FPO | NPO | GOV | CIC | IND | All |
|---|---|---|---|---|---|---|
| large | 9552 | 5760 | 2238 | 102 | 0 | 17652 |
| medium | 7104 | 1524 | 18 | 90 | 138 | 8874 |
| micro | 702 | 84 | 0 | 6 | 168 | 960 |
| small | 11094 | 834 | 6 | 78 | 1284 | 13320 |
```