# environment setup to run ordered logit properly
options(contrasts = rep("contr.treatment", 2))

This chunk loads all the packages to use

#packages for ordered logit
library(tidyverse) # package for data cleaning and plotting
library(readxl) # package for reading excel file
library(broom) # extracting model summary as data frame
library(modelsummary) # deriving model tables
library(scales) # label percent
library(lubridate) # working with dates
library(marginaleffects) #to calculate marginal effects

library(ordinal) # package for ordinal logit regression

Data Preparation

First, import the sampled and coded data set

#import the raw data file
socialcare_sk <- read_csv("data/cqc_sfc_cleaned.csv")
socialcare <- socialcare_sk %>% 
# recode legal form types to be more readable / easier to present
  mutate(inherited = ifelse(inherited == "Y", TRUE, FALSE),
         rating = recode(rating, 
                         "Insufficient evidence to rate" = "NA",
                         "Requires improvement" = "Req improv")) %>% 
  # set the order of the values in the factors 
  mutate(form = ordered(form, levels = c("FPO", "NPO", "GOV", "CIC", "IND")),
         
  # assume the order of the ratings as follows but need to double check with the source 
         rating = ordered(rating, levels = c("Inadequate","Req improv", "Good", "Outstanding"))) %>%
  
  # adding the rating data coded as numerical
  mutate(rating_num = case_when(rating == "Inadequate" ~ 1,
                                rating == "Req improv" ~ 2,
                                rating == "Good" ~ 3,
                                rating == "Outstanding" ~ 4)) %>% 
  
  mutate(category = case_when(primary_cat == "Community based adult social care services" ~ "community",
                              primary_cat == "Residential social care" ~ "residential",
                              TRUE ~ as.character(NA)),

         # deriving year column and dummy variable for before_covid
         year = year(publication_date),
         after_covid = ifelse(year >= 2020, TRUE, FALSE),
         before_covid = ifelse(year <= 2019, TRUE, FALSE)) %>%

  # converting the ordinal variable to numerical 
  mutate(rating_num = case_when(rating == "Inadequate" ~ 1,
                                rating == "Req improv" ~ 2,
                                rating == "Good" ~ 3,
                                rating == "Outstanding" ~ 4)) %>% 
  # derive the rating dummy
  mutate(rating_higher = ifelse(rating_num > 2, 1, 0))

Discriptive statstics

Mean and Standard Deviation for numerical variables

table1 <- datasummary((turnover + staff_level 
                       + totalstaff + age) * (Mean + SD) ~ (form + 1),
            data = socialcare, fmt = 1)
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 3 > 1' in coercion to 'logical(1)'
table1
FPO NPO GOV CIC IND All
turnover Mean 32.6 24.6 12.6 29.5 20.1 29.2
SD 42.9 33.0 11.9 28.0 27.4 39.6
staff_level Mean 148.9 210.7 222.0 189.2 140.3 164.7
SD 175.1 254.7 247.2 179.2 290.5 204.6
totalstaff Mean 42.4 44.4 52.1 54.9 22.3 42.5
SD 46.3 58.6 60.1 93.2 17.3 49.6
age Mean 9.0 10.3 10.5 8.2 9.5 9.4
SD 5.0 5.3 3.6 4.2 5.2 5.0
table2 <- datasummary(service_type_selected ~ (form + 1),
            data = socialcare, fmt = 0)
table2
service_type_selected FPO NPO GOV CIC IND All
care home w/ nursing 6636 1134 192 30 210 8202
care home w/o nursing 13074 4992 1002 48 1260 20400
Domicilary Care 9006 1104 564 180 294 11148
Supported Living 1242 762 180 36 0 2220
table4 <- datasummary(size_type ~ (form + 1),
            data = socialcare, fmt = 0)
table4
size_type FPO NPO GOV CIC IND All
large 9552 5760 2238 102 0 17652
medium 7104 1524 18 90 138 8874
micro 702 84 0 6 168 960
small 11094 834 6 78 1284 13320

```