This is the data analysis RMarkdown file for 1) merging in spin-out data, and 2)initial data analysis with the additional spin-out variable.
Please read the subtitles and notes added as normal text in this document. Blocks with darker backgrounds are code chunks, mostly with their outputs. I tried to add #comments within the chunck just before the code line to explain the purpose of the code line. Meanwhile I explain the purpose of the section in the texts, so that is where to find information to give you fuller pictures.
# environment setup to run ordered logit properly
options(contrasts = rep("contr.treatment", 2))
This chunk loads all the packages to use
#packages for ordered logit
library(ordinal) # package for ordinal logit regression
library(tidyverse) # package for data cleaning and plotting
library(readxl) # package for reading excel file
library(broom) # extracting model summary as data frame
library(modelsummary) # deriving model tables
library(scales) # label percent
library(lubridate) # working with dates
library(marginaleffects) #to calculate marginal effects
#import the raw data file
cic_spinout <- read_csv("data/CIC_spinout.csv")
Create the subset with provider_id and
spin_out indicator
#Derive the spin-out indicator sub-set
spinout <- cic_spinout %>%
filter(form == "CIC") %>%
select(provider_id, spin_out) %>%
group_by(provider_id) %>%
summarise(spin_out = first(spin_out)) %>%
ungroup() %>%
mutate(spin_out = ifelse(spin_out == "no", "Non-spin-out", "Spin-out"))
head(spinout)
## # A tibble: 6 × 2
## provider_id spin_out
## <chr> <chr>
## 1 1-101641089 Non-spin-out
## 2 1-101652758 Non-spin-out
## 3 1-101657333 Spin-out
## 4 1-101691942 Non-spin-out
## 5 1-119229201 Non-spin-out
## 6 1-125892596 Non-spin-out
#import the raw data file
socialcare_raw <- read_csv("data/sample_new_cleaned.csv")
Assign orders to the ordinal level variables and name the organizational form in a reader-friendly way.
# (self-note: do not replicate this chunk, dataframe name changed)
#select relevant columns, rename and relabel
socialcare_sorted <- socialcare_raw %>%
# recode legal form types to be more readable / easier to present
mutate(location_id = factor(location_id),
form = case_when(form_num == 1 ~ "FPO",
form_num == 2 ~ "NPO",
form_num == 3 ~ "GOV",
form_num == 4 ~ "CIC",
form_num == 5 ~ "IND",
TRUE ~ NA_character_),
inherited = ifelse(inherited == "Y", TRUE, FALSE),
rating = recode(rating,
"Insufficient evidence to rate" = "NA",
"Requires improvement" = "Req improv"),
publication_date = dmy(publication_date)) %>%
# set the order of the values in the factors
mutate(form = fct_relevel(form, levels = c("FPO", "NPO", "GOV", "CIC", "IND")),
# assume the order of the ratings as follows but need to double check with the source
rating = ordered(rating, levels = c("Inadequate","Req improv", "Good", "Outstanding"))) %>%
# creating a new dummy variable for facility category
mutate(category = case_when(primary_cat == "Community based adult social care services" ~ "community",
primary_cat == "Residential social care" ~ "residential",
TRUE ~ as.character(NA)),
# deriving year column and dummy variable for before_covid
year = year(publication_date),
after_covid = ifelse(year >= 2020, TRUE, FALSE),
before_covid = ifelse(year <= 2019, TRUE, FALSE)) %>%
# converting the ordinal variable to numerical
mutate(rating_num = case_when(rating == "Inadequate" ~ 1,
rating == "Req improv" ~ 2,
rating == "Good" ~ 3,
rating == "Outstanding" ~ 4)) %>%
# derive the rating dummy
mutate(rating_higher = ifelse(rating_num > 2, 1, 0))
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
# show first several rows of the data set derived
head(socialcare_sorted)
## # A tibble: 6 × 40
## ...1 locati…¹ Locat…² Locat…³ Care …⁴ prima…⁵ Locat…⁶ Locat…⁷ Locat…⁸ Locat…⁹
## <dbl> <fct> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 1-10000… VNH4P 69 Ten… N Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 2 2 1-10000… VNH4P 69 Ten… N Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 3 3 1-10000… VNH4P 69 Ten… N Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 4 4 1-10000… VNH4P 69 Ten… N Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 5 5 1-10000… VNH4P 69 Ten… N Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 6 6 1-10000… VNH4P 69 Ten… N Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## # … with 30 more variables: `Location Local Authority` <chr>, region <chr>,
## # `Location NHS Region` <chr>, `Location ONSPD CCG Code` <chr>,
## # `Location ONSPD CCG` <chr>, `Location Commissioning CCG Code` <lgl>,
## # `Location Commissioning CCG Name` <lgl>,
## # `Service / Population Group` <chr>, domain <chr>, rating <ord>,
## # publication_date <date>, `Report Type` <chr>, inherited <lgl>, URL <chr>,
## # `Provider ID` <chr>, location_type <chr>, form_num <dbl>, …
## merge in CIC spin-out indicator
socialcare <- socialcare_sorted %>%
rename(provider_id = `Provider ID`) %>%
left_join(spinout, by = "provider_id") %>%
mutate(cic_other = ifelse(form_num == 4, spin_out, as.character(form)),
cic_other = fct_relevel(cic_other, levels = c("FPO", "NPO", "GOV", "Spin-out", "Non-spin-out", "IND")),
cic_gov = ifelse(form_num == 4 | form_num == 3, as.character(cic_other), NA),
cic_gov = fct_relevel(cic_gov, "Non-spin-out", after = Inf))
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
head(socialcare)
## # A tibble: 6 × 43
## ...1 locati…¹ Locat…² Locat…³ Care …⁴ prima…⁵ Locat…⁶ Locat…⁷ Locat…⁸ Locat…⁹
## <dbl> <fct> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 1-10000… VNH4P 69 Ten… N Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 2 2 1-10000… VNH4P 69 Ten… N Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 3 3 1-10000… VNH4P 69 Ten… N Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 4 4 1-10000… VNH4P 69 Ten… N Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 5 5 1-10000… VNH4P 69 Ten… N Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## 6 6 1-10000… VNH4P 69 Ten… N Commun… 69 Ten… Warmsw… Doncas… DN4 9PE
## # … with 33 more variables: `Location Local Authority` <chr>, region <chr>,
## # `Location NHS Region` <chr>, `Location ONSPD CCG Code` <chr>,
## # `Location ONSPD CCG` <chr>, `Location Commissioning CCG Code` <lgl>,
## # `Location Commissioning CCG Name` <lgl>,
## # `Service / Population Group` <chr>, domain <chr>, rating <ord>,
## # publication_date <date>, `Report Type` <chr>, inherited <lgl>, URL <chr>,
## # provider_id <chr>, location_type <chr>, form_num <dbl>, …
Check the distribution by spin-out and CLS/CLG
spinout_table <- socialcare %>%
group_by(location_id) %>%
summarise(cic_type = first(cic_type),
spin_out = first(spin_out))
table(spinout_table$cic_type, spinout_table$spin_out)
##
## Non-spin-out Spin-out
## CIC_G 32 5
## CIC_S 26 19
# run the model loops with nest() method
# add after_covid as control
models_ols_covid <- socialcare %>%
group_by(domain) %>%
nest()%>%
mutate(ols_models = map(data,
~lm(rating_num ~ cic_other + after_covid +
category + region + inherited,
data = .x))) %>%
mutate(results = map(ols_models, ~tidy(.x, conf.int = TRUE))) %>%
pull(ols_models, name = domain)
table_ols_covid <- modelsummary(models_ols_covid, statistic = "({p.value}) {stars}")
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 3 > 1' in coercion to 'logical(1)'
table_ols_covid
| Safe | Effective | Caring | Responsive | Well-led | Overall | |
|---|---|---|---|---|---|---|
| (Intercept) | 2.879*** | 2.925*** | 3.036*** | 2.986*** | 2.923*** | 2.982*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| cic_otherNPO | 0.056*** | 0.032*** | 0.023** | 0.058*** | 0.069*** | 0.050*** |
| (0.000) *** | (0.000) *** | (0.002) ** | (0.000) *** | (0.000) *** | (0.000) *** | |
| cic_otherGOV | 0.069** | 0.032+ | 0.025 | 0.048* | 0.065* | 0.050* |
| (0.001) ** | (0.058) + | (0.101) | (0.013) * | (0.011) * | (0.028) * | |
| cic_otherSpin-out | 0.038 | 0.026 | 0.017 | −0.026 | 0.026 | −0.013 |
| (0.642) | (0.698) | (0.779) | (0.721) | (0.792) | (0.881) | |
| cic_otherNon-spin-out | 0.104* | 0.012 | 0.105** | 0.064 | 0.109+ | 0.119* |
| (0.049) * | (0.785) | (0.006) ** | (0.182) | (0.090) + | (0.039) * | |
| cic_otherIND | −0.038* | −0.039** | −0.008 | −0.012 | −0.062*** | −0.059*** |
| (0.011) * | (0.001) ** | (0.454) | (0.387) | (0.001) *** | (0.000) *** | |
| after_covidTRUE | −0.271*** | −0.135*** | −0.083*** | −0.107*** | −0.329*** | −0.333*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| categoryresidential | −0.060*** | −0.037*** | −0.048*** | −0.031*** | −0.061*** | −0.063*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| regionEast of England | 0.038* | 0.039** | 0.015 | 0.035* | 0.019 | 0.033+ |
| (0.014) * | (0.002) ** | (0.193) | (0.014) * | (0.311) | (0.052) + | |
| regionLondon | 0.045** | 0.055*** | 0.003 | −0.001 | 0.024 | 0.018 |
| (0.004) ** | (0.000) *** | (0.827) | (0.925) | (0.207) | (0.299) | |
| regionNorth East | 0.133*** | 0.104*** | 0.074*** | 0.089*** | 0.169*** | 0.150*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| regionNorth West | 0.062*** | 0.063*** | 0.020+ | 0.039** | 0.047* | 0.046** |
| (0.000) *** | (0.000) *** | (0.069) + | (0.006) ** | (0.012) * | (0.006) ** | |
| regionSouth East | 0.086*** | 0.047*** | 0.038*** | 0.050*** | 0.042* | 0.047** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.014) * | (0.002) ** | |
| regionSouth West | 0.110*** | 0.092*** | 0.082*** | 0.088*** | 0.128*** | 0.126*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| regionWest Midlands | 0.007 | 0.018 | −0.017 | −0.002 | −0.090*** | −0.045** |
| (0.664) | (0.142) | (0.125) | (0.890) | (0.000) *** | (0.008) ** | |
| regionYorkshire and The Humber | −0.016 | 0.031* | 0.036** | 0.008 | −0.036+ | −0.029 |
| (0.337) | (0.018) * | (0.003) ** | (0.590) | (0.067) + | (0.107) | |
| inheritedTRUE | −0.045** | −0.042*** | −0.037*** | −0.038** | −0.051** | −0.053** |
| (0.003) ** | (0.001) *** | (0.001) *** | (0.005) ** | (0.006) ** | (0.001) ** | |
| Num.Obs. | 12966 | 12931 | 12927 | 12953 | 12964 | 12942 |
| R2 | 0.104 | 0.041 | 0.030 | 0.027 | 0.107 | 0.124 |
| R2 Adj. | 0.103 | 0.039 | 0.029 | 0.026 | 0.106 | 0.123 |
| AIC | 13273.1 | 7817.4 | 4933.6 | 10858.2 | 18400.7 | 15587.8 |
| BIC | 13407.6 | 7951.8 | 5068.0 | 10992.6 | 18535.1 | 15722.2 |
| Log.Lik. | −6618.558 | −3890.688 | −2448.819 | −5411.087 | −9182.340 | −7775.882 |
| RMSE | 0.40 | 0.33 | 0.29 | 0.37 | 0.49 | 0.44 |
# run the model loops with nest() method
# add after_covid as control
models_ols_covid_gov <- socialcare %>%
group_by(domain) %>%
nest()%>%
mutate(ols_models = map(data,
~lm(rating_num ~ cic_gov + after_covid +
category + region + inherited,
data = .x))) %>%
mutate(results = map(ols_models, ~tidy(.x, conf.int = TRUE))) %>%
pull(ols_models, name = domain)
table_ols_covid_gov <- modelsummary(models_ols_covid_gov, statistic = "({p.value}) {stars}")
table_ols_covid_gov
| Safe | Effective | Caring | Responsive | Well-led | Overall | |
|---|---|---|---|---|---|---|
| (Intercept) | 2.975*** | 2.921*** | 2.984*** | 2.972*** | 2.849*** | 2.960*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| cic_govSpin-out | −0.060 | −0.001 | −0.028 | −0.063 | −0.025 | −0.105 |
| (0.393) | (0.985) | (0.661) | (0.347) | (0.810) | (0.216) | |
| cic_govNon-spin-out | 0.001 | −0.036 | 0.068 | −0.004 | −0.004 | 0.023 |
| (0.978) | (0.435) | (0.102) | (0.933) | (0.951) | (0.681) | |
| after_covidTRUE | −0.127*** | −0.033 | 0.024 | −0.064+ | −0.208*** | −0.180*** |
| (0.000) *** | (0.378) | (0.492) | (0.080) + | (0.000) *** | (0.000) *** | |
| categoryresidential | −0.105*** | −0.087** | −0.018 | −0.043 | −0.126** | −0.132*** |
| (0.001) *** | (0.004) ** | (0.519) | (0.145) | (0.005) ** | (0.000) *** | |
| regionEast of England | 0.048 | 0.052 | 0.084 | 0.056 | 0.102 | 0.157+ |
| (0.480) | (0.444) | (0.179) | (0.396) | (0.309) | (0.056) + | |
| regionLondon | −0.069 | 0.031 | 0.054 | 0.036 | 0.115 | 0.043 |
| (0.286) | (0.627) | (0.358) | (0.566) | (0.223) | (0.573) | |
| regionNorth East | 0.040 | 0.123+ | 0.143* | 0.220** | 0.366*** | 0.279** |
| (0.586) | (0.086) + | (0.031) * | (0.002) ** | (0.001) *** | (0.001) ** | |
| regionNorth West | 0.058 | 0.117* | 0.143** | 0.174** | 0.290*** | 0.199** |
| (0.321) | (0.047) * | (0.008) ** | (0.002) ** | (0.001) *** | (0.005) ** | |
| regionSouth East | −0.001 | 0.090 | 0.059 | 0.093+ | 0.190* | 0.076 |
| (0.993) | (0.121) | (0.261) | (0.097) + | (0.027) * | (0.276) | |
| regionSouth West | 0.051 | 0.099 | 0.094 | 0.089 | 0.167+ | 0.105 |
| (0.461) | (0.148) | (0.132) | (0.176) | (0.099) + | (0.204) | |
| regionWest Midlands | 0.070 | 0.070 | −0.014 | 0.087 | 0.229* | 0.087 |
| (0.281) | (0.274) | (0.815) | (0.159) | (0.015) * | (0.256) | |
| regionYorkshire and The Humber | 0.001 | 0.142* | 0.066 | 0.078 | 0.134 | 0.092 |
| (0.992) | (0.022) * | (0.243) | (0.190) | (0.143) | (0.218) | |
| inheritedTRUE | −0.106* | 0.025 | −0.004 | −0.029 | −0.068 | −0.061 |
| (0.047) * | (0.643) | (0.930) | (0.565) | (0.386) | (0.340) | |
| Num.Obs. | 471 | 470 | 469 | 469 | 471 | 472 |
| R2 | 0.073 | 0.039 | 0.043 | 0.054 | 0.092 | 0.103 |
| R2 Adj. | 0.046 | 0.011 | 0.016 | 0.027 | 0.066 | 0.077 |
| AIC | 266.0 | 253.5 | 168.4 | 221.7 | 625.2 | 439.2 |
| BIC | 328.3 | 315.8 | 230.6 | 284.0 | 687.5 | 501.6 |
| Log.Lik. | −117.986 | −111.766 | −69.183 | −95.873 | −297.605 | −204.618 |
| RMSE | 0.31 | 0.31 | 0.28 | 0.30 | 0.46 | 0.37 |
model_order_overall_covid <- clm(rating ~ cic_other + after_covid +
category + region + inherited,
data = filter(socialcare, domain == "Overall"),
link = "logit")
model_order_safe_covid <- clm(rating ~ cic_other + after_covid +
category + region + inherited,
data = filter(socialcare, domain == "Safe"),
link = "logit")
model_order_effective_covid <- clm(rating ~ cic_other + after_covid +
category + region + inherited,
data = filter(socialcare, domain == "Effective"),
link = "logit")
model_order_caring_covid <- clm(rating ~ cic_other + after_covid +
category + region + inherited,
data = filter(socialcare, domain == "Caring"),
link = "logit")
model_order_well_led_covid <- clm(rating ~ cic_other + after_covid +
category + region + inherited,
data = filter(socialcare, domain == "Well-led"),
link = "logit")
model_order_responsive_covid <- clm(rating ~ cic_other + after_covid +
category + region + inherited,
data = filter(socialcare, domain == "Responsive"),
link = "logit")
ordinal_models_covid <- modelsummary(list("overall" = model_order_overall_covid, "safe" = model_order_safe_covid,
"effective" = model_order_effective_covid, "caring"= model_order_caring_covid,
"well-led" = model_order_well_led_covid, "responsive" = model_order_responsive_covid),
coef_omit = "region", exponentiate = F,
statistic = "({p.value}) {stars}")
ordinal_models_covid
| overall | safe | effective | caring | well-led | responsive | |
|---|---|---|---|---|---|---|
| Inadequate|Req improv | −5.397*** | −4.999*** | −6.333*** | −7.445*** | −4.795*** | −7.122*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| Req improv|Good | −2.497*** | −2.077*** | −2.443*** | −3.764*** | −1.905*** | −2.610*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| Good|Outstanding | 2.796*** | 5.548*** | 4.214*** | 2.849*** | 2.867*** | 2.834*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| cic_otherNPO | 0.307*** | 0.450*** | 0.350*** | 0.275** | 0.357*** | 0.467*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.002) ** | (0.000) *** | (0.000) *** | |
| cic_otherGOV | 0.290* | 0.562** | 0.337+ | 0.291 | 0.316* | 0.378* |
| (0.036) * | (0.001) ** | (0.065) + | (0.106) | (0.014) * | (0.012) * | |
| cic_otherSpin-out | −0.079 | 0.325 | 0.316 | 0.164 | 0.120 | −0.208 |
| (0.884) | (0.603) | (0.658) | (0.819) | (0.812) | (0.721) | |
| cic_otherNon-spin-out | 0.728* | 0.994+ | 0.128 | 1.007** | 0.532 | 0.501 |
| (0.037) * | (0.051) + | (0.777) | (0.006) ** | (0.113) | (0.178) | |
| cic_otherIND | −0.339*** | −0.248** | −0.344** | −0.097 | −0.282*** | −0.097 |
| (0.000) *** | (0.009) ** | (0.002) ** | (0.468) | (0.001) *** | (0.363) | |
| after_covidTRUE | −1.777*** | −1.496*** | −1.132*** | −1.043*** | −1.412*** | −0.844*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| categoryresidential | −0.376*** | −0.419*** | −0.386*** | −0.586*** | −0.293*** | −0.243*** |
| (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | (0.000) *** | |
| inheritedTRUE | −0.320*** | −0.334*** | −0.398*** | −0.460*** | −0.237** | −0.305** |
| (0.001) *** | (0.001) *** | (0.000) *** | (0.001) *** | (0.007) ** | (0.004) ** | |
| Num.Obs. | 12942 | 12966 | 12931 | 12927 | 12964 | 12953 |
| AIC | 15297.1 | 12508.6 | 10047.5 | 8808.8 | 17892.2 | 12549.2 |
| BIC | 15439.0 | 12650.6 | 10189.4 | 8950.7 | 18034.1 | 12691.1 |
| RMSE | 2.24 | 2.10 | 2.13 | 2.21 | 2.25 | 2.27 |