library(tidyverse) # package for data cleaning and plotting
library(readxl)
library(modelsummary)
set.seed(5432)
cic_location_list <- read_csv("data/cic_full_list.csv")
# sampled data manually coded (with unfriendly column names)
providers <- read_excel("data/full_ratings.xlsx", sheet = 3)
### Rename the columns
providers_renamed <- providers %>%
rename(primary_cat = `Provider Primary Inspection Category`,
region = `Provider Region`,
domain = Domain,
rating = `Latest Rating`,
inherited = `Inherited Rating (Y/N)`,
provider_name = `Provider Name`,
publication_date = `Publication Date`)
providers_std <- providers_renamed %>%
# remove dots
mutate(std_name = str_replace_all(provider_name, "\\.", "")) %>%
# capitalize all
mutate(std_name = str_to_upper(std_name)) %>%
# standardize to LTD
mutate(std_name = str_replace_all(std_name, "LIMITED", "LTD")) %>%
# standardize to CIC
mutate(std_name = str_replace_all(std_name, "COMMUNITY INTEREST COMPANY", "CIC"))
## filter with the name CIC and check the total number of observations (6 domains)
CIC_provider_subset <- providers_std %>%
filter(str_detect(std_name, "CIC"))
nrow(CIC_provider_subset)
## [1] 378
cic_provider_list <- CIC_provider_subset %>%
group_by(provider_name) %>%
summarise(provider_type = first(`Provider Type`),
primary_cat = first(primary_cat),
region = first(region)) %>%
mutate(index = row_number(),
level = "provider")%>%
select(index, everything())
cic_location_list <- cic_location_list %>%
mutate(level = "location")
cic_location_list %>%
group_by(location_type) %>%
summarise(location_type = first(location_type))
## # A tibble: 3 × 1
## location_type
## <chr>
## 1 Independent Healthcare Org
## 2 Primary Medical Services
## 3 Social Care Org
cic_location_list %>%
group_by(primary_cat) %>%
summarise(primary_cat = first(primary_cat))
## # A tibble: 9 × 1
## primary_cat
## <chr>
## 1 Community based adult social care services
## 2 Community health - NHS & Independent
## 3 Community substance misuse
## 4 GP Practices
## 5 Independent consulting doctors
## 6 Out of hours
## 7 Remote clinical advice
## 8 Residential social care
## 9 Urgent care services & mobile doctors
cic_provider_list %>%
group_by(provider_type) %>%
summarise(provider_type = first(provider_type))
## # A tibble: 1 × 1
## provider_type
## <chr>
## 1 Independent Healthcare Org
cic_provider_list %>%
group_by(primary_cat) %>%
summarise(primary_cat = first(primary_cat))
## # A tibble: 2 × 1
## primary_cat
## <chr>
## 1 Community health - NHS & Independent
## 2 Mental health - community & hospital - independent
Merge the two data sets
cic_list_2levels <- bind_rows(cic_location_list, cic_provider_list)
combined <- cic_list_2levels %>%
filter(!duplicated(cic_list_2levels$provider_name)) %>%
group_by(provider_name) %>%
slice(1)
nrow(combined)
## [1] 102