Load packages

library(tidyverse) # package for data cleaning and plotting
library(readxl)
library(modelsummary)
set.seed(5432)

Import Raw Data

cic_location_list <- read_csv("data/cic_full_list.csv")
# sampled data manually coded (with unfriendly column names)
providers <- read_excel("data/full_ratings.xlsx", sheet = 3)
### Rename the columns 

providers_renamed <- providers %>% 
  rename(primary_cat = `Provider Primary Inspection Category`,
         region = `Provider Region`,
         domain = Domain,
         rating = `Latest Rating`,
         inherited = `Inherited Rating (Y/N)`,
         provider_name = `Provider Name`,
         publication_date = `Publication Date`) 

Standardize the provider name

providers_std <- providers_renamed %>% 
# remove dots
  mutate(std_name = str_replace_all(provider_name, "\\.", "")) %>% 
# capitalize all
  mutate(std_name = str_to_upper(std_name)) %>%
# standardize to LTD
  mutate(std_name = str_replace_all(std_name, "LIMITED", "LTD")) %>% 
# standardize to CIC
  mutate(std_name = str_replace_all(std_name, "COMMUNITY INTEREST COMPANY", "CIC"))
## filter with the name CIC and check the total number of observations (6 domains)
CIC_provider_subset <- providers_std %>% 
  filter(str_detect(std_name, "CIC"))

nrow(CIC_provider_subset)
## [1] 378

Derive the CIC list

cic_provider_list <- CIC_provider_subset %>% 
  group_by(provider_name) %>% 
  summarise(provider_type = first(`Provider Type`),
            primary_cat = first(primary_cat),
            region = first(region)) %>% 
  mutate(index = row_number(),
         level = "provider")%>% 
  select(index, everything())
cic_location_list <- cic_location_list %>%
  mutate(level = "location")

CIC location level summarize

cic_location_list %>% 
  group_by(location_type) %>% 
  summarise(location_type = first(location_type))
## # A tibble: 3 × 1
##   location_type             
##   <chr>                     
## 1 Independent Healthcare Org
## 2 Primary Medical Services  
## 3 Social Care Org
cic_location_list %>% 
  group_by(primary_cat) %>% 
  summarise(primary_cat = first(primary_cat))
## # A tibble: 9 × 1
##   primary_cat                               
##   <chr>                                     
## 1 Community based adult social care services
## 2 Community health - NHS & Independent      
## 3 Community substance misuse                
## 4 GP Practices                              
## 5 Independent consulting doctors            
## 6 Out of hours                              
## 7 Remote clinical advice                    
## 8 Residential social care                   
## 9 Urgent care services & mobile doctors

CIC Provider level summarize

cic_provider_list %>% 
  group_by(provider_type) %>% 
  summarise(provider_type = first(provider_type))
## # A tibble: 1 × 1
##   provider_type             
##   <chr>                     
## 1 Independent Healthcare Org
cic_provider_list %>% 
  group_by(primary_cat) %>% 
  summarise(primary_cat = first(primary_cat))
## # A tibble: 2 × 1
##   primary_cat                                       
##   <chr>                                             
## 1 Community health - NHS & Independent              
## 2 Mental health - community & hospital - independent

Section below to update

Merge the two data sets

cic_list_2levels <- bind_rows(cic_location_list, cic_provider_list)
combined <- cic_list_2levels %>% 
  filter(!duplicated(cic_list_2levels$provider_name)) %>% 
  group_by(provider_name) %>% 
  slice(1)
nrow(combined)
## [1] 102