ACS Data Pull and Cleaning

Select “code” button to the right to see the code for this section.

#bring in county hospital data and dichotomize ICU bed per capita at media
aha2017_2019 <- readRDS(paste0(data_dir,"/prepared_data/aha2017_2019.county.Rds"))

#Bring in config file with new variable names and labels
# web address: https://api.census.gov/data/2019/acs/acs5/profile/variables.html
#This will be used to create a labels list now and rename variables later.
names <- read.csv(paste0(data_dir,"/acs/acs_config_2019.csv"), header = T)

# turn labels into list object.
# pre-allocate a list and fill it with human-readable var names a loop
acs.labels.list <- vector("list", nrow(names))

for (i in 1:nrow(names)) {
  acs.labels.list[[i]] <- names[i,2]
}
#name elements using var name
acs.labels.list <- setNames(acs.labels.list, names[,1])

# all population characteristics
acs0 <- get_acs(geography = "county",
                variables = c(
                  "DP05_0001E", #Total pop
                  "DP05_0021E", # total pop 18+
                  "DP05_0021PE", # %pop 18+
                  "DP05_0024PE", # %pop 65%
                  "DP05_0019PE", # %pop < 18
                  "DP05_0003P", # %female
                  "DP05_0037PE", # %White
                  "DP05_0038PE", # %Black
                  "DP05_0039PE", # %AI/AN
                  "DP05_0044PE", # %Asian
                  "DP05_0052PE", # %NH/PI
                  "DP05_0057PE", # %Other
                  "DP05_0058PE", # 2+ races
                  "DP05_0071PE", # %Hispanic
                  "DP02_0060PE", # %25yr+ and <9th grade
                  "DP02_0067PE", # %25yr+ and at least HS graduate
                  "DP03_0027PE", # %16yr+ white collar jobs
                  "DP03_0099PE", # %wo health insurance
                  "DP03_0088E", # $per capita income
                  "DP03_0086E", # $median family income
                  "DP03_0062E", # $mean household income
                  "DP03_0052PE", # %household income <10K
                  "DP03_0053PE", # %household income 10-14K
                  "DP03_0054PE", # %household income 15-24K
                  "DP03_0055PE", # %household income 25-34K
                  "DP03_0056PE", # %household income 35-49K
                  "DP03_0057PE", # %household income 50-74K
                  "DP03_0058PE", # %household income 75-99K
                  "DP03_0059PE", # %household income 100-149K
                  "DP03_0060PE", # %household income 150-199K
                  "DP03_0061PE", # %household income 200K+
                  "DP04_0089E", # $Median home value
                  "DP04_0109E", # $Median rent
                  "DP04_0101E", # $Median mortgage
                  "DP04_0046PE", # %owner occupied
                  "DP03_0005PE", # %civilian labor force 16yr+ unemployed
                  "DP03_0119PE", # % families below poverty
                  "DP03_0128PE", # % individuals below poverty
                  "DP02_0007PE", # % male single parent family
                  "DP02_0011PE", # % female single parent family
                  "DP04_0058PE", # % households no car
                  "DP04_0075PE", # % households no phone service (inc cell)
                  "DP04_0073PE", # % households no plumbing
                  "DP04_0077PE", # % households 1 or less people per room
                  "DP05_0024PE", # % 65yr+
                  "DP05_0019PE", # % < 18yr
                  "DP02_0072PE", # % with a disability
                  "DP02_0114PE", # % 5yr+ speak English < very well
                  "DP04_0009PE", # % houses 2 units
                  "DP04_0010PE", # % houses 3-4 units
                  "DP04_0011PE", # % houses 5-9 units
                  "DP04_0012PE", # % houses 10-19 units
                  "DP04_0013PE", # % houses 20+ units
                  "DP04_0014PE", # % mobile homes
                  "DP02_0152PE" # % houses with computer
                ),
                year = 2019,
                survey = "acs5",
                geometry = FALSE
) 

acs1 <- left_join(acs0, names, by = "variable")
acs2 <- pivot_wider(acs1, id_cols = c(GEOID, NAME), names_from = name, values_from = estimate)

## Population characteristics that need some curation
# % Minority - the complement of %White

acs3 <- acs2 %>% 
  group_by(NAME) %>%
  mutate(
    over50k = sum(perc.house.50_74k,
                  perc.house.75_99k,
                  perc.house.100_149k,
                  perc.house.150_199k,
                  perc.house.ge.200k), #Income disparity, create %under 50K
    perc.singleparent = sum(perc.fam.singlemale,
                            perc.fam.singlefemale),# single parent households - add up male and female single parent homes
    perc.multiunithouse = sum(perc.house.2unit,
                              perc.house.3_4unit,
                              perc.house.5_9unit,
                              perc.house.10_19unit,
                              perc.house.ge.20unit)#Multi unit housing, sum % of all relevant categories
  )%>%
  ungroup()%>%
  mutate(
    perc.minority = (100-perc.white), # % Minority - the complement of %White
    perc.l.highschool = (100-perc.ge.highschool),# less than HS diploma - complement of %25yr+ and at least HS graduate
    perc.house.g.1perroom = (100-perc.house.le.1perroom), # Houses >1 person per room - complement of 1 or less person per room
    income.disparity = log(100*perc.house.l.10k/over50k),# Income disparity=log of 100 x ratio <10K/>50K
    total.thousands = total/1000,#simple division for presentation
    mean.indi.income.thousands = mean.indi.income/1000,
    median.fam.income.thousands = median.fam.income/1000,
    median.house.income.thousands = median.house.income/1000,
    median.house.value.thousands = median.house.value/1000,
    median.mortgage.thousands = median.mortgage/1000)

#Join with hospital data
final2017_2019 <- left_join(acs3, aha2017_2019, by = "GEOID")

#Create per capita variables and dichotomize at median

final2017_2019 <- final2017_2019 %>%
  mutate(adult_icubeds_per1k = 1000*county.adult.icu.beds/total.ge.18,
         adult_intensivists_per1k = 1000*county.adult.icu.beds/total.ge.18)

County-Level Graphical Examination

ICU bed density by Total Population

ggplot(data = final2017_2019,
       aes(x = total.thousands,
           y = adult_icubeds_per1k))+
  geom_point()+
  theme_bw()

Interpretation:

There is one extreme and 2 moderate outliers on the ICU Beds Per Capita axis above 2.5 ICU beds per 1000 adults. These values seem high in context of other similarly populated counties on the y axis.
There are also 4 outliers on the County population axis above 3,750,000 persons

ICU Beds by Total Population

ggplot(data = final2017_2019,
       aes(x = total.thousands,
           y = county.adult.icu.beds))+
  geom_point()+
  theme_bw()

Interpretation:

Point cloud does have expected upward trend of more ICU beds as population increases.
There are also 4 outliers on the County population axis above 3,750,000 persons

FTE Intensivists by Total Population

ggplot(data = final2017_2019,
       aes(x = total.thousands,
           y = county.adult.fte))+
  geom_point()+
  theme_bw()

Interpretation:

Similar 4 Total population outliers of population > 3.75 million
Additional outliers along Intensivist axis with 2 counties with FTE > 400 that seems out of context with other counties of similar size.

FTE Intensivists by ICU beds

ggplot(data = final2017_2019,
       aes(x = county.adult.icu.beds,
           y = county.adult.fte))+
  geom_point()+
  theme_bw()

Interpretation:

Similar outliers of Inensivists above 300 and ICU beds above 1000

Detailed Examination of Outliers

Here I filter out the counties that have any one of the following parameters:

ICU beds per 1000 adults > 2.5
ICU beds > 750
FTE Intensivists > 300
total population > 3.75 million

test <- final2017_2019 %>%
  filter(adult_icubeds_per1k > 2.5 |
           county.adult.icu.beds > 750 |
           county.adult.fte > 300 |
           total.thousands > 3750)%>%
  select(GEOID, NAME, total.hospitals, county.hospital.beds.total, 
         county.adult.fte, county.adult.icu.beds, total.thousands, 
         adult_icubeds_per1k, percent.hospital.beds.unaccounted)
kable(test, digits = 1)

GEOID	NAME	total.hospitals	county.hospital.beds.total	county.adult.fte	county.adult.icu.beds	total.thousands	adult_icubeds_per1k	percent.hospital.beds.unaccounted
04013	Maricopa County, Arizona	26	7972	170	880	4328.8	0.3	8
06037	Los Angeles County, California	77	19912	495	1792	10081.6	0.2	18
17031	Cook County, Illinois	46	14627	384	1364	5198.3	0.3	17
28055	Issaquena County, Mississippi	1	187	1	16	1.4	13.8	0
36061	New York County, New York	10	8820	480	861	1632.0	0.6	0
39035	Cuyahoga County, Ohio	16	6331	702	719	1247.5	0.7	7
42093	Montour County, Pennsylvania	1	536	32	57	18.3	3.9	0
48201	Harris County, Texas	29	10086	273	1262	4646.6	0.4	8
51720	Norton city, Virginia	2	139	0	18	4.0	5.9	0

Interpretation:

ICU bed per capita - Issaquena Mississippi is an extreme outlier at 14 ICU beds per 1000 adults with only one hospital which does not have an unreasonable amount of ICU beds (16) given 187 hospital beds. So it is a population denominator issue.
ICU Beds - The counties with high numbers of ICU beds are the ones to be expected as they are all large counties. Of note, a couple of these counties have a high degree of missing information from their hospitals so ICU beds and Intensivists may be an underestimate….
Intensivists - Counties with high number of intensivists also seem to be from populated counties.
Highly populated counties seem appropriate.
I think Issaquena needs to be fixed but others are OK but will need adjustment or sensitivity analyses for missingess.

Examining Outliers

Erin R Stearns & Jordan A Kempker

2021-07-06

ACS Data Pull and Cleaning

County-Level Graphical Examination

ICU bed density by Total Population

ICU Beds by Total Population

FTE Intensivists by Total Population

FTE Intensivists by ICU beds

Detailed Examination of Outliers