ACS Data Pull and Cleaning

Select “code” button to the right to see the code for this section.

#bring in county hospital data and dichotomize ICU bed per capita at media
aha2017_2019 <- readRDS(paste0(data_dir,"/prepared_data/aha2017_2019.county.Rds"))

#Bring in config file with new variable names and labels
# web address: https://api.census.gov/data/2019/acs/acs5/profile/variables.html
#This will be used to create a labels list now and rename variables later.
names <- read.csv(paste0(data_dir,"/acs/acs_config_2019.csv"), header = T)

# turn labels into list object.
# pre-allocate a list and fill it with human-readable var names a loop
acs.labels.list <- vector("list", nrow(names))

for (i in 1:nrow(names)) {
  acs.labels.list[[i]] <- names[i,2]
}
#name elements using var name
acs.labels.list <- setNames(acs.labels.list, names[,1])

# all population characteristics
acs0 <- get_acs(geography = "county",
                variables = c(
                  "DP05_0001E", #Total pop
                  "DP05_0021E", # total pop 18+
                  "DP05_0021PE", # %pop 18+
                  "DP05_0024PE", # %pop 65%
                  "DP05_0019PE", # %pop < 18
                  "DP05_0003P", # %female
                  "DP05_0037PE", # %White
                  "DP05_0038PE", # %Black
                  "DP05_0039PE", # %AI/AN
                  "DP05_0044PE", # %Asian
                  "DP05_0052PE", # %NH/PI
                  "DP05_0057PE", # %Other
                  "DP05_0058PE", # 2+ races
                  "DP05_0071PE", # %Hispanic
                  "DP02_0060PE", # %25yr+ and <9th grade
                  "DP02_0067PE", # %25yr+ and at least HS graduate
                  "DP03_0027PE", # %16yr+ white collar jobs
                  "DP03_0099PE", # %wo health insurance
                  "DP03_0088E", # $per capita income
                  "DP03_0086E", # $median family income
                  "DP03_0062E", # $mean household income
                  "DP03_0052PE", # %household income <10K
                  "DP03_0053PE", # %household income 10-14K
                  "DP03_0054PE", # %household income 15-24K
                  "DP03_0055PE", # %household income 25-34K
                  "DP03_0056PE", # %household income 35-49K
                  "DP03_0057PE", # %household income 50-74K
                  "DP03_0058PE", # %household income 75-99K
                  "DP03_0059PE", # %household income 100-149K
                  "DP03_0060PE", # %household income 150-199K
                  "DP03_0061PE", # %household income 200K+
                  "DP04_0089E", # $Median home value
                  "DP04_0109E", # $Median rent
                  "DP04_0101E", # $Median mortgage
                  "DP04_0046PE", # %owner occupied
                  "DP03_0005PE", # %civilian labor force 16yr+ unemployed
                  "DP03_0119PE", # % families below poverty
                  "DP03_0128PE", # % individuals below poverty
                  "DP02_0007PE", # % male single parent family
                  "DP02_0011PE", # % female single parent family
                  "DP04_0058PE", # % households no car
                  "DP04_0075PE", # % households no phone service (inc cell)
                  "DP04_0073PE", # % households no plumbing
                  "DP04_0077PE", # % households 1 or less people per room
                  "DP05_0024PE", # % 65yr+
                  "DP05_0019PE", # % < 18yr
                  "DP02_0072PE", # % with a disability
                  "DP02_0114PE", # % 5yr+ speak English < very well
                  "DP04_0009PE", # % houses 2 units
                  "DP04_0010PE", # % houses 3-4 units
                  "DP04_0011PE", # % houses 5-9 units
                  "DP04_0012PE", # % houses 10-19 units
                  "DP04_0013PE", # % houses 20+ units
                  "DP04_0014PE", # % mobile homes
                  "DP02_0152PE" # % houses with computer
                ),
                year = 2019,
                survey = "acs5",
                geometry = FALSE
) 

acs1 <- left_join(acs0, names, by = "variable")
acs2 <- pivot_wider(acs1, id_cols = c(GEOID, NAME), names_from = name, values_from = estimate)

## Population characteristics that need some curation
# % Minority - the complement of %White

acs3 <- acs2 %>% 
  group_by(NAME) %>%
  mutate(
    over50k = sum(perc.house.50_74k,
                  perc.house.75_99k,
                  perc.house.100_149k,
                  perc.house.150_199k,
                  perc.house.ge.200k), #Income disparity, create %under 50K
    perc.singleparent = sum(perc.fam.singlemale,
                            perc.fam.singlefemale),# single parent households - add up male and female single parent homes
    perc.multiunithouse = sum(perc.house.2unit,
                              perc.house.3_4unit,
                              perc.house.5_9unit,
                              perc.house.10_19unit,
                              perc.house.ge.20unit)#Multi unit housing, sum % of all relevant categories
  )%>%
  ungroup()%>%
  mutate(
    perc.minority = (100-perc.white), # % Minority - the complement of %White
    perc.l.highschool = (100-perc.ge.highschool),# less than HS diploma - complement of %25yr+ and at least HS graduate
    perc.house.g.1perroom = (100-perc.house.le.1perroom), # Houses >1 person per room - complement of 1 or less person per room
    income.disparity = log(100*perc.house.l.10k/over50k),# Income disparity=log of 100 x ratio <10K/>50K
    total.thousands = total/1000,#simple division for presentation
    mean.indi.income.thousands = mean.indi.income/1000,
    median.fam.income.thousands = median.fam.income/1000,
    median.house.income.thousands = median.house.income/1000,
    median.house.value.thousands = median.house.value/1000,
    median.mortgage.thousands = median.mortgage/1000)

#Join with hospital data
final2017_2019 <- left_join(acs3, aha2017_2019, by = "GEOID")

#Create per capita variables and dichotomize at median

final2017_2019 <- final2017_2019 %>%
  mutate(adult_icubeds_per1k = 1000*county.adult.icu.beds/total.ge.18,
         adult_intensivists_per1k = 1000*county.adult.icu.beds/total.ge.18)

County-Level Graphical Examination

ICU bed density by Total Population

ggplot(data = final2017_2019,
       aes(x = total.thousands,
           y = adult_icubeds_per1k))+
  geom_point()+
  theme_bw()

Interpretation:

  • There is one extreme and 2 moderate outliers on the ICU Beds Per Capita axis above 2.5 ICU beds per 1000 adults. These values seem high in context of other similarly populated counties on the y axis.
  • There are also 4 outliers on the County population axis above 3,750,000 persons

ICU Beds by Total Population

ggplot(data = final2017_2019,
       aes(x = total.thousands,
           y = county.adult.icu.beds))+
  geom_point()+
  theme_bw()

Interpretation:

  • Point cloud does have expected upward trend of more ICU beds as population increases.
  • There are also 4 outliers on the County population axis above 3,750,000 persons

FTE Intensivists by Total Population

ggplot(data = final2017_2019,
       aes(x = total.thousands,
           y = county.adult.fte))+
  geom_point()+
  theme_bw()

Interpretation:

  • Similar 4 Total population outliers of population > 3.75 million
  • Additional outliers along Intensivist axis with 2 counties with FTE > 400 that seems out of context with other counties of similar size.

FTE Intensivists by ICU beds

ggplot(data = final2017_2019,
       aes(x = county.adult.icu.beds,
           y = county.adult.fte))+
  geom_point()+
  theme_bw()

Interpretation:

  • Similar outliers of Inensivists above 300 and ICU beds above 1000

Detailed Examination of Outliers

Here I filter out the counties that have any one of the following parameters:

  • ICU beds per 1000 adults > 2.5
  • ICU beds > 750
  • FTE Intensivists > 300
  • total population > 3.75 million
test <- final2017_2019 %>%
  filter(adult_icubeds_per1k > 2.5 |
           county.adult.icu.beds > 750 |
           county.adult.fte > 300 |
           total.thousands > 3750)%>%
  select(GEOID, NAME, total.hospitals, county.hospital.beds.total, 
         county.adult.fte, county.adult.icu.beds, total.thousands, 
         adult_icubeds_per1k, percent.hospital.beds.unaccounted)
kable(test, digits = 1)
GEOID NAME total.hospitals county.hospital.beds.total county.adult.fte county.adult.icu.beds total.thousands adult_icubeds_per1k percent.hospital.beds.unaccounted
04013 Maricopa County, Arizona 26 7972 170 880 4328.8 0.3 8
06037 Los Angeles County, California 77 19912 495 1792 10081.6 0.2 18
17031 Cook County, Illinois 46 14627 384 1364 5198.3 0.3 17
28055 Issaquena County, Mississippi 1 187 1 16 1.4 13.8 0
36061 New York County, New York 10 8820 480 861 1632.0 0.6 0
39035 Cuyahoga County, Ohio 16 6331 702 719 1247.5 0.7 7
42093 Montour County, Pennsylvania 1 536 32 57 18.3 3.9 0
48201 Harris County, Texas 29 10086 273 1262 4646.6 0.4 8
51720 Norton city, Virginia 2 139 0 18 4.0 5.9 0

Interpretation:

  • ICU bed per capita - Issaquena Mississippi is an extreme outlier at 14 ICU beds per 1000 adults with only one hospital which does not have an unreasonable amount of ICU beds (16) given 187 hospital beds. So it is a population denominator issue.
  • ICU Beds - The counties with high numbers of ICU beds are the ones to be expected as they are all large counties. Of note, a couple of these counties have a high degree of missing information from their hospitals so ICU beds and Intensivists may be an underestimate….
  • Intensivists - Counties with high number of intensivists also seem to be from populated counties.
  • Highly populated counties seem appropriate.
  • I think Issaquena needs to be fixed but others are OK but will need adjustment or sensitivity analyses for missingess.