Select “code” button to the right to see the code for this section.
#bring in county hospital data and dichotomize ICU bed per capita at media
aha2017_2019 <- readRDS(paste0(data_dir,"/prepared_data/aha2017_2019.county.Rds"))
#Bring in config file with new variable names and labels
# web address: https://api.census.gov/data/2019/acs/acs5/profile/variables.html
#This will be used to create a labels list now and rename variables later.
names <- read.csv(paste0(data_dir,"/acs/acs_config_2019.csv"), header = T)
# turn labels into list object.
# pre-allocate a list and fill it with human-readable var names a loop
acs.labels.list <- vector("list", nrow(names))
for (i in 1:nrow(names)) {
acs.labels.list[[i]] <- names[i,2]
}
#name elements using var name
acs.labels.list <- setNames(acs.labels.list, names[,1])
# all population characteristics
acs0 <- get_acs(geography = "county",
variables = c(
"DP05_0001E", #Total pop
"DP05_0021E", # total pop 18+
"DP05_0021PE", # %pop 18+
"DP05_0024PE", # %pop 65%
"DP05_0019PE", # %pop < 18
"DP05_0003P", # %female
"DP05_0037PE", # %White
"DP05_0038PE", # %Black
"DP05_0039PE", # %AI/AN
"DP05_0044PE", # %Asian
"DP05_0052PE", # %NH/PI
"DP05_0057PE", # %Other
"DP05_0058PE", # 2+ races
"DP05_0071PE", # %Hispanic
"DP02_0060PE", # %25yr+ and <9th grade
"DP02_0067PE", # %25yr+ and at least HS graduate
"DP03_0027PE", # %16yr+ white collar jobs
"DP03_0099PE", # %wo health insurance
"DP03_0088E", # $per capita income
"DP03_0086E", # $median family income
"DP03_0062E", # $mean household income
"DP03_0052PE", # %household income <10K
"DP03_0053PE", # %household income 10-14K
"DP03_0054PE", # %household income 15-24K
"DP03_0055PE", # %household income 25-34K
"DP03_0056PE", # %household income 35-49K
"DP03_0057PE", # %household income 50-74K
"DP03_0058PE", # %household income 75-99K
"DP03_0059PE", # %household income 100-149K
"DP03_0060PE", # %household income 150-199K
"DP03_0061PE", # %household income 200K+
"DP04_0089E", # $Median home value
"DP04_0109E", # $Median rent
"DP04_0101E", # $Median mortgage
"DP04_0046PE", # %owner occupied
"DP03_0005PE", # %civilian labor force 16yr+ unemployed
"DP03_0119PE", # % families below poverty
"DP03_0128PE", # % individuals below poverty
"DP02_0007PE", # % male single parent family
"DP02_0011PE", # % female single parent family
"DP04_0058PE", # % households no car
"DP04_0075PE", # % households no phone service (inc cell)
"DP04_0073PE", # % households no plumbing
"DP04_0077PE", # % households 1 or less people per room
"DP05_0024PE", # % 65yr+
"DP05_0019PE", # % < 18yr
"DP02_0072PE", # % with a disability
"DP02_0114PE", # % 5yr+ speak English < very well
"DP04_0009PE", # % houses 2 units
"DP04_0010PE", # % houses 3-4 units
"DP04_0011PE", # % houses 5-9 units
"DP04_0012PE", # % houses 10-19 units
"DP04_0013PE", # % houses 20+ units
"DP04_0014PE", # % mobile homes
"DP02_0152PE" # % houses with computer
),
year = 2019,
survey = "acs5",
geometry = FALSE
)
acs1 <- left_join(acs0, names, by = "variable")
acs2 <- pivot_wider(acs1, id_cols = c(GEOID, NAME), names_from = name, values_from = estimate)
## Population characteristics that need some curation
# % Minority - the complement of %White
acs3 <- acs2 %>%
group_by(NAME) %>%
mutate(
over50k = sum(perc.house.50_74k,
perc.house.75_99k,
perc.house.100_149k,
perc.house.150_199k,
perc.house.ge.200k), #Income disparity, create %under 50K
perc.singleparent = sum(perc.fam.singlemale,
perc.fam.singlefemale),# single parent households - add up male and female single parent homes
perc.multiunithouse = sum(perc.house.2unit,
perc.house.3_4unit,
perc.house.5_9unit,
perc.house.10_19unit,
perc.house.ge.20unit)#Multi unit housing, sum % of all relevant categories
)%>%
ungroup()%>%
mutate(
perc.minority = (100-perc.white), # % Minority - the complement of %White
perc.l.highschool = (100-perc.ge.highschool),# less than HS diploma - complement of %25yr+ and at least HS graduate
perc.house.g.1perroom = (100-perc.house.le.1perroom), # Houses >1 person per room - complement of 1 or less person per room
income.disparity = log(100*perc.house.l.10k/over50k),# Income disparity=log of 100 x ratio <10K/>50K
total.thousands = total/1000,#simple division for presentation
mean.indi.income.thousands = mean.indi.income/1000,
median.fam.income.thousands = median.fam.income/1000,
median.house.income.thousands = median.house.income/1000,
median.house.value.thousands = median.house.value/1000,
median.mortgage.thousands = median.mortgage/1000)
#Join with hospital data
final2017_2019 <- left_join(acs3, aha2017_2019, by = "GEOID")
#Create per capita variables and dichotomize at median
final2017_2019 <- final2017_2019 %>%
mutate(adult_icubeds_per1k = 1000*county.adult.icu.beds/total.ge.18,
adult_intensivists_per1k = 1000*county.adult.icu.beds/total.ge.18)
For 2019, the ACS dataset has 3220 counties. However, our AHA dataset has only 2476. This means that there are only 2476 counties with General Medical-Surgical Hospitals in the AHA data (we excluded all other hospital types before making county tallies). When merging these two datasets by county, we get a number of counties that have no hospital information, presumably because they truly do not have General Medical-Surgical Hospitals
Below I display:
test1 <- final2017_2019%>%
filter(is.na(percent.missing)==T)%>%
select(NAME, total.thousands,county.hospital.beds.total)%>%
arrange(desc(total.thousands))
summary(test1$total.thousands)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.066 5.720 11.917 17.384 21.567 215.044
summary(test1$county.hospital.beds.total)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## NA NA NA NaN NA NA 751
print(test1)
## # A tibble: 751 x 3
## NAME total.thousands county.hospital.beds.total
## <chr> <dbl> <dbl>
## 1 Berkeley County, South Carolina 215. NA
## 2 Columbia County, Georgia 151. NA
## 3 Livingston Parish, Louisiana 139. NA
## 4 Randall County, Texas 134. NA
## 5 Bossier Parish, Louisiana 126. NA
## 6 Kendall County, Illinois 126. NA
## 7 Sumter County, Florida 125. NA
## 8 Sutter County, California 96.1 NA
## 9 Sherburne County, Minnesota 94.5 NA
## 10 Roanoke County, Virginia 93.8 NA
## # ... with 741 more rows
Interpretation:
The AHA is a voluntary annual survey, with a reported approximate annual response of 75%. One way we increase our availability of data is to use combine data from multiple survey years. Specifically, we use the most recent, available 3 years of data (although this could be expanded). For this report we use 2017-2019. First, using the AHA Change Report that identifies which hospitals have closed or merged from one year to the next we remove these hospitals from the multi-year data. Second, we average data over the included years. While this will create some errors when bed numbers increase or decrease it also allows us to include data for hospitals that may have missed a year or two of survey response but are still open. A last value carried forward would be a better way to do this but involves timestamping data and was not done at this time. After we have a dataset set that is a summary average of 3 years of data we then summarize at the county level.
We wanted a way to indicate which counties may have a lot of hospitals with missing data and therefore may be problematic or inaccurate. We do this in 2 ways:
Percent missing was calculated by county as the proportion of hospitals with no survey response for any of the years of included AHA study period (2017-2019 here). First, a general summary of Percent Missing Among the whole dataset.
ggplot(data = final2017_2019,
aes(x = percent.missing))+
geom_histogram() +
theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary(final2017_2019$percent.missing)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 15.23 0.00 100.00 751
Looks like a number of counties with the percent of hospitals with missing information information is either 0 or 100. Specifically:
Now I remove these and re-examine missingness among this subset.
ggplot(data = subset(final2017_2019,
percent.missing != 0 &
percent.missing != 100),
aes(x = percent.missing))+
geom_histogram() +
theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
test2<-subset(final2017_2019, percent.missing != 0 & percent.missing != 100)
summary(test2$percent.missing)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 25.00 50.00 39.23 50.00 79.00
A general summary of the percent of hospital beds from non-responding hospitals.
ggplot(data = final2017_2019,
aes(x = percent.hospital.beds.unaccounted))+
geom_histogram() +
theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary(final2017_2019$percent.hospital.beds.unaccounted)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 14.35 0.00 100.00 751
As expected, lots of 0s and 100s. Actually, the numbers of 0s and 100s should be nearly equivalent between the two methods since if all hospitals not reporting then will have all hospital beds unaccounted for and vice versa:
Now among the subset of counties with Percent Hospital Beds from non-responding hospitals not equal to 0 or 100, I present the summary of the percent of hospital beds missing.
test2 <-subset(final2017_2019, percent.hospital.beds.unaccounted != 0 &
percent.hospital.beds.unaccounted != 100)
ggplot(data = test2,
aes(x = percent.hospital.beds.unaccounted))+
geom_histogram() +
theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary(test2$percent.hospital.beds.unaccounted)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 13.00 27.00 31.63 49.00 97.00
Interpretation:
Here I compare the two measures of county-level missingness on each access. Data is with counties with 0 and 100% missing removed since these are the essentialy the same for each method.
test2 <- test2%>%
mutate(difference = percent.hospital.beds.unaccounted - percent.missing)
ggplot(data = test2,
aes(x = percent.missing,
y = percent.hospital.beds.unaccounted))+
geom_point()+
geom_abline(slope = 1)
Here is a different way to look at the same data. For each county I subtract the Hospital Bed measure from the Hospital measure of non-response and present summary and graph of this difference. Here, a negative number indicates that there is a lower degree of missingness regarding hospital resources using the hospital bed measure.
ggplot(data = test2,
aes(x=difference))+
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary(test2$difference)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -50.000 -19.000 -9.000 -7.706 0.750 47.000
INterpretation:
Here I do a graphical examination exploring any associations of county-level hospital bed missingess with other variables.
ggplot(data = final2017_2019,
aes(x = percent.hospital.beds.unaccounted,
y = total.hospitals))+
geom_point() +
geom_smooth()+
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = final2017_2019,
aes(x = percent.hospital.beds.unaccounted,
y = county.hospital.beds.total))+
geom_point() +
geom_smooth()+
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = subset(final2017_2019, county.hospital.beds.total<7500),
aes(x = percent.hospital.beds.unaccounted,
y = county.hospital.beds.total))+
geom_point() +
geom_smooth()+
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = final2017_2019,
aes(x = percent.hospital.beds.unaccounted,
y = county.adult.icu.beds))+
geom_point() +
geom_smooth()+
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = subset(final2017_2019, county.hospital.beds.total<7500),
aes(x = percent.hospital.beds.unaccounted,
y = adult_icubeds_per1k))+
geom_point() +
geom_smooth()+
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = final2017_2019,
aes(x = percent.hospital.beds.unaccounted,
y = county.adult.fte))+
geom_point() +
geom_smooth()+
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = final2017_2019,
aes(x = percent.hospital.beds.unaccounted,
y = total.thousands))+
geom_point() +
geom_smooth()+
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = final2017_2019,
aes(x = percent.hospital.beds.unaccounted,
y = perc.minority))+
geom_point() +
geom_smooth()+
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = final2017_2019,
aes(x = percent.hospital.beds.unaccounted,
y = median.fam.income))+
geom_point() +
geom_smooth()+
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = final2017_2019,
aes(x = percent.hospital.beds.unaccounted,
y = median.fam.income))+
geom_point() +
geom_smooth()+
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = final2017_2019,
aes(x = percent.hospital.beds.unaccounted,
y = perc.fam.poverty))+
geom_point() +
geom_smooth()+
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = final2017_2019,
aes(x = percent.hospital.beds.unaccounted,
y = income.disparity))+
geom_point() +
geom_smooth()+
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(data = final2017_2019,
aes(x = percent.hospital.beds.unaccounted,
y = perc.noinsurance))+
geom_point() +
geom_smooth()+
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Here I examine a basic summary but with recategorizing counties based on their percent missingness. These counties data are labeled “unreliable”
Here if Percent Hospital Beds Missing > 0, then categorized as “unreliable”.
High (N=1051) | Low (N=871) | No Hospital (N=751) | Unreliable (N=547) | Total (N=3220) | |
---|---|---|---|---|---|
Total population, thousands | |||||
Mean (SD) | 128 (215) | 40 (100) | 17 (20) | 265 (694) | 102 (327) |
Range | 1 - 2050 | 1 - 2287 | 0 - 215 | 1 - 10082 | 0 - 10082 |
Here if Percent Hospital Beds Missing > 10, then categorized as “unreliable”.
High (N=1107) | Low (N=877) | No Hospital (N=751) | Unreliable (N=485) | Total (N=3220) | |
---|---|---|---|---|---|
Total population, thousands | |||||
Mean (SD) | 156 (309) | 41 (102) | 17 (20) | 218 (661) | 102 (327) |
Range | 1 - 4647 | 1 - 2287 | 0 - 215 | 1 - 10082 | 0 - 10082 |
Here if Percent Hospital Beds Missing > 25, then categorized as “unreliable”.
High (N=1171) | Low (N=883) | No Hospital (N=751) | Unreliable (N=415) | Total (N=3220) | |
---|---|---|---|---|---|
Total population, thousands | |||||
Mean (SD) | 205 (506) | 41 (102) | 17 (20) | 93 (188) | 102 (327) |
Range | 1 - 10082 | 1 - 2287 | 0 - 215 | 1 - 2182 | 0 - 10082 |
Here if Percent Hospital Beds Missing > 50, then categorized as “unreliable”.
High (N=1218) | Low (N=927) | No Hospital (N=751) | Unreliable (N=324) | Total (N=3220) | |
---|---|---|---|---|---|
Total population, thousands | |||||
Mean (SD) | 207 (499) | 47 (116) | 17 (20) | 58 (147) | 102 (327) |
Range | 1 - 10082 | 1 - 2287 | 0 - 215 | 1 - 2182 | 0 - 10082 |