library(data.table)
This method can also be applied to forecast severe cases in any US county using the same data sources.
These numbers are based on data from the Chinese CDC (http://weekly.chinacdc.cn/en/article/id/e53946e2-c6c4-41e9-9a9b-fea8db1a8f51).
I assume we can use these rates to estimate a lower bound of US hospitalizations by comorbidity.
I am currently only including the Hypertension and Diabetes comorbidities because they are the only ones I have conditional distributions for by age bucket in the US.
cond_fatality_rates = fread('cond_fatality_rates.csv')
cond_fatality_rates
## comorbidity case_fatality_rate denominator
## 1: Hypertension 0.06000745 2683
## 2: Diabetes 0.07259528 1102
## 3: Not Hypertension Nor Diabetes 0.02161259 40717
These numbers are based on a simple aggregatation of the public NHANES dataset (https://cran.r-project.org/web/packages/NHANES/NHANES.pdf).
The table contains the proportion of individuals by age decade that have Hypertension, Diabetes, both, or neither.
Hypertension definition: Hypertension was defined as mean systolic BP ≥140 and/or mean diastolic BP ≥90 mmHg https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3589732/#R4 NOTE: Don’t know if participants in NHANES are taking medication to lower BP
Diabetes definition: Currently just using self-reported Diabetes indicator in NHANES data, defined as Study participant told by a doctor or health professional that they have diabetes. NOTE: This might be an underestimate.
Only including NHANES observations that have values for AgeDecade, BPSysAve, BPDiaAve, Diabetes
nhanes_conditional_pdf = fread('nhanes_conditional_pdf.csv')
nhanes_conditional_pdf
## AgeDecade pHBPandDB pHBP pDB neither nhanes_denominator
## 1: 0-9 0.000000000 0.003891051 0.000000000 0.9961089 257
## 2: 10-19 0.000000000 0.008333333 0.012878788 0.9787879 1320
## 3: 20-29 0.001529052 0.027522936 0.009174312 0.9648318 1308
## 4: 30-39 0.003152088 0.073286052 0.032308905 0.8975571 1269
## 5: 40-49 0.015463918 0.106774669 0.069219440 0.8394698 1358
## 6: 50-59 0.023034154 0.169976172 0.138204925 0.7148531 1259
## 7: 60-69 0.065315315 0.233108108 0.228603604 0.6036036 888
## 8: 70+ 0.081272085 0.318021201 0.238515901 0.5247350 566
Data source:
Census County Population by Characteristics: 2010-2018 (https://www.census.gov/data/datasets/time-series/demo/popest/2010s-counties-detail.html?)
Using 2018 population estimates. I combined the 5-year age buckets into 10-year buckets, and collapsed all the buckets for ages 70+ so they line up with NHANES data.
est_2018_w_brackets = fread('census_ca_county_age_populations.csv')
FOCAL_COUNTY = "Santa Clara County"
focal_county_pop = est_2018_w_brackets[CTYNAME == FOCAL_COUNTY, .(pop, age_range)]
focal_county_pop
## pop age_range
## 1: 231605 0-9
## 2: 237135 10-19
## 3: 278072 20-29
## 4: 302185 30-39
## 5: 270163 40-49
## 6: 252777 50-59
## 7: 186657 60-69
## 8: 178976 70+
I assume everyone in the county will be infected by COVID-19 here. You can scale the estimate down by your expected infection rate.
For now, assume population without hypertension/diabetes have the samecase fatality rate as everyone in CCDC data without hypertension/diabetes (~2.2%).There is probably a smarter way to combine fatality rates for age, gender, comorbidity that we can use to get better estimates of severe cases by demographics.
Steps to get simple estimate of severe cases:
Step 1: Join the county population distribution to conditional comobidities by age.
join_pop_comorb = merge(focal_county_pop, nhanes_conditional_pdf, by.x='age_range', by.y='AgeDecade')
join_pop_comorb
## age_range pop pHBPandDB pHBP pDB neither
## 1: 0-9 231605 0.000000000 0.003891051 0.000000000 0.9961089
## 2: 10-19 237135 0.000000000 0.008333333 0.012878788 0.9787879
## 3: 20-29 278072 0.001529052 0.027522936 0.009174312 0.9648318
## 4: 30-39 302185 0.003152088 0.073286052 0.032308905 0.8975571
## 5: 40-49 270163 0.015463918 0.106774669 0.069219440 0.8394698
## 6: 50-59 252777 0.023034154 0.169976172 0.138204925 0.7148531
## 7: 60-69 186657 0.065315315 0.233108108 0.228603604 0.6036036
## 8: 70+ 178976 0.081272085 0.318021201 0.238515901 0.5247350
## nhanes_denominator
## 1: 257
## 2: 1320
## 3: 1308
## 4: 1269
## 5: 1358
## 6: 1259
## 7: 888
## 8: 566
Step 2: Aggregate resulting table by comorbidities to get estimated population within each.
melted_join_pop_comorb = melt(join_pop_comorb, id.vars = c('age_range','pop', 'nhanes_denominator'))
melted_join_pop_comorb = melted_join_pop_comorb[, .(est_pop = sum(pop*value)), by='variable']
melted_join_pop_comorb
## variable est_pop
## 1: pHBPandDB 38115.3
## 2: pHBP 204918.7
## 3: pDB 154363.0
## 4: neither 1616403.6
Step 3: Join the resulting table to the conditional fatality rates by comorbidity. For the population with several comorbidities (e.g. Diabetes and Hypertension), use the maximum conditional fatality rate of each comorbidity.
# Manually adding fatality rates here, automate later when adding more comorbidities
melted_join_pop_comorb[variable == "pHBPandDB",
case_fatality_rate := cond_fatality_rates[comorbidity %in% c("Hypertension", "Diabetes"), max(case_fatality_rate)]]
melted_join_pop_comorb[variable == "pHBP",
case_fatality_rate := cond_fatality_rates[comorbidity == "Hypertension", max(case_fatality_rate)]]
melted_join_pop_comorb[variable == "pDB",
case_fatality_rate := cond_fatality_rates[comorbidity == "Diabetes", max(case_fatality_rate)]]
melted_join_pop_comorb[variable == "neither",
case_fatality_rate := cond_fatality_rates[comorbidity == "Not Hypertension Nor Diabetes", max(case_fatality_rate)]]
melted_join_pop_comorb
## variable est_pop case_fatality_rate
## 1: pHBPandDB 38115.3 0.07259528
## 2: pHBP 204918.7 0.06000745
## 3: pDB 154363.0 0.07259528
## 4: neither 1616403.6 0.02161259
Step 4: Multiply the population in cormobidity group by the conditional fatality rate to get an estimated number of severe cases.
melted_join_pop_comorb[, est_severe_cases := est_pop * case_fatality_rate]
Step 5: Report the estimated number of severe cases overall and by comorbidity.
melted_join_pop_comorb[, est_severe_cases := est_pop * case_fatality_rate]
total_pop = melted_join_pop_comorb[, sum(est_pop)]
est_overall_severe_cases = melted_join_pop_comorb[, sum(est_pop * case_fatality_rate)]
melted_join_pop_comorb[, .(variable, est_severe_cases)]
## variable est_severe_cases
## 1: pHBPandDB 2766.991
## 2: pHBP 12296.648
## 3: pDB 11206.028
## 4: neither 34934.675
par(mar = c(0,0,0,0))
plot(c(0, 1), c(0, 1), ann = F, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
text(x = 0.5, y = 0.5, cex = 1.6, col = "black", paste0(
"Total estimated severe COVID-19 cases in\n", FOCAL_COUNTY, ": ", round(est_overall_severe_cases), ", \n",
"which is ", round(est_overall_severe_cases/total_pop*100, 2), "% of the overall county population")
)