Data from 2017 for FFS Medicare beneficiaries aged 65+ From CMS https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/Chronic-Conditions/CC_Main

library(data.table)
library(knitr)
library(kableExtra)

cms_dt = fread('cms_medicare_county_65plus_ccs.csv')

# subset to relevant columns
ss_cols = c('State', 'County', 'FIPS', 'Hypertension', 'Diabetes', 'Atrial Fibrillation', 'Cancer', 
            'COPD', 'Asthma', 'Heart Failure', 'Hypertension',
            'Ischemic Heart Disease', 'Stroke')
cms_dt = cms_dt[,..ss_cols]

str(cms_dt)

Will split proportion of population within each county that has each condition in order of fatality rate, so each member of the population is assigned to a single cormobidity from the CCDC data (the comorbidities with the highest fatality rate).

Load CCDC fatality rates by comorbidity

ccdc = fread('ccdc_output.csv')
ccdc = ccdc[group_type == 'Comorbidity']
ccdc = ccdc[order(case_fatality_rate, decreasing = T)]
ccdc  %>% kable() %>% kable_styling()
group_type group_name case_fatality_rate
Comorbidity Cardiovascular disease 0.105
Comorbidity Diabetes 0.073
Comorbidity Chronic respiratory disease 0.063
Comorbidity Hypertension 0.060
Comorbidity Cancer 0.056
Comorbidity None 0.009

Assume the probability of CMS chronic conditions across CCDC comorbidities is independent. But that is not independent within CCDC cormobidities.

Order of sampling of CMS CCs:

  1. Cardiovascular disease
  1. Diabetes
  2. Chronic respiratory disease
  1. Hypertension
  2. Cancer
  3. None

In order, will assign remaining (fraction of county Medicare population * prevalence) and then subtract that result from remaining fraction.

# Define CCDC comorbidities that map to multiple CMS CCs
cms_dt[,`:=`(
  `Cardiovascular disease` = pmax(`Atrial Fibrillation`, `Heart Failure`, `Ischemic Heart Disease`, Stroke),
  `Chronic respiratory disease` = pmax(COPD, Asthma)
)]

# Sample population fractions in each cormobidity in order of case fatality rate
comorbidity_order = ccdc[order(case_fatality_rate, decreasing = T), group_name]
comorbidity_order = setdiff(comorbidity_order, c("Missing", "None"))

cms_dt[, remaining_proportion := 1]
for (x in comorbidity_order) {
  print(x)
  new_col_name = paste0('sampled_proportion_', x)
  cms_dt[, (new_col_name) := remaining_proportion * pmax(as.numeric(get(x))/100,0, na.rm=TRUE)]
  cms_dt[, remaining_proportion := remaining_proportion - remaining_proportion * pmax(as.numeric(get(x))/100,0, na.rm=TRUE)]
}
## [1] "Cardiovascular disease"
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion

## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
## [1] "Diabetes"
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion

## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
## [1] "Chronic respiratory disease"
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion

## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
## [1] "Hypertension"
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion

## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
## [1] "Cancer"
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion

## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion

Subset to relevant columns and melt to long table

sample_cols = sapply(comorbidity_order, function(x) paste0('sampled_proportion_', x))
ss_cols = c("State", "County", "FIPS", sample_cols)
ss_dt = cms_dt[, ..ss_cols]

# rename column and add `None` column
setnames(ss_dt, old = sample_cols, sapply(sample_cols, function(x) gsub('sampled_proportion_', '', x)))
ss_dt[, None := 1 - `Cardiovascular disease` - Diabetes - `Chronic respiratory disease` - Hypertension - Cancer]

# melt table
melted_dt = melt(ss_dt, id.vars = c("State", "County", "FIPS"), variable.name = "comorbidity")

melted_dt[County == "Santa Clara"]  %>% kable() %>% kable_styling()
State County FIPS comorbidity value
California Santa Clara 6085 Cardiovascular disease 0.0820000
California Santa Clara 6085 Diabetes 0.2423520
California Santa Clara 6085 Chronic respiratory disease 0.0418902
California Santa Clara 6085 Hypertension 0.3320891
California Santa Clara 6085 Cancer 0.0253402
California Santa Clara 6085 None 0.2763286
fwrite(melted_dt, 'output_cms_county_ccs_v3.csv')
str(melted_dt)
## Classes 'data.table' and 'data.frame':   19506 obs. of  5 variables:
##  $ State      : chr  "National" "Alabama" "Alabama" "Alabama" ...
##  $ County     : chr  "" "" "Autauga" "Baldwin" ...
##  $ FIPS       : int  NA NA 1001 1003 1005 1007 1009 1011 1013 1015 ...
##  $ comorbidity: Factor w/ 6 levels "Cardiovascular disease",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ value      : num  0.095 0.095 0.045 0.042 0.088 0.066 0.05 0.311 0.087 0.093 ...
##  - attr(*, ".internal.selfref")=<externalptr> 
##  - attr(*, "index")= int 
##   ..- attr(*, "__County")= int  1 2 71 106 123 200 260 326 336 342 ...