Data from 2017 for FFS Medicare beneficiaries aged 65+ From CMS https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/Chronic-Conditions/CC_Main
library(data.table)
library(knitr)
library(kableExtra)
cms_dt = fread('cms_medicare_county_65plus_ccs.csv')
# subset to relevant columns
ss_cols = c('State', 'County', 'FIPS', 'Hypertension', 'Diabetes', 'Atrial Fibrillation', 'Cancer',
'COPD', 'Asthma', 'Heart Failure', 'Hypertension',
'Ischemic Heart Disease', 'Stroke')
cms_dt = cms_dt[,..ss_cols]
str(cms_dt)
Will split proportion of population within each county that has each condition in order of fatality rate, so each member of the population is assigned to a single cormobidity from the CCDC data (the comorbidities with the highest fatality rate).
Load CCDC fatality rates by comorbidity
ccdc = fread('ccdc_output.csv')
ccdc = ccdc[group_type == 'Comorbidity']
ccdc = ccdc[order(case_fatality_rate, decreasing = T)]
ccdc %>% kable() %>% kable_styling()
| group_type | group_name | case_fatality_rate |
|---|---|---|
| Comorbidity | Cardiovascular disease | 0.105 |
| Comorbidity | Diabetes | 0.073 |
| Comorbidity | Chronic respiratory disease | 0.063 |
| Comorbidity | Hypertension | 0.060 |
| Comorbidity | Cancer | 0.056 |
| Comorbidity | None | 0.009 |
Assume the probability of CMS chronic conditions across CCDC comorbidities is independent. But that is not independent within CCDC cormobidities.
Order of sampling of CMS CCs:
In order, will assign remaining (fraction of county Medicare population * prevalence) and then subtract that result from remaining fraction.
# Define CCDC comorbidities that map to multiple CMS CCs
cms_dt[,`:=`(
`Cardiovascular disease` = pmax(`Atrial Fibrillation`, `Heart Failure`, `Ischemic Heart Disease`, Stroke),
`Chronic respiratory disease` = pmax(COPD, Asthma)
)]
# Sample population fractions in each cormobidity in order of case fatality rate
comorbidity_order = ccdc[order(case_fatality_rate, decreasing = T), group_name]
comorbidity_order = setdiff(comorbidity_order, c("Missing", "None"))
cms_dt[, remaining_proportion := 1]
for (x in comorbidity_order) {
print(x)
new_col_name = paste0('sampled_proportion_', x)
cms_dt[, (new_col_name) := remaining_proportion * pmax(as.numeric(get(x))/100,0, na.rm=TRUE)]
cms_dt[, remaining_proportion := remaining_proportion - remaining_proportion * pmax(as.numeric(get(x))/100,0, na.rm=TRUE)]
}
## [1] "Cardiovascular disease"
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
## [1] "Diabetes"
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
## [1] "Chronic respiratory disease"
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
## [1] "Hypertension"
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
## [1] "Cancer"
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
## Warning in pmax(as.numeric(get(x))/100, 0, na.rm = TRUE): NAs introduced by
## coercion
Subset to relevant columns and melt to long table
sample_cols = sapply(comorbidity_order, function(x) paste0('sampled_proportion_', x))
ss_cols = c("State", "County", "FIPS", sample_cols)
ss_dt = cms_dt[, ..ss_cols]
# rename column and add `None` column
setnames(ss_dt, old = sample_cols, sapply(sample_cols, function(x) gsub('sampled_proportion_', '', x)))
ss_dt[, None := 1 - `Cardiovascular disease` - Diabetes - `Chronic respiratory disease` - Hypertension - Cancer]
# melt table
melted_dt = melt(ss_dt, id.vars = c("State", "County", "FIPS"), variable.name = "comorbidity")
melted_dt[County == "Santa Clara"] %>% kable() %>% kable_styling()
| State | County | FIPS | comorbidity | value |
|---|---|---|---|---|
| California | Santa Clara | 6085 | Cardiovascular disease | 0.0820000 |
| California | Santa Clara | 6085 | Diabetes | 0.2423520 |
| California | Santa Clara | 6085 | Chronic respiratory disease | 0.0418902 |
| California | Santa Clara | 6085 | Hypertension | 0.3320891 |
| California | Santa Clara | 6085 | Cancer | 0.0253402 |
| California | Santa Clara | 6085 | None | 0.2763286 |
fwrite(melted_dt, 'output_cms_county_ccs_v3.csv')
str(melted_dt)
## Classes 'data.table' and 'data.frame': 19506 obs. of 5 variables:
## $ State : chr "National" "Alabama" "Alabama" "Alabama" ...
## $ County : chr "" "" "Autauga" "Baldwin" ...
## $ FIPS : int NA NA 1001 1003 1005 1007 1009 1011 1013 1015 ...
## $ comorbidity: Factor w/ 6 levels "Cardiovascular disease",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ value : num 0.095 0.095 0.045 0.042 0.088 0.066 0.05 0.311 0.087 0.093 ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "index")= int
## ..- attr(*, "__County")= int 1 2 71 106 123 200 260 326 336 342 ...