Import data

See https://www1.nyc.gov/site/doh/data/data-sets/community-health-survey-public-use-data.page for the codebook, other years, and further information about the dataset.

Import:

library(haven)
# Read the chs2018, chs2019 and chs2020 public-use SAS files directly from
# the URL below and collect them in a list whose element names are the
# four-digit years ("2018", "2019", "2020").
chs_list <- setNames(
  lapply(18:20, function(yy) {
    sas_url <- paste0(
      "https://www1.nyc.gov/assets/doh/downloads/sas/episrv/chs20",
      yy,
      "_public.sas7bdat"
    )
    read_sas(sas_url)
  }),
  2018:2020
)

Recoding:

library(dplyr)
# Harmonise column names across the yearly tables: every occurrence of one of
# the year tokens in the pattern below is deleted from each column name, and
# the table's year is stored as a character column `year`.
# NOTE(review): the pattern is not anchored, so a token is removed wherever it
# appears in a name, not only as a suffix -- confirm that is intended.
strip_year_tokens <- function(col_names) {
  gsub("18|18a|19|19a|20|20a|21|20_all|19_all", "", col_names)
}
for (survey_year in 2018:2020) {
  print(survey_year)
  year_key <- as.character(survey_year)
  renamed <- rename_with(chs_list[[year_key]], strip_year_tokens)
  renamed$year <- year_key
  chs_list[[year_key]] <- renamed
}
## [1] 2018
## [1] 2019
## [1] 2020
# Stack the yearly tables into one data frame. Each table carries its own
# `year` value, so joining on all shared columns never matches any rows;
# bind_rows() expresses the intended row-wise stacking directly (columns
# missing from a given year are filled with NA) and avoids the repeated
# key matching and "Joining with ..." messages of reduce(full_join).
# All numeric columns are then converted to character so table1 treats them
# as categorical, and the three listed columns are converted back to numeric.
chs <- bind_rows(chs_list) %>%
  mutate(across(where(is.numeric), as.character)) %>%
  mutate(
    across(all_of(c("daysalc30", "averagedrink", "wt_dual")), as.numeric)
  )
library(table1)
# Descriptive table of the listed variables, with one column per value of
# `year` (the term after `|` in the formula).
table1(
  ~ mhtreat + smoker + everyday + cpd + heavysmoker + smokecat +
    agegroup + birthsex + newrace + education + employment +
    imputed_povertygroup + daysalc30 + averagedrink + heavydrink +
    bingenew | year,
  data = chs
)
2018
(N=10076)
2019
(N=8803)
2020
(N=8781)
Overall
(N=27660)
mhtreat
1 433 (4.3%) 1474 (16.7%) 1408 (16.0%) 3315 (12.0%)
2 520 (5.2%) 7295 (82.9%) 7342 (83.6%) 15157 (54.8%)
Missing 9123 (90.5%) 34 (0.4%) 31 (0.4%) 9188 (33.2%)
smoker
1 6584 (65.3%) 5862 (66.6%) 5884 (67.0%) 18330 (66.3%)
2 1245 (12.4%) 999 (11.3%) 942 (10.7%) 3186 (11.5%)
3 2207 (21.9%) 1892 (21.5%) 1927 (21.9%) 6026 (21.8%)
Missing 40 (0.4%) 50 (0.6%) 28 (0.3%) 118 (0.4%)
everyday
1 762 (7.6%) 584 (6.6%) 555 (6.3%) 1901 (6.9%)
2 483 (4.8%) 415 (4.7%) 387 (4.4%) 1285 (4.6%)
Missing 8831 (87.6%) 7804 (88.7%) 7839 (89.3%) 24474 (88.5%)
cpd
1 801 (7.9%) 642 (7.3%) 613 (7.0%) 2056 (7.4%)
2 406 (4.0%) 333 (3.8%) 312 (3.6%) 1051 (3.8%)
3 38 (0.4%) 24 (0.3%) 17 (0.2%) 79 (0.3%)
Missing 8831 (87.6%) 7804 (88.7%) 7839 (89.3%) 24474 (88.5%)
heavysmoker
1 247 (2.5%) 198 (2.2%) 159 (1.8%) 604 (2.2%)
2 998 (9.9%) 801 (9.1%) 783 (8.9%) 2582 (9.3%)
Missing 8831 (87.6%) 7804 (88.7%) 7839 (89.3%) 24474 (88.5%)
smokecat
1 483 (4.8%) 415 (4.7%) 387 (4.4%) 1285 (4.6%)
2 519 (5.2%) 392 (4.5%) 400 (4.6%) 1311 (4.7%)
3 243 (2.4%) 192 (2.2%) 155 (1.8%) 590 (2.1%)
4 8791 (87.2%) 7754 (88.1%) 7811 (89.0%) 24356 (88.1%)
Missing 40 (0.4%) 50 (0.6%) 28 (0.3%) 118 (0.4%)
agegroup
1 724 (7.2%) 654 (7.4%) 706 (8.0%) 2084 (7.5%)
2 3037 (30.1%) 3135 (35.6%) 3189 (36.3%) 9361 (33.8%)
3 3384 (33.6%) 2819 (32.0%) 2951 (33.6%) 9154 (33.1%)
4 2908 (28.9%) 2179 (24.8%) 1921 (21.9%) 7008 (25.3%)
Missing 23 (0.2%) 16 (0.2%) 14 (0.2%) 53 (0.2%)
birthsex
1 4338 (43.1%) 3855 (43.8%) 3850 (43.8%) 12043 (43.5%)
2 5682 (56.4%) 4918 (55.9%) 4902 (55.8%) 15502 (56.0%)
Missing 56 (0.6%) 30 (0.3%) 29 (0.3%) 115 (0.4%)
newrace
1 3484 (34.6%) 3056 (34.7%) 2859 (32.6%) 9399 (34.0%)
2 2300 (22.8%) 1943 (22.1%) 1837 (20.9%) 6080 (22.0%)
3 2895 (28.7%) 2411 (27.4%) 2457 (28.0%) 7763 (28.1%)
4 1126 (11.2%) 1127 (12.8%) 1340 (15.3%) 3593 (13.0%)
5 271 (2.7%) 266 (3.0%) 288 (3.3%) 825 (3.0%)
education
1 1555 (15.4%) 1152 (13.1%) 1223 (13.9%) 3930 (14.2%)
2 2148 (21.3%) 1779 (20.2%) 1809 (20.6%) 5736 (20.7%)
3 2127 (21.1%) 1855 (21.1%) 1691 (19.3%) 5673 (20.5%)
4 4176 (41.4%) 3976 (45.2%) 4009 (45.7%) 12161 (44.0%)
Missing 70 (0.7%) 41 (0.5%) 49 (0.6%) 160 (0.6%)
employment
1 4486 (44.5%) 4219 (47.9%) 3947 (44.9%) 12652 (45.7%)
2 918 (9.1%) 829 (9.4%) 773 (8.8%) 2520 (9.1%)
3 300 (3.0%) 246 (2.8%) 279 (3.2%) 825 (3.0%)
4 312 (3.1%) 306 (3.5%) 893 (10.2%) 1511 (5.5%)
5 511 (5.1%) 415 (4.7%) 355 (4.0%) 1281 (4.6%)
6 358 (3.6%) 355 (4.0%) 389 (4.4%) 1102 (4.0%)
7 2262 (22.4%) 1721 (19.6%) 1484 (16.9%) 5467 (19.8%)
8 861 (8.5%) 626 (7.1%) 601 (6.8%) 2088 (7.5%)
Missing 68 (0.7%) 86 (1.0%) 60 (0.7%) 214 (0.8%)
imputed_povertygroup
1 2436 (24.2%) 1975 (22.4%) 1970 (22.4%) 6381 (23.1%)
2 2141 (21.2%) 1748 (19.9%) 1813 (20.6%) 5702 (20.6%)
3 2155 (21.4%) 1427 (16.2%) 1533 (17.5%) 5115 (18.5%)
4 1642 (16.3%) 1718 (19.5%) 1485 (16.9%) 4845 (17.5%)
5 1702 (16.9%) 1935 (22.0%) 1980 (22.5%) 5617 (20.3%)
daysalc30
Mean (SD) 4.04 (7.05) 4.27 (7.06) 4.63 (7.72) 4.30 (7.28)
Median [Min, Max] 1.00 [0, 30.1] 1.00 [0, 30.1] 1.00 [0, 30.1] 1.00 [0, 30.1]
Missing 89 (0.9%) 93 (1.1%) 62 (0.7%) 244 (0.9%)
averagedrink
Mean (SD) 0.310 (0.770) 0.334 (0.817) 0.364 (1.02) 0.335 (0.871)
Median [Min, Max] 0.0333 [0, 20.0] 0.0333 [0, 33.3] 0.0333 [0, 32.1] 0.0333 [0, 33.3]
Missing 157 (1.6%) 138 (1.6%) 94 (1.1%) 389 (1.4%)
heavydrink
1 436 (4.3%) 432 (4.9%) 513 (5.8%) 1381 (5.0%)
2 9432 (93.6%) 8203 (93.2%) 8146 (92.8%) 25781 (93.2%)
Missing 208 (2.1%) 168 (1.9%) 122 (1.4%) 498 (1.8%)
bingenew
1 1473 (14.6%) 1444 (16.4%) 1319 (15.0%) 4236 (15.3%)
2 8426 (83.6%) 7197 (81.8%) 7366 (83.9%) 22989 (83.1%)
Missing 177 (1.8%) 162 (1.8%) 96 (1.1%) 435 (1.6%)
# Save the combined data frame as CSV in the current working directory.
# NOTE(review): the file name says "19-21" while the list elements are named
# 2018:2020 -- confirm which labelling is intended before renaming anything.
write.csv(x = chs, file = "CHS19-21.csv")