pt_hist_explore_tbl <- raw_pt_history %>%
select(-patient_id) %>%
tbl_summary(
statistic = list(
all_categorical() ~ "{n} ({p}%)",
all_continuous() ~ c("{mean} ({sd})",
"{median} ({p25}, {p75})",
"{min}, {max}")
),
type = list(all_continuous() ~ 'continuous2'),
digits = list(all_categorical() ~ c(0, 1),
all_continuous2() ~ c(1, 1))
)
pt_hist_explore_tbl %>%
gtsummary::as_gt() %>%
gt::tab_header("Explore Patient History")
| Explore Patient History | |
| Characteristic | N = 4,2291 |
|---|---|
| currentSmoker | 2,090 (49.4%) |
| cigsPerDay | |
| Â Â Â Â Mean (SD) | 9.0 (11.9) |
| Â Â Â Â Median (Q1, Q3) | 0.0 (0.0, 20.0) |
| Â Â Â Â Min, Max | 0.0, 70.0 |
| Â Â Â Â Unknown | 29 |
| BPMeds | 124 (3.0%) |
| Â Â Â Â Unknown | 53 |
| prevalentStroke | 25 (0.6%) |
| prevalentHyp | 1,313 (31.0%) |
| diabetes | 109 (2.6%) |
| 1 n (%) | |
pt_clinic_explore_tbl <- raw_pt_clinic %>%
select(-patient_id) %>%
tbl_summary(
statistic = list(
all_categorical() ~ "{n} ({p}%)",
all_continuous() ~ c("{mean} ({sd})",
"{median} ({p25}, {p75})",
"{min}, {max}")
),
type = list(all_continuous() ~ 'continuous2'),
digits = list(all_categorical() ~ c(0, 1),
all_continuous2() ~ c(1, 1))
)
pt_clinic_explore_tbl %>%
gtsummary::as_gt() %>%
gt::tab_header("Explore Patient Clinical EHR")
| Explore Patient Clinical EHR | |
| Characteristic | N = 4,2311 |
|---|---|
| diagnosis_code | |
| Â Â Â Â | 287 (6.8%) |
| Â Â Â Â 124111000119102 | 112 (2.6%) |
| Â Â Â Â 15699201000119104 | 52 (1.2%) |
| Â Â Â Â 15699241000119102 | 52 (1.2%) |
| Â Â Â Â 15699281000119107 | 52 (1.2%) |
| Â Â Â Â 16320791000119106 | 39 (0.9%) |
| Â Â Â Â 193410003 | 78 (1.8%) |
| Â Â Â Â 193411004 | 39 (0.9%) |
| Â Â Â Â 193413001 | 52 (1.2%) |
| Â Â Â Â 193687000 | 39 (0.9%) |
| Â Â Â Â 231996009 | 78 (1.8%) |
| Â Â Â Â 232050001 | 78 (1.8%) |
| Â Â Â Â 232051002 | 78 (1.8%) |
| Â Â Â Â 232052009 | 52 (1.2%) |
| Â Â Â Â 232053004 | 52 (1.2%) |
| Â Â Â Â 232054005 | 51 (1.2%) |
| Â Â Â Â 232055006 | 52 (1.2%) |
| Â Â Â Â 232061009 | 39 (0.9%) |
| Â Â Â Â 232063007 | 61 (1.4%) |
| Â Â Â Â 232064001 | 61 (1.4%) |
| Â Â Â Â 232065000 | 61 (1.4%) |
| Â Â Â Â 24704003 | 39 (0.9%) |
| Â Â Â Â 267613004 | 52 (1.2%) |
| Â Â Â Â 28835009 | 52 (1.2%) |
| Â Â Â Â 312898002 | 39 (0.9%) |
| Â Â Â Â 312921000 | 78 (1.8%) |
| Â Â Â Â 312925009 | 78 (1.8%) |
| Â Â Â Â 312927001 | 39 (0.9%) |
| Â Â Â Â 314270008 | 61 (1.4%) |
| Â Â Â Â 360455002 | 73 (1.7%) |
| Â Â Â Â 362.53 | 78 (1.8%) |
| Â Â Â Â 362.74 | 39 (0.9%) |
| Â Â Â Â 368.54 | 39 (0.9%) |
| Â Â Â Â 368.61 | 39 (0.9%) |
| Â Â Â Â 390936003 | 38 (0.9%) |
| Â Â Â Â 408847006 | 99 (2.3%) |
| Â Â Â Â 408848001 | 99 (2.3%) |
| Â Â Â Â 408849009 | 87 (2.1%) |
| Â Â Â Â 408850009 | 87 (2.1%) |
| Â Â Â Â 408851008 | 87 (2.1%) |
| Â Â Â Â 415297005 | 112 (2.6%) |
| Â Â Â Â 54122009 | 38 (0.9%) |
| Â Â Â Â 56852002 | 38 (0.9%) |
| Â Â Â Â 66892 | 28 (0.7%) |
| Â Â Â Â 66894 | 28 (0.7%) |
| Â Â Â Â 66999 | 42 (1.0%) |
| Â Â Â Â 70099003 | 65 (1.5%) |
| Â Â Â Â 711162004 | 73 (1.7%) |
| Â Â Â Â 711482008 | 38 (0.9%) |
| Â Â Â Â 715562001 | 39 (0.9%) |
| Â Â Â Â 716663009 | 39 (0.9%) |
| Â Â Â Â 718718009 | 38 (0.9%) |
| Â Â Â Â 719297006 | 39 (0.9%) |
| Â Â Â Â 719431007 | 52 (1.2%) |
| Â Â Â Â 719520001 | 78 (1.8%) |
| Â Â Â Â 722066001 | 39 (0.9%) |
| Â Â Â Â 737579002 | 39 (0.9%) |
| Â Â Â Â 763387005 | 78 (1.8%) |
| Â Â Â Â 764452004 | 38 (0.9%) |
| Â Â Â Â 764939004 | 39 (0.9%) |
| Â Â Â Â 765191009 | 39 (0.9%) |
| Â Â Â Â 95501007 | 38 (0.9%) |
| Â Â Â Â H35.35 | 78 (1.8%) |
| Â Â Â Â H35.351 | 78 (1.8%) |
| Â Â Â Â H35.352 | 78 (1.8%) |
| Â Â Â Â H35.353 | 78 (1.8%) |
| Â Â Â Â H35.359 | 77 (1.8%) |
| Â Â Â Â H35.52 | 39 (0.9%) |
| Â Â Â Â H53.51 | 39 (0.9%) |
| Â Â Â Â H53.63 | 39 (0.9%) |
| totChol | |
| Â Â Â Â Mean (SD) | 236.7 (44.6) |
| Â Â Â Â Median (Q1, Q3) | 234.0 (206.0, 263.0) |
| Â Â Â Â Min, Max | 107.0, 696.0 |
| Â Â Â Â Unknown | 50 |
| sysBP | |
| Â Â Â Â Mean (SD) | 132.4 (22.0) |
| Â Â Â Â Median (Q1, Q3) | 128.0 (117.0, 144.0) |
| Â Â Â Â Min, Max | 83.5, 295.0 |
| diaBP | |
| Â Â Â Â Mean (SD) | 82.9 (11.9) |
| Â Â Â Â Median (Q1, Q3) | 82.0 (75.0, 90.0) |
| Â Â Â Â Min, Max | 48.0, 142.5 |
| BMI | |
| Â Â Â Â Mean (SD) | 25.8 (4.1) |
| Â Â Â Â Median (Q1, Q3) | 25.4 (23.1, 28.0) |
| Â Â Â Â Min, Max | 15.5, 56.8 |
| Â Â Â Â Unknown | 19 |
| heartRate | |
| Â Â Â Â Mean (SD) | 75.9 (12.0) |
| Â Â Â Â Median (Q1, Q3) | 75.0 (68.0, 83.0) |
| Â Â Â Â Min, Max | 44.0, 143.0 |
| Â Â Â Â Unknown | 1 |
| glucose | |
| Â Â Â Â Mean (SD) | 82.0 (24.0) |
| Â Â Â Â Median (Q1, Q3) | 78.0 (71.0, 87.0) |
| Â Â Â Â Min, Max | 40.0, 394.0 |
| Â Â Â Â Unknown | 386 |
| TenYearCHD | 641 (15.2%) |
| 1 n (%) | |
pt_note_explore_tbl <- raw_pt_note %>%
select(Note) %>%
tbl_summary(
statistic = list(
all_categorical() ~ "{n} ({p}%)",
all_continuous() ~ c("{mean} ({sd})",
"{median} ({p25}, {p75})",
"{min}, {max}")
),
type = list(all_continuous() ~ 'continuous2'),
digits = list(all_categorical() ~ c(0, 1),
all_continuous2() ~ c(1, 1))
)
pt_note_explore_tbl %>%
gtsummary::as_gt() %>%
gt::tab_header("Explore Patient Notes")
| Explore Patient Notes | |
| Characteristic | N = 2,6111 |
|---|---|
| Note | |
| Â Â Â Â chest pain | 47 (1.8%) |
| Â Â Â Â F/U to procedure | 286 (11.0%) |
| Â Â Â Â insurance denied new medicaiton | 190 (7.3%) |
| Â Â Â Â medication review | 380 (14.6%) |
| Â Â Â Â Nausea, indigestion, heartburn (PRN) | 239 (9.2%) |
| Â Â Â Â patient often fatigued | 47 (1.8%) |
| Â Â Â Â patient reports being out of breath, feeling pain in the chest | 190 (7.3%) |
| Â Â Â Â patient reports feeling pressure in the chest off and on | 190 (7.3%) |
| Â Â Â Â patient reports fluttering and irregular pain | 190 (7.3%) |
| Â Â Â Â patient reports pain in arm but otherwise says they feel fine | 48 (1.8%) |
| Â Â Â Â patient reports pain in foot but otherwise says they feel fine | 95 (3.6%) |
| Â Â Â Â patient reports pain in leg but otherwise says they feel fine | 47 (1.8%) |
| Â Â Â Â PRO: tightness in chest | 188 (7.2%) |
| Â Â Â Â routine visit and labwork | 286 (11.0%) |
| Â Â Â Â we discussed improving diet and getting more exercise | 47 (1.8%) |
| Â Â Â Â we discussed smoking and family history and I counseled them to lose weight | 141 (5.4%) |
| 1 n (%) | |
pt_demo_explore_tbl <- raw_pt_demo %>%
select(-patient_id, -male) %>%
tbl_summary(
statistic = list(
all_categorical() ~ "{n} ({p}%)",
all_continuous() ~ c("{mean} ({sd})",
"{median} ({p25}, {p75})",
"{min}, {max}")
),
type = list(all_continuous() ~ 'continuous2'),
digits = list(all_categorical() ~ c(0, 1),
all_continuous2() ~ c(1, 1))
)
pt_demo_explore_tbl %>%
gtsummary::as_gt() %>%
gt::tab_header("Explore Patient Demographics")
| Explore Patient Demographics | |
| Characteristic | N = 4,2401 |
|---|---|
| age | |
| Â Â Â Â Mean (SD) | 49.6 (8.6) |
| Â Â Â Â Median (Q1, Q3) | 49.0 (42.0, 56.0) |
| Â Â Â Â Min, Max | 32.0, 70.0 |
| education | |
| Â Â Â Â 1 | 1,720 (41.6%) |
| Â Â Â Â 2 | 1,253 (30.3%) |
| Â Â Â Â 3 | 689 (16.7%) |
| Â Â Â Â 4 | 473 (11.4%) |
| Â Â Â Â Unknown | 105 |
| sex | |
| Â Â Â Â female | 2,420 (57.1%) |
| Â Â Â Â male | 1,820 (42.9%) |
| 1 n (%) | |
create table diabetes_count as
WITH history AS ( SELECT patient_id FROM raw_pt_history WHERE diabetes = 1 )
, demo AS ( SELECT patient_id FROM raw_pt_demo WHERE age < 75 —- not including age 75 )
, clinic AS ( SELECT patient_id FROM raw_pt_clinic WHERE diagnosis_code IN (‘408850009’, ‘232063007’, ‘232053004’) AND totChol BETWEEN 185 and 230 AND diaBP > 100 —- not including 100 )
select count(h.patient_id) FROM history as h INNER JOIN demo as d ON h.patient_id = d.patient_id INNER JOIN clinic as C ON h.patient_id = c.patient_id
all_pts_all_tbls <- rbind(raw_pt_history %>% select(patient_id),
raw_pt_demo %>% select(patient_id),
raw_pt_clinic %>% select(patient_id),
raw_pt_note %>% select(patient_id) ) %>%
distinct()
# there are no patients that meet all criteria
diabetes_pts <- raw_pt_history %>%
filter(diabetes == 1) %>%
inner_join(
raw_pt_demo %>% filter(age < 75), by = 'patient_id'
) %>%
inner_join(
raw_pt_clinic %>%
filter(diagnosis_code == '408850009' |
diagnosis_code == '232063007' |
diagnosis_code == '232053004') %>%
filter(totChol >= 185 & totChol <= 230 & diaBP > 100 ), by = 'patient_id'
) %>%
mutate( diab_flag = 1) %>%
distinct(patient_id, diab_flag)
# lets give counts of each criteria instead:
diabetes_tbl <- raw_pt_history %>%
inner_join(
raw_pt_demo, by = 'patient_id'
) %>%
inner_join(raw_pt_clinic, by = 'patient_id') %>%
mutate(
code408850009 = case_when(
diagnosis_code == '408850009' ~ 1,
T ~ 0
),
code232063007 = case_when(
diagnosis_code == '232063007' ~ 1,
T ~ 0
),
code232053004 = case_when(
diagnosis_code == '232053004' ~ 1,
T ~ 0
),
diabetes_cholesteral = case_when(
totChol >= 185 & totChol <= 230 ~ 1,
T ~ 0
),
diabetes_bp = case_when(
diaBP > 100 ~ 1,
T ~ 0
),
age_under75 = case_when(
age < 75 ~ 1,
T ~ 0
)
) %>%
select( diabetes, code408850009, code232063007, code232053004, diabetes_cholesteral,
diabetes_bp, age_under75)
label(diabetes_tbl$diabetes) <- "Diabetes"
label(diabetes_tbl$code408850009) <- "408850009"
label(diabetes_tbl$code232063007) <- "232063007"
label(diabetes_tbl$code232053004) <- "232053004"
label(diabetes_tbl$diabetes_cholesteral) <- "Total cholesterol between 185 and 230"
label(diabetes_tbl$diabetes_bp) <- "Diastolic blood pressure reading of over 100"
label(diabetes_tbl$age_under75) <- "Age under 75"
diabetes_counts <- diabetes_tbl %>%
tbl_summary()
diabetes_counts %>%
gtsummary::as_gt() %>%
gt::tab_header("Counts of diabetes criteria")
| Counts of diabetes criteria | |
| Characteristic | N = 4,2201 |
|---|---|
| Diabetes | 109 (2.6%) |
| 408850009 | 87 (2.1%) |
| 232063007 | 61 (1.4%) |
| 232053004 | 52 (1.2%) |
| Total cholesterol between 185 and 230 | 1,519 (36%) |
| Diastolic blood pressure reading of over 100 | 317 (7.5%) |
| Age under 75 | |
| Â Â Â Â 1 | 4,220 (100%) |
| 1 n (%) | |
ha_notes <- raw_pt_note %>%
select(patient_id, Note) %>%
mutate(Note = tolower(Note) ) %>%
filter(str_detect(Note, paste0(c("pain","fluttering","pressure","tightness"), collapse = '|') )) %>%
mutate( HA_flag = 1)
ha_notes_count <- ha_notes %>%
summarize( HA_count = n_distinct(patient_id))
task4a <- all_tbls %>%
inner_join(ha_notes %>% select(patient_id, HA_flag), by = 'patient_id') %>%
filter(diagnosis_code == '232065000' | diagnosis_code == 'H35.52' ) %>%
filter(HA_flag == 1) %>%
filter(sex == 'male') %>%
summarize( HA_count = n_distinct(patient_id))
task4b <- all_tbls %>%
left_join(ha_notes %>% select(patient_id, HA_flag), by = 'patient_id') %>%
filter(diagnosis_code == '232065000' | diagnosis_code == 'H35.52' ) %>%
filter(is.na(HA_flag)) %>%
summarize( HA_count = n_distinct(patient_id))
chd_tbl <- all_tbls %>%
filter(!is.na(TenYearCHD) & !is.na(sex))
chd_by_sex <- chd_tbl %>%
tbl_summary(
include = c(TenYearCHD, sex),
by = 'sex'
) %>%
add_p()
chd_by_sex %>%
gtsummary::as_gt() %>%
gt::tab_header("Ten year CHD by sex")
| Ten year CHD by sex | |||
| Characteristic | female N = 2,4151 |
male N = 1,8161 |
p-value2 |
|---|---|---|---|
| TenYearCHD | 299 (12%) | 342 (19%) | <0.001 |
| 1 n (%) | |||
| 2 Pearson’s Chi-squared test | |||
diabetes_cig_tbl <- all_tbls %>%
filter(!is.na(cigsPerDay) & !is.na(diabetes)) %>%
mutate(Diabetes = case_when(
diabetes == 1 ~ 'Diabetic',
T ~ 'Non-Diabetic'
))
# model
m1 <- glm(diabetes ~ cigsPerDay, diabetes_cig_tbl, family = binomial)
cigs_diabets_tbl <- tbl_regression(m1, exponentiate = TRUE)
# figure
p <- ggplot(diabetes_cig_tbl, aes(x=Diabetes, y=cigsPerDay, fill=Diabetes)) +
geom_boxplot() +
stat_summary(fun.y=mean, geom="point", shape=23, size=4) +
theme_classic()
cigs_diabets_tbl %>%
gtsummary::as_gt() %>%
gt::tab_header("Prediction model diabetes by cigarettes per day")
| Prediction model diabetes by cigarettes per day | |||
| Characteristic | OR | 95% CI | p-value |
|---|---|---|---|
| cigsPerDay | 0.98 | 0.96, 1.00 | 0.017 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | |||
p