to do list:
# Load the study export; column types are guessed by readr (see the
# column specification it prints).
endo_data <- read_csv(
  file = "BloomEndoscopyStudy_DATA_2021-02-25_0904.csv"
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## appt_dte = col_date(format = ""),
## not_approached_note = col_character(),
## declined_note = col_character(),
## date_enrolled = col_date(format = ""),
## other_prep = col_character(),
## edg_result_note = col_character(),
## colo_notes = col_character(),
## other_cirrh_dx = col_character(),
## hcv_load = col_character(),
## hbv_load = col_character(),
## comorbid_med_conditions = col_character(),
## med_list = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# Clean the raw TEER readings and derive one mean TEER value per gut site.
#
# Step 1: a reading of exactly 0 in any of the six *_mean columns is recoded
#         to NA (existing NAs stay NA). Every column is tested against
#         itself, so a missing or zero reading at one site can never blank
#         out a valid reading at another site.
# Step 2: per row, the two replicate readings for each site are averaged.
#         `na.rm = TRUE` must be an argument of mean(), not an element of
#         c(): inside c() it is coerced to the number 1 and averaged in as
#         an extra reading, and NAs are not removed.
#         A site with both readings missing yields NaN, which is.na() treats
#         as missing.
# The result is ungrouped so the row-wise grouping does not leak into later
# steps.
endo_data_clean <- endo_data %>%
  mutate(
    across(
      c(d1_mean, d2_mean, ileum1_mean, ileum2_mean, colo1_mean, colo2_mean),
      ~ na_if(.x, 0)
    )
  ) %>%
  rowwise() %>%
  mutate(
    duod_teer_ohmcm2 = mean(c(d1_mean, d2_mean), na.rm = TRUE),
    ileum_teer_ohmcm2 = mean(c(ileum1_mean, ileum2_mean), na.rm = TRUE),
    colo_teer_ohmcm2 = mean(c(colo1_mean, colo2_mean), na.rm = TRUE)
  ) %>%
  ungroup()
# Summary table of selected patient characteristics, using the
# tbl_summary() defaults.
tbl_summary(
  select(endo_data_clean, age, sex, hx_varices, phtn, ascites, he, meld)
)
## Warning: The `.dots` argument of `group_by()` is deprecated as of dplyr 1.0.0.
| Characteristic | N = 59¹ |
|---|---|
| age | 59 (55, 68) |
| Unknown | 43 |
| sex | 10 (62%) |
| Unknown | 43 |
| hx_varices | 11 (69%) |
| Unknown | 43 |
| phtn | 13 (81%) |
| Unknown | 43 |
| ascites | 8 (50%) |
| Unknown | 43 |
| he | 3 (19%) |
| Unknown | 43 |
| meld | |
| 6 | 1 (6.2%) |
| 7 | 6 (38%) |
| 8 | 2 (12%) |
| 10 | 2 (12%) |
| 11 | 3 (19%) |
| 12 | 1 (6.2%) |
| 141 | 1 (6.2%) |
| Unknown | 43 |

¹ Median (IQR); n (%)
The sample is quite small (and mostly duodenal measurements), so we are not expecting much from these analyses.
# Reshape the per-site TEER columns to long format: one row per
# study_id x site, keeping only rows that have a TEER value.
location_teer <- endo_data_clean %>%
  select(
    study_id, phtn, he, probiotics_4wks, lactulose_4wks,
    contains("ohmcm2")
  ) %>%
  pivot_longer(
    cols = contains("ohmcm2"),
    names_to = "location",
    values_to = "teer_ohmcm2"
  ) %>%
  # Column names look like "<site>_teer_ohmcm2"; keep the site prefix and
  # discard the other two pieces (NA in `into` drops that piece).
  separate(location, into = c("location", NA, NA), sep = "_") %>%
  filter(!is.na(teer_ohmcm2))
# Count of TEER measurements per site, rendered as a flextable.
flextable::flextable(
  tabyl(location_teer, location)
)
location | n | percent |
colo | 2 | 0.125 |
duod | 12 | 0.750 |
ileum | 2 | 0.125 |
Most of the measurements (12 of 16) are from the duodenum.
On to modeling!
# Linear model of TEER on sampling site only.
teer_model <- lm(teer_ohmcm2 ~ location, data = location_teer)
# Reminder printed alongside the results: "colo" is the baseline level of
# `location`, so the duod and ileum coefficients are differences from colon.
print("colon is reference category")
## [1] "colon is reference category"
# Coefficient table (estimate, SE, t statistic, p-value) for the current
# teer_model.
flextable(
  broom::tidy(teer_model)
)
term | estimate | std.error | statistic | p.value |
(Intercept) | 9.374500 | 2.026709 | 4.6254789 | 0.0004751878 |
locationduod | 5.064875 | 2.189096 | 2.3136834 | 0.0376890605 |
locationileum | -2.755125 | 2.866199 | -0.9612468 | 0.3539672743 |
# Same coefficients, formatted with gtsummary::tbl_regression
tbl_regression(teer_model)
| Characteristic | Beta | 95% CI¹ | p-value |
|---|---|---|---|
| location | |||
| colo | — | — | |
| duod | 5.1 | 0.34, 9.8 | 0.038 |
| ileum | -2.8 | -8.9, 3.4 | 0.4 |

¹ CI = Confidence Interval
# One-row model-fit summary (R-squared, sigma, AIC, ...) for the current
# teer_model.
flextable(
  broom::glance(teer_model)
)
r.squared | adj.r.squared | sigma | statistic | p.value | df | logLik | AIC | BIC | deviance | df.residual | nobs |
0.5529703 | 0.4841965 | 2.866199 | 8.040421 | 0.005335639 | 2 | -37.88969 | 83.77938 | 86.86974 | 106.7963 | 13 | 16 |
# Refit the TEER model with additional covariates; this overwrites the
# location-only `teer_model` fitted earlier.
# The commented-out terms are candidate covariates that are currently
# switched off; the trailing `NULL` lets any term be toggled without
# leaving a dangling `+`.
# NOTE(review): study_id enters as a single numeric slope (one coefficient
# in the output) -- confirm this is intended rather than a per-participant
# effect.
teer_model <- lm(
  teer_ohmcm2 ~ location +
    # phtn +
    # he +
    # probiotics_4wks +
    # lactulose_4wks +
    study_id +
    NULL,
  data = location_teer
)
# Reminder printed alongside the results: "colo" is still the baseline level
# of `location` in the refitted model.
print("colon is reference category")
## [1] "colon is reference category"
# Coefficient table for the refitted teer_model.
flextable(
  broom::tidy(teer_model)
)
term | estimate | std.error | statistic | p.value |
(Intercept) | -26.26449005 | 48.5754568 | -0.5406947 | 0.59861118 |
locationduod | 4.62831722 | 2.3068666 | 2.0063220 | 0.06789744 |
locationileum | -2.75512500 | 2.9183815 | -0.9440592 | 0.36375979 |
study_id | 0.07937414 | 0.1080882 | 0.7343460 | 0.47684277 |
# One-row model-fit summary for the refitted teer_model.
flextable(
  broom::glance(teer_model)
)
r.squared | adj.r.squared | sigma | statistic | p.value | df | logLik | AIC | BIC | deviance | df.residual | nobs |
0.5721953 | 0.4652441 | 2.918381 | 5.35006 | 0.01429469 | 3 | -37.53803 | 85.07605 | 88.939 | 102.2034 | 12 | 16 |