knitr::opts_chunk$set(
echo = TRUE,
warning = FALSE,
fig.align = "center",
dev = "png",
cache = TRUE,
error = FALSE,
include = TRUE,
message = FALSE)
library("labelled")
library("knitr")
library("gdtools")
library("rlang")
library("htmltools")
library("kableExtra")
library("reactable")
library("ggthemes")
library("naniar")
library("BlandAltmanLeh")
library("reactable")
library("ggsignif")
library("sjPlot")
library("readxl")
library("janitor")
library("skimr")
library("lubridate")
library("tableone")
library("finalfit")
library("scales")
library("ggsci")
library("glue")
library("blandr")
library("nortest")
library("CBCgrps")
library("gt")
library("here")
library("cowplot")
library("googlesheets4")
library("ggpubr")
library("htmltools")
# remotes::install_github("ddsjoberg/gtsummary")
# library("gtsummary")
# library("gtsummary")
library("gtsummary")
library("ggalluvial")
library("car")
library("broom")
library("labelled")
# remotes::install_github("jthomasmock/espnscrapeR")
library("espnscrapeR")
# install.packages("espnscrapeR")
library("tidyverse")
rm(list = ls())
n_wrap_cols <- 7
height_wrap <- 17
width_wrap <- 10
wrap_font_size <- 9
f_num <- 0
t_num <- 0
square_figure_size <- c(5, 5)
def_cardiotoxicity_decline <- 10
def_cardiotoxicity_cutoff <- 50
col_MIM <- "#00bfc4"
col_muga <- "#f8766d"
check_cross <- function(x, y) {
k <- length(x)
if ((min(x)>def_cardiotoxicity_cutoff & min(y)>def_cardiotoxicity_cutoff) | k==1)
return(FALSE)
diff <- numeric((k / 2) * (k - 1))
z <- 0
for (i in 1:(k - 1)) {
for (j in (i + 1):k) {
z <- z + 1
diff[z] <- min(x[j] - x[i], y[j] - y[i], x[j] - y[i], y[j] - x[i])
}
}
return(any(diff < (-def_cardiotoxicity_decline)))
}
# which.min(x[j] - x[i], y[j] - y[i], x[j] - y[i], y[j] - x[i])
check_cross_update <- function(x, y) {
k <- length(x)
if (k==1)
return(FALSE)
diff <- numeric((k / 2) * (k - 1))
z <- 0
for (i in 1:(k - 1)) {
for (j in (i + 1):k) {
z <- z + 1
v1 <- c(x[j], y[j], x[j], y[j])
v2 <- c(x[i], y[i], y[i], x[i])
v <- v1-v2
diff[z] <- min(v)
w <- which.min(v)
if(v1[w]>def_cardiotoxicity_cutoff) diff[z] <- 0
}
}
return(any(diff < (-def_cardiotoxicity_decline)))
}
check_both <- function(x, y=NULL) {
k <- length(x)
if (k==1)
return(FALSE)
diff <- numeric((k / 2) * (k - 1))
z <- 0
for (i in 1:(k - 1)) {
for (j in (i + 1):k) {
z <- z + 1
if (is.null(y)) {
v1=x[j]
v2=x[i]
} else {
v1 <- c(x[j], y[j], x[j], y[j])
v2 <- c(x[i], y[i], y[i], x[i])
}
v <- v1-v2
diff[z] <- min(v)
w <- which.min(v)
if(v1[w]>=def_cardiotoxicity_cutoff) diff[z] <- 0
}
}
return(any(diff < (-def_cardiotoxicity_decline)))
}
check <- function(x) {
k <- length(x)
if (min(x)>def_cardiotoxicity_cutoff | k == 1)
return(FALSE)
diff <- numeric((k / 2) * (k - 1))
z <- 0
for (i in 1:(k - 1)) {
for (j in (i + 1):k) {
z <- z + 1
diff[z] <- x[j] - x[i]
}
}
return(any(diff < (-def_cardiotoxicity_decline)))
}
f_add <- function() {
f_num <<- f_num + 1
}
t_add <- function() {
t_num <<- t_num + 1
}
get_n_obs <- function() {
return (ALL %>% nrow())
}
get_n_patients <- function() {
return (ALL %>% distinct(study_id) %>% nrow)
}
fix_table_header <- function(x) {
# "Overall" column present?
if ("stat_0" %in% colnames(x$table_body)) {
modify_header(
x,
stat_by = "**{level}**<br>N = {n}",
stat_0 = "**Overall**<br>N = {n}"
)
} else {
modify_header(
x,
stat_by = "**{level}**<br>N = {n}"
)
}
}
# source("loading_initial_files.R")
na_strings <- c("NA", "N A", "N / A", "N/A", "N/ A", "n/a","Not Available", "NOt available", "n.a", "n.a.","unknown","Unknown", "none", "")
load_file <- here("data","DefiningCancerTherap_DATA_LABELS_2021-04-05_1207.csv")
ALL <- load_file %>%
read_csv(na = na_strings) %>%
clean_names() %>%
remove_empty()
n_obs_initial <- get_n_obs()
n_patients_initial <- get_n_patients()
ALL <- ALL %>%
select(-contains("time_of_muga"),-contains("complete"),-contains("evaluator")) %>%
select(
-"first_name",
-"last_name",
-"zip_code",
-"other_comments",
-"date_echo_was_checked",
-"other_comments",
-"reason_for_exam_1",
-"date_visit_was_checked",
-"type_of_echocardiogram",-"echo_images_are_available_on_workstation",-"source_of_death_data_choice_other"
) %>%
# select(
# -insurance,
# -baseline_data_available,
# -if_other_please_describe,
# -(
# anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin:total_number_of_pembrolizumab_doses
# )
# ) %>%
# # select(-(coronary_artery_disease_choice_no:cancer_choice_unknown)) %>%
# # # select(-(cocaine_use:did_patient_have_a_visit_or_follow_up)) %>%
mutate_if(is.character, funs(str_replace_all(., "^Yes", "1"))) %>%
mutate_if(is.character, funs(str_replace_all(., "^No", "0"))) %>%
mutate_if(is.character, funs(str_replace_all(., "^Unchecked", "0"))) %>%
mutate_if(is.character, funs(str_replace_all(., "^Checked", "1"))) %>%
rename(
MIM = automatically_calculated_ef,
JS = automatically_calculated_ef_1,
visit_number = event_name,
MIM_date = date_of_muga_scan,
JS_date = date_of_muga_scan_1
) %>%
mutate(dose_mci = parse_number(as.character(dose))) %>%
mutate(
full_wrap_name = str_c("Patient ", ALL$study_id),
difference_in_ef = (JS - MIM),
difference_more_5_prc = ifelse(difference_in_ef > 5, 1, 0),
type_of_cancer = as.factor(type_of_cancer),
abs_difference_in_ef = abs(difference_in_ef),
visit_number = fct_relevel(visit_number, as.character(c(1:50))),
study_id = fct_relevel(as.factor(study_id), as.character(c(1:1500))),
did_patient_have_a_visit_or_follow_up = as.factor(did_patient_have_a_visit_or_follow_up),
vitals_available = as.factor(vitals_available)
) %>%
mutate(full_wrap_name=as.factor(full_wrap_name)) %>%
mutate(full_wrap_name=fct_reorder(full_wrap_name, as.numeric(study_id))) %>%
mutate(difference_between_modalities=JS-MIM) %>%
mutate(difference_betweem_modialieits_more_eq_5=ifelse(difference_between_modalities>=5,1,0)) %>%
mutate(difference_betweem_modialieits_more_eq_10=ifelse(difference_between_modalities>=10,1,0)) %>%
mutate(difference_betweem_modialieits_more_eq_15=ifelse(difference_between_modalities>=15,1,0)) %>%
mutate(visit_number = recode(visit_number, "Death" = "3")) %>%
mutate(type_of_cancer = recode(type_of_cancer, "Head and Neck" = "Other")) %>%
mutate_at(vars(contains("date")), as_date) %>%
mutate_at(vars(gender,race:baseline_data_available), as.factor) %>%
mutate_at(vars(cancer:type_of_cancer, day_of_the_week_performed_1), as.factor) %>%
mutate_at(vars(anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin:other), as.factor) %>%
mutate_at(vars(coronary_artery_disease_choice_no:ivda, number_of_frames), as.factor) %>%
mutate_at(vars(event_type_choice_routine_follow_up:what_was_the_cause_of_hospitalization_choice_other), as.factor) %>%
mutate_at(vars(discharge_summary_in_epic, muga_scan_performed, day_of_the_week_performed, scanner), as.factor) %>%
mutate(heart_rate_1=as.numeric(heart_rate_1)) %>%
mutate_at(vars(x9_fascicular_block_choice_none:x9_fascicular_block_choice_left_posterior_fascicular_block, echo_performed, did_patient_die), as.factor) %>%
mutate_at(vars(what_was_the_cause_of_death_choice_cardiac_myocardial_infarction:source_of_death_data_choice_social_security_death_index,
manual_correction, manual_correction_1, muga_scan_performed_1, difference_more_5_prc), as.factor) %>%
mutate_at(vars(contains("difference_betweem_modialieits")), as_factor) %>%
mutate(black = ifelse(race=="Black or African American",1,0)) %>%
mutate(docitaxel_abraxin_paclitaxel_combined_of_3 = ifelse((docetaxel_taxotere == 1 | abraxane_paclitaxel_albumin_stabilized_nanoparticle_formulation == 1),1,0)) %>%
mutate_at(vars(docitaxel_abraxin_paclitaxel_combined_of_3),as.factor) %>%
relocate(MIM, .after = visit_number) %>%
relocate(JS, .after = visit_number) %>%
relocate(type_of_cancer, .after = cancer) %>%
relocate(reason_for_exam, .after = cancer) %>%
relocate(docitaxel_abraxin_paclitaxel_combined_of_3, .before = docetaxel_taxotere)
ALL <- ALL %>%
mutate(
coronary_artery_disease=replace_na(coronary_artery_disease, 0),
prior_myocardial_infarction=replace_na(prior_myocardial_infarction, 0),
prior_pci_balloon_angioplasty_or_stent=replace_na(prior_pci_balloon_angioplasty_or_stent, 0),
coronary_artery_bypass_surgery=replace_na(coronary_artery_bypass_surgery, 0),
asthma=replace_na(asthma, 0),
copd=replace_na(copd, 0),
) %>%
mutate(cad_combined_of_4=ifelse((coronary_artery_disease == 1 | prior_myocardial_infarction == 1 |
prior_pci_balloon_angioplasty_or_stent == 1 | coronary_artery_bypass_surgery == 1),1,0)) %>%
mutate(pulm_disease_combined_of_2 = ifelse((asthma ==1 | copd == 1),1,0)) %>%
relocate(pulm_disease_combined_of_2, .before = asthma) %>%
relocate(cad_combined_of_4, .before = coronary_artery_disease)
ALL <- ALL %>% # remove non-cancer patients
group_by(study_id) %>%
fill(cancer, type_of_cancer, mrn, gender, date_of_birth, race, ethnicity) %>%
mutate(cancer=replace_na(cancer,1)) %>%
filter(cancer != 0)
n_obs_no_cancer <- get_n_obs()
n_patients_no_cancer <- get_n_patients()
ALL <- ALL %>% # remove patents with no MUGA on follow up
drop_na(JS_date)
n_obs_has_study_on_follow_up <- get_n_obs()
n_patients_has_study_on_follow_up <- get_n_patients()
ALL <- ALL %>% # remove patents with EF not loading on follow up
drop_na(MIM)
n_obs_has_ef_study_on_follow_up <- get_n_obs()
n_patients_has_ef_study_on_follow_up <- get_n_patients()
ALL <- ALL %>% # patients after exclusions from here on
add_count(study_id, name="n_total_visits") %>%
relocate(n_total_visits, .after=visit_number)
ALL <- ALL %>%
group_by(study_id) %>%
mutate(cardiotoxicity_algorithm_MIM=check_both(MIM)) %>%
mutate(cardiotoxicity_algorithm_JS=check_both(JS)) %>%
mutate(cardiotoxicity_algorithm_crossover=check_both(JS, MIM)) %>%
ungroup() %>%
mutate(muga_reclassified=ifelse(cardiotoxicity_algorithm_JS!=cardiotoxicity_algorithm_MIM,1,0)) %>%
mutate(cardiotox_any_pure=ifelse((cardiotoxicity_algorithm_MIM==1 | cardiotoxicity_algorithm_JS==1),1,0)) %>%
mutate_at(vars(contains("algorithm")), as.numeric)
ALL_DIST <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE)
n_obs_total_follow <- ALL %>% filter(n_total_visits>1) %>% nrow
n_patients_total_follow <- ALL_DIST %>% nrow
n_total <- n_patients_total_follow
# ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% pull(cardiotoxicity_algorithm_MIM) %>% sum()
# ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% pull(cardiotoxicity_algorithm_JS) %>% sum()
# ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% pull(muga_reclassified) %>% sum()
#
# ALL_DIST %>%
# select(cardiotoxicity_algorithm_MIM, cardiotoxicity_algorithm_JS, muga_reclassified) %>% write_csv("taa.csv")
sign_number_perc <- 3
#pure
n_cardiot_JS <- ALL_DIST %>% summarize(sum(cardiotoxicity_algorithm_JS)) %>% pull()
n_cardiot_MIM <- ALL_DIST %>% summarize(sum(cardiotoxicity_algorithm_MIM)) %>% pull()
n_cardiot_any <-ALL_DIST %>% summarize(sum(cardiotox_any_pure)) %>% pull()
cardiot_JS_perc <- round(n_cardiot_JS/n_total,3)*100
cardiot_MIM_perc <- round(n_cardiot_MIM/n_total,sign_number_perc)*100
cardiot_any_perc <- round(n_cardiot_any/n_total,sign_number_perc)*100
#crossover
n_cardiotoxicity_any_crossover <- sum(ALL_DIST$cardiotoxicity_algorithm_crossover)
cardiotoxicity_any_crossover_perc <- round(n_cardiotoxicity_any_crossover/n_total,sign_number_perc)*100
ALL <- ALL %>% mutate(muga_reclassified_crossover_MIM = ifelse(cardiotoxicity_algorithm_crossover != cardiotoxicity_algorithm_MIM,1,0))
ALL <- ALL %>% mutate(muga_reclassified_crossover_JS = ifelse(cardiotoxicity_algorithm_crossover != cardiotoxicity_algorithm_JS,1,0))
ALL_DIST <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE)
f_add()
ALL <- ALL %>%
mutate(age_years = time_length(difftime(JS_date,date_of_birth), "year"))
BMI <- read_csv(here("data", "bmi.csv")) %>%
mutate(study_id=as.factor(study_id)) %>%
mutate(visit_number=as.factor(visit_number))
ALL <- ALL %>%
left_join(BMI, by=c("study_id","visit_number"))
ALL <- ALL %>%
# mutate(bmi_combined = ifelse(is_na(bmi.x),bmi,bmi.y)) %>%
# pull(bmi_combined)
mutate(
bmi_combined = case_when(
is.na(bmi.x) ~ bmi.y,
!is.na(bmi.x) ~ bmi.x
)
)
# ALL %>%
# mutate(across(where(c(anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin,herceptin_trastuzumab)), ~na_if(., "0")))
# #
# # ALL %>% view()
# ALL <- ALL %>%
# mutate(across((anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin:herceptin_trastuzumab), ~replace_na(.x, 0)))
# ALL$anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin
ALL <- ALL %>%
mutate(
across(anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin:lynparza_olaparib, ~replace_na(.x, 0))
)
# Or if you're pipe shy:
ALL <- ALL %>%
mutate(anthracycyclines = anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin) %>%
mutate(trastuzumab = herceptin_trastuzumab) %>%
mutate(alkylating_agents = cyclophosphamide) %>%
mutate(antimicrotubule_agents = ifelse(docetaxel_taxotere == 1 | paclitaxel_taxol == 1 | abraxane_paclitaxel_albumin_stabilized_nanoparticle_formulation ==1, 1,0)) %>%
mutate(antimetabolites = ifelse(tamoxifen_nolvadex_or_soltamox== 1 | methotrexate==1 | cytarabine==1 |x5_fu_fluorouracil_injection==1 | cladribine==1,1,0)) %>%
mutate(immuno_checkpoint_inhibitors =ifelse(pembrolizumab==1 | nivolumab==1 | ipilimumab==1 | atezolizumab==1 | avelumab==1 | durvalumab==1,1,0)) %>%
mutate(other = ifelse(etoposide ==1 | other ==1,1,0))
# mutate(ankylating_agents = ifelse(cisplatin ==1 | carboplatin == 1))
# ALL$abraxane_paclitaxel_albumin_stabilized_nanoparticle_formulation
# tabyl(ALL$etoposide)
# ALL$other
# ALL %>% view()
#
# ALL %>% view()
Initially we start with 719 Epic visits. Table and flowchart below summarizes patient numbers:
t_add()
theme_gtsummary_compact()
tribble(
~ Class,
~ Initial,
~ "N Cancer",
~ "N Follow up",
~ "N follow up MUGA EF laded",
~ "N in patients >=2 MUGAs",
"Number of obs.",
n_obs_initial,
n_obs_no_cancer,
n_obs_has_study_on_follow_up,
n_obs_has_ef_study_on_follow_up,
n_obs_total_follow,
"Number of patients",
n_patients_initial,
n_patients_no_cancer,
n_patients_has_study_on_follow_up,
n_patients_has_ef_study_on_follow_up,
n_patients_total_follow
) %>%
gt(
rowname_col = "aaaaaaaaaaaaaaaaa",
) %>%
gt_theme_538()
Class | Initial | N Cancer | N Follow up | N follow up MUGA EF laded | N in patients >=2 MUGAs |
---|---|---|---|---|---|
Number of obs. | 719 | 687 | 591 | 582 | 359 |
Number of patients | 322 | 312 | 312 | 310 | 87 |
# fmt_number(
# columns = vars(perc_with_cardiotoxicity),
# decimals = 1
# ) %>%
# tab_header("Table 1, Patient numbers") %>%
# cols_label(
# country_name = "Name",
# year = "Year",
# population = "Population"
# )
#
# kbl(caption = glue("Table {t_num}, Patient numbers")) %>%
# kable_classic(full_width = F, html_font = "Cambria")
# read PNG i wyswietl go potem, zamiast bezposrednio go ladowac tutaj (nizej)
#stworz funkcje ktora tworzy wykres i ma f_add
# ggsave("plot2.png", scale = 1, dpi=600)
Here is a plot with correlation of MUGA software and comparison between them:
Red dashed line is MIM EF = Jetstream EF. We see out best fit line is shifted to the left in relation to red dashed line, meaning that Jetstream EF is on average higher than MIM. Pretty good correlation with R = 0.876, with few ourliers, which we would need to investigate more. However, correlation studies show the relationship between one variable and another, not the differences, and are not recommended as a method for assessing the comparability between methods. (Giavarina 2015) (Bland and Altman 1986)
f_add()
theme_set(theme_pubr())
scatter <- ALL %>%
ggplot +
aes(MIM,JS) +
geom_point(
size = 2,
alpha = 0.5,
shape = 20,
# add = "reg.l
) +
stat_cor(p.accuracy = 0.001, r.accuracy = 0.001)+
labs(
# title="Correlation plot between - MUGA modalities",
caption = " ",
y = "MUGA LVEF - JS",
x = "MUGA LVEF - MIM "
) +
scale_y_continuous(
breaks = breaks_width(20),
limits = c(15, 90),
labels = scales::percent_format(accuracy = 1, scale = 1)
) +
scale_x_continuous(
breaks = breaks_width(20),
limits = c(15, 90),
labels = scales::percent_format(accuracy = 1, scale = 1)
) +
geom_abline(
slope = 1,
intercept = 0,
colour = "red",
linetype = 2,
size = 0.75
) +
stat_smooth(
size = 1,
color = "black",
alpha = 0.5,
method = "lm"
)
bland_stats <- blandr.statistics (ALL$JS, ALL$MIM, sig.level = 0.95)
# summary(bland_stats)
text_up <- glue(
"+1.96 SD, mean = {mid} (95%CI {low}%-{up}%)",
up = round(bland_stats$upperLOA_upperCI, 1),
mid = round(bland_stats$upperLOA, 1),
low = round(bland_stats$upperLOA_lowerCI, 1)
)
text_down <- glue(
"+1.96 SD, mean = {mid} (95%CI {low}%-{up}%)",
up = round(bland_stats$lowerLOA_upperCI, 1),
mid = round(bland_stats$lowerLOA, 1),
low = round(bland_stats$lowerLOA_lowerCI, 1)
)
text_me <- glue(
"+1.96 SD, mean = {mid} (95%CI {low}%-{up}%)",
up = round(bland_stats$biasUpperCI, 1),
mid = round(bland_stats$bias, 1),
low = round(bland_stats$biasLowerCI, 1)
)
bland <-
blandr.draw(
ALL$JS,
ALL$MIM,
method1name = "Jetstream",
method2name = "MIM",
plotTitle = "",
annotate = TRUE,
normalHigh = TRUE
) +
labs(
y = "Difference MUGA LVEF between JS and MIM ",
x = "Mean LVEF JS and MIM"
# title="Correlation plot between - MUGA modalities",
# caption = glue("Figure {f_num}. Scatter plot and Bland-Altmann plot")
)+
scale_y_continuous(breaks=breaks_width(20),labels = scales::percent_format(accuracy = 1, scale = 1)) +
scale_x_continuous(breaks=breaks_width(20),labels = scales::percent_format(accuracy = 1, scale = 1)) +
annotate("text", x = 41.5, y = 21.5, label = text_up) +
annotate("text", x = 41, y = 11.5, label = text_me) +
annotate("text", x = 41, y = 1.5, label = text_down)
plot_grid(scatter, bland, labels = "AUTO")
The Bland–Altman plot is a method for comparing two measurements of the same variable. The concept is that X-axis is the mean of your two measurements, and the Y-axis is the difference between the two measurements. The chart can then highlight anomalies, for example, if one method always gives too high a result, then all points are above or below the zero line. It can also reveal that one method overestimates high values and underestimates low values. If the points on the Bland–Altman plot are scattered all over the place, above and below zero, then it suggests that there is no consistent bias of one approach versus the other. It is, therefore, a good first step for two measurement techniques of a variable. It shows that on average Jestrtram EF is 9.91% higher than MIM.
f_add()
END <- ALL %>%
select(study_id, visit_number, MIM, JS)
END <- pivot_longer(
END,
cols = c(MIM, JS),
names_to = "muga_type",
values_to = "ejection_fraction"
)
END$visit_number <- as.numeric(END$visit_number)
p <- ggboxplot(
END,
x = "muga_type",
y = "ejection_fraction",
color = "muga_type",
palette = c(col_MIM, col_muga, alpha = 1),
# palette = "jco",
add = "jitter",
add.params = list(alpha = 0.6, size = 1.5)
)
figure_boxplot <- p +
geom_hline(
yintercept = def_cardiotoxicity_cutoff,
linetype = "dashed",
color = "black",
size = 0.75)+
geom_signif(
comparisons = list(c("MIM", "JS")),
map_signif_level = FALSE,
textsize = 4,
annotation = c("p < 0.001") #XXXXXXX
) +
labs(
# title ="Comparison of ejection fraction between MUGA modalities",
# caption = glue(
# "Figure {f_num}. Comparison of ejection fractions between MUGA softwares"
# ),
# subtitle = "between MUGA modalities",
x = "MUGA Software",
y = "MUGA LVEF"
) +
scale_y_continuous(breaks = breaks_width(20),
labels = scales::percent_format(accuracy = 1, scale = 1)) +
theme(legend.position = "none") +
scale_x_discrete(labels = c("MIM", "JS"))
figure_trand <- ALL %>%
filter(n_total_visits > 2) %>%
select(study_id,
visit_number,
MIM,
JS,
MIM_date,
JS_date) %>%
group_by(study_id) %>%
mutate(date_diff_month = (MIM_date - MIM_date[1]) / 30.5) %>%
ungroup() %>%
mutate(date_diff_month = round(parse_number(as.character(date_diff_month)))) %>%
group_by(date_diff_month) %>%
mutate(date_diff_month = plyr::round_any(date_diff_month, 6)) %>%
pivot_longer(cols = c(JS, MIM)) %>%
group_by(name, date_diff_month) %>%
summarise(
median=median(value),
sd=sd(value),
n=n(),
se=ifelse((standard.error=TRUE), sd/sqrt(n)
, sd)
) %>%
filter(n>1) %>%
ggplot() +
aes(x = date_diff_month, y = median, color = name) +
geom_line(size = 1.5) +
geom_point(size = 2) +
labs(
title = "",
# caption = glue("Figure {f_num}. Errorbar represents standard error"),
x = "Months from baseline",
y = "LVEF"
) +
scale_y_continuous(label = label_percent(scale = 1, accuracy = 1)) +
scale_x_continuous(breaks = seq(0, 54, by = 6)) +
geom_errorbar(aes(ymin = median-se, ymax = median+se), width = 0.8) +
geom_ribbon(aes(ymin = median-sd, ymax = median+sd, fill=name), alpha=0.15, outline.type = "full", color=NA) +
# geom_ribbon(aes(ymin = MIM, ymax = JS)) +
geom_hline(
yintercept = def_cardiotoxicity_cutoff,
linetype = "dashed",
color = "black",
size = 0.5
) +
theme_pubr() +
theme(legend.title = element_blank())
plot_grid(figure_boxplot, figure_trand, labels="AUTO")
Clearly, Jet Stream has higher median, we knew that.
theme_set(theme_pubr())
my_comparisons <- list( c("Forte", "SKYLight"), c("Brightview", "Forte"), c("Brightview", "SKYLight"))
cameras_figure <-
ALL %>%
drop_na(scanner) %>%
ggplot() +
aes(y=difference_between_modalities, x=scanner, fill=scanner) +
geom_boxplot() +
# labs(title=paste("n =",nrow(ALL))) +
labs(y="Difference in LVEF between softwares") +
scale_y_continuous(labels = percent_format(accuracy = 1, scale = 1)) +
stat_compare_means(comparisons = my_comparisons) +
geom_hline(
yintercept = 10,
linetype = "dashed",
color = "black",
size = 0.5) +
theme(legend.position = "none") +
scale_fill_manual(values=c("gray95", "gray55", "gray27")) +
labs(x="")
my_comparisons2 <- list(c("16", "32"))
frames_figure <-
ALL %>%
drop_na(number_of_frames) %>%
ggplot() +
aes(y=difference_between_modalities, x=number_of_frames, fill=number_of_frames) +
geom_boxplot() +
labs(y="Difference in LVEF between softwares") +
scale_y_continuous(labels = percent_format(accuracy = 1, scale = 1)) +
# labs(title=paste("n =",nrow(ALL))) +
stat_compare_means(comparisons = my_comparisons2) +
geom_hline(
yintercept = 10,
linetype = "dashed",
color = "black",
size = 0.5) +
theme(legend.position = "none") +
scale_fill_manual(values=c("gray74", "gray58", "gray34")) +
labs(x="Number of Frames")
ALLI <-
readxl::read_xlsx("Interobserver variability_AR-GS.xlsx",
na = na_strings,
skip = 1)
ALLI <- clean_names(ALLI)
ALLI <- ALLI[, colSums(is.na(ALLI)) < nrow(ALLI)]
ALLI <- ALLI %>%
filter(!is.na(mrn)) %>%
filter(str_length(abstractor) < 3) %>%
select(case_no, abstractor, ef_5, ef_17) %>%
rename(JS_ef = ef_5) %>%
rename(MIM_ef = ef_17) %>%
mutate(MIM_ef = as.numeric(MIM_ef)) %>%
mutate(JS_ef = as.numeric(JS_ef)) %>%
mutate(MIM_ef = round(MIM_ef))
ALLI <- ALLI %>% pivot_wider(
names_from = abstractor, values_from = c(JS_ef, MIM_ef)
)
ALLS <- read.csv(here("data", "surrender_pivoted.csv"))
f_add()
interreader <- ALLS %>%
ggplot(aes(x = AR, y = GS, palette = "#d2d2d2")) +
geom_abline(
slope = 1,
intercept = 0,
colour = "red",
linetype = 2,
size = 0.5
) +
geom_point(size = 1.5, alpha = 0.5) +
labs(
x = "Observer 1, LVEF",
y = "Observer 2, LVEF"
# caption = glue("Figure {f_num}. inter-observer variation"
) +
stat_smooth(
size = 1,
color = "black",
alpha = 0.5,
method = "lm"
) +
stat_cor(p.accuracy = 0.001, r.accuracy = 0.01) +
scale_y_continuous(breaks = breaks_width(10),
labels = scales::percent_format(accuracy = 1, scale = 1)) +
scale_x_continuous(breaks = breaks_width(10),
labels = scales::percent_format(accuracy = 1, scale = 1)) +
theme_pubr()
intra2 <- read_csv(here("data","intraobserver_variability_pivoted.csv")) %>%
clean_names() %>%
remove_empty()
intrareader <- intra2 %>%
ggplot(aes(x = ms1, y = ms2, palette = "#d2d2d2")) +
geom_abline(
slope = 1,
intercept = 0,
colour = "red",
linetype = 2,
size = 0.5
) +
geom_point(size = 1.5, alpha = 0.5) +
labs(
x = "Measurement 1, LVEF",
y = "Measurement 2, LVEF"
# caption = glue("Figure {f_num}. inter-observer variation"
) +
stat_smooth(
size = 1,
color = "black",
alpha = 0.5,
method = "lm"
) +
stat_cor(p.accuracy = 0.001, r.accuracy = 0.01) +
scale_y_continuous(breaks = breaks_width(10),
labels = scales::percent_format(accuracy = 1, scale = 1)) +
scale_x_continuous(breaks = breaks_width(10),
labels = scales::percent_format(accuracy = 1, scale = 1)) +
theme_pubr()
plot_grid(cameras_figure, frames_figure, interreader, intrareader,labels = "AUTO")
An amazing plot
ALL %>%
drop_na(scanner) %>%
group_by(scanner) %>%
summarise(median(difference_between_modalities)) %>%
gt() %>%
gt_theme_538()
scanner | median(difference_between_modalities) |
---|---|
Brightview | 9.845 |
Forte | 10.560 |
SKYLight | 9.330 |
ALL %>%
drop_na(number_of_frames) %>%
group_by(number_of_frames) %>%
summarise(median(difference_between_modalities)) %>%
gt() %>%
gt_theme_538()
number_of_frames | median(difference_between_modalities) |
---|---|
16 | 10.585 |
32 | 9.810 |
END <- ALL %>%
group_by(study_id, n()) %>%
summarise(n(), study_id, visit_number, MIM, JS, full_wrap_name, n_total_visits) %>%
filter(`n()` > 1) %>%
ungroup()
END <- pivot_longer(
END,
cols = c(MIM, JS),
names_to = "muga_type",
values_to = "ejection_fraction"
)
END %>%
filter(n_total_visits > 2) %>%
filter(study_id %in% c(84,90,141,284)) %>%
ggplot(aes(x = visit_number, y = ejection_fraction, color = muga_type)) +
geom_line(aes(group = muga_type), size = 1.5) +
scale_y_continuous(labels = percent_format(scale=1) ) +
geom_point(size = 2) +
geom_hline(
yintercept = def_cardiotoxicity_cutoff,
linetype = "dashed",
color = "black",
size = 0.5
) +
facet_wrap("full_wrap_name", scales = 'free_x') +
labs(
y="LVEF",
x=""
) +
theme_pubr()
I was thinking how to look at trends and came up with an idea to do plots for each patient with >=2 imaging visits (n=87):
END <- ALL %>%
group_by(study_id, n()) %>%
summarise(n(), study_id, visit_number, MIM, JS, full_wrap_name) %>%
filter(`n()` > 1) %>%
ungroup()
END <- pivot_longer(
END,
cols = c(MIM, JS),
names_to = "muga_type",
values_to = "ejection_fraction"
)
f_add()
figure <-
ggplot(END,
aes(x = visit_number, y = ejection_fraction, color = muga_type)) +
geom_line(aes(group = muga_type)) +
geom_point() +
geom_hline(
yintercept = def_cardiotoxicity_cutoff,
linetype = "dashed",
color = "black",
size = 0.5
) +
scale_y_continuous(breaks = c(20, 50, 80)
,
labels = scales::percent_format(scale = 1)) +
scale_x_discrete(guide = guide_axis(n.dodge = 1.5))
figure +
labs(
title = glue(
"Automated ejection fraction in MUGA study, n={n_patients_total_follow}"
),
subtitle = glue("by MUGA test type and visit number, dashed line represents Ejection Fraction = {def_cardiotoxicity_cutoff}%")
# caption = glue("Figure {f_num}")
) +
facet_wrap("full_wrap_name", scales = 'free_x', ncol = n_wrap_cols) +
theme_fivethirtyeight() +
theme(
legend.position = "top",
axis.text = element_text(size = wrap_font_size),
strip.text.x = element_text(size = wrap_font_size)
)
The problem is in patients whose lines are not parallel, because different MUGA softwares could lead to different EFs and different therapeutic decisions - to stop or not to stop chemo.
We could identify patients where the lines are crossing and see what are the characteristics of these patients. Some of them are very parallel and some cross over a lot.
Eventually I came up with simplest - I think - idea to categorize patients into those that have cardiotoxicity and that that do not. According to one definition (Schwartz et al. 1987):
** Decline in LVEF > 10% to final LVEF < 50% **
So I decided to see how many of our patients that have >=2 visits fulfill this criteria of cardiotoxicity in both MUGA softwares, or in various combinations of MUGA softwares. Here is the table:
t_add()
tribble(
~ modality,
~ "N patients with cardiotxitity",
~ "% patients with cardiotoxicity",
"MIM",
n_cardiot_MIM,
cardiot_MIM_perc,
"JS",
n_cardiot_JS,
cardiot_JS_perc,
"Any software crossover",
n_cardiotoxicity_any_crossover,
cardiotoxicity_any_crossover_perc
) %>%
gt(
rowname_col = "aaaaaaaaaaaaaaaaa",
) %>% fmt_number(
columns = vars("% patients with cardiotoxicity"),
decimals = 1
) %>%
tab_header("Table 2, Cardiotoxicity ") %>%
gt_theme_538()
Table 2, Cardiotoxicity | ||
---|---|---|
modality | N patients with cardiotxitity | % patients with cardiotoxicity |
MIM | 23 | 26.4 |
JS | 10 | 11.5 |
Any software crossover | 38 | 43.7 |
First 2 rows are straightforward. They show how many patients would fulfill the cardiotoxicity definition if we followed them only through 1 MUGA modality, such are MIM in 1st row and JS in 2nd row. It is as if we followed line of only 1 color in Figure 5. We see that there is many more patients with cardiotoxicity in MIM VS JS (28.7% VS 10.3%). That is a little bit surprising to me because we can see on Figure 3 that there is bigger variability in JS so all thongs being equal there should be more decline in EF. However if might be more difficult for JETSTRAM to cross EF=50 threshold as it starts from higher lever, which could explain it.
Next 2 rows show numbers and percentages of patients that fulfill cardiotoxicity definition if we change MUGA modalities over time. Row 3 is MIM -> JS and row 4 the oposite change. I calculated it by checking the highest and lowest EF of any visit for each patient for each modality, and later calculating difference taking highest MIM and lowest JS in row 3 and highest JS and lowest MIM in row 4. If this difference is >10% and at the same time lower EF is < 50 (per cardiotoxicity definition decline in LVEF of at least 10% to an LVEF of less than 50). The results are very interesting but not very suprising. The percentage of patients with cardiotoxicity is much higher if we go JS -> MIM than the opposite way (43.7% vs 4.6%). This is explained by generally higher JS EF’s so if we start with it we would have much bigger drop if consecutive modality is MIM.
5th row shows how many pts fulfull cardiotoxicity definition if we can go either way so MIM -> JS or JS -> MIM, but it turns out to be the same as row 4, JS -> MIM, because it already covers all patients fulfilling cardiotoxicity definition (MIM->JS does not add any more).
I think that this could be the main point of our paper. The cardiotoxicity definition is elegant, we already cite it in our manuscript so I think it could be the best way of highlighting the problem. The variability of patients fulfilling the cardiotoxicity definition between modalities is shocking, as low as 4.6% with MIM->JS modality and as high as 43.7% with JESTREAM->MIM. I think it could be a very good selling point. We can also show p-values between groups:
t_add()
p_pure <-
prop.test(
c(n_cardiot_JS, n_cardiot_MIM),
c(n_patients_total_follow, n_patients_total_follow)
)
p_crossover1 <-
prop.test(
c(n_cardiot_MIM, n_cardiotoxicity_any_crossover),
c(n_patients_total_follow, n_patients_total_follow)
)
p_crossover2 <-
prop.test(
c(n_cardiot_JS, n_cardiotoxicity_any_crossover),
c(n_patients_total_follow, n_patients_total_follow)
)
n_reclassified_pure <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% pull(muga_reclassified) %>% sum()
n_reclassified_cross_mim <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% pull(muga_reclassified_crossover_MIM) %>% sum()
n_reclassified_cross_js <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% pull(muga_reclassified_crossover_JS) %>% sum()
tribble(
~ "Software 1",
~ "Software 2",
~ "Number reclassified",
~ "p-value",
~ "CI",
glue("MIM = {n_cardiot_MIM}"),glue("JS = {n_cardiot_JS}"),
n_reclassified_pure,
round(p_pure$p.value, 6),
round(p_pure$conf.int, 2),
glue("MIM = {n_cardiot_MIM}"),
glue("Any software crossover = {n_cardiotoxicity_any_crossover}"),
n_reclassified_cross_mim,
round(p_crossover1$p.value, 6),
round(p_crossover1$conf.int, 2),
glue("JS = {n_cardiot_JS}"),
glue("Any software crossover = {n_cardiotoxicity_any_crossover}"),
n_reclassified_cross_js,
round(p_crossover2$p.value, 6),
# 909090,
round(p_crossover2$conf.int, 2)
) %>%
gt() %>%
fmt_number(
columns = vars("p-value"),
decimals = 3,
# n_sigfig=1,
drop_trailing_dec_mark=FALSE
) %>%
tab_header("Table 3, Comparison between softwares ") %>%
gt_theme_538()
Table 3, Comparison between softwares | ||||
---|---|---|---|---|
Software 1 | Software 2 | Number reclassified | p-value | CI |
MIM = 23 | JS = 10 | 15 | 0.020 | -0.28, -0.02 |
MIM = 23 | Any software crossover = 38 | 15 | 0.026 | -0.32, -0.02 |
JS = 10 | Any software crossover = 38 | 28 | 0.000 | -0.46, -0.19 |
# mutate(
# nr = row_number(),
# p = case_when(
# p < 0.001 ~ "<0.001",
# TRUE ~ form(p, 3)
# ),
# form <- function(x, nsmall = 2)
# format(x, digits = 0, nsmall = nsmall, scientific = FALSE)
# CI musi sie budowac na roznicy miedzu proporcjami
# wywalic CI
#recznie zmutowac na caracter
# kbl(caption = glue("Table 2, Comparison between softwares")) %>%
# kable_classic(full_width = F, html_font = "Cambria")
\(\color{red}{\text{IT DOES NOT ADD UP (MIM AND JS) I AM FIXING IT}}\)
In row 1 and 3 there is a clear difference in the number of cardiotoxicity diagnoses when using different softwares / software cross-overs, as evidenced by p<0.05. I used Test of Equal or Given Proportions for that.
Below I show Figure 6 with added red rectangle to indicate patients with cardiotoxicity defined by MUGA_any_software_crossover (most inclusive definition).
rect_list <- ALL_DIST %>%
filter(cardiotoxicity_algorithm_crossover==1) %>%
pull(study_id)
END <- ALL %>%
filter(n_total_visits>1) %>%
summarise(n(),study_id,visit_number,MIM,JS)
END <- pivot_longer(END,cols = c(MIM,JS),
names_to = "muga_type",
values_to = "ejection_fraction")
END$visit_number <- as.integer(END$visit_number)
END$visit_number <- as.factor(END$visit_number)
# END$study_id <- as.integer(END$study_id)
figure+
labs(
title =glue("Automated ejection fraction in MUGA study, n={n_patients_total_follow}"),
subtitle = glue("by MUGA test type and visit number, dashed line represents EF={def_cardiotoxicity_cutoff}%\nRed rectangle indicates patients with cardiotoxicity (MUGA_any_software_crossover)
")
# , caption = "Figure 6"
) +
geom_rect(data = subset(END, study_id %in% rect_list),
fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
ymin = -Inf,ymax = Inf, size=1.5) +
# geom_rect(data = subset(END, study_id %in% rect__blue_list),
# fill = NA, colour = "blue", alpha=0.3, xmin = -Inf,xmax = Inf,
# ymin = -Inf,ymax = Inf, size=0.3) +
facet_wrap("study_id", scales = 'free_x', ncol = n_wrap_cols) +
theme_fivethirtyeight() +
theme(legend.position = "top")
theme_set(theme_pubr())
I think that additionally showing something like Figure 5 or 6 would could help us persuade readers that there is a lot of software crossover and variability in modalities and will be helpful to in pushing out point.
I also created a graph based on Fig 2 in publication (Moey, Liles, and Carabello 2019) It shows aggregate percentage decrease from baseline by month from baseline.
n_cartiotox_MIM_yes <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% filter(cardiotoxicity_algorithm_MIM==1) %>% nrow()
n_cartiotox_MIM_no <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% filter(cardiotoxicity_algorithm_MIM==0) %>% nrow()
n_cartiotox_JS_yes <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% filter(cardiotoxicity_algorithm_JS==1) %>% nrow()
n_cartiotox_JS_no <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% filter(cardiotoxicity_algorithm_JS==0) %>% nrow()
ALL %>%
filter(n_total_visits>1) %>%
distinct(study_id, .keep_all = TRUE) %>%
count(cardiotoxicity_algorithm_MIM, cardiotoxicity_algorithm_JS, muga_reclassified, name = "freq") %>%
mutate(muga_reclassified=as.factor(muga_reclassified)) %>%
mutate(cardiotoxicity_algorithm_MIM=as.factor(cardiotoxicity_algorithm_MIM)) %>%
mutate(muga_reclassified = as.factor(muga_reclassified)) %>%
mutate(cardiotoxicity_algorithm_MIM = recode_factor(cardiotoxicity_algorithm_MIM,`0` = glue("MIM no \n cardiotoxicity \n n={n_cartiotox_MIM_no}"), `1` = glue("MIM \n cardiotoxicity \n n={n_cartiotox_MIM_yes}") )) %>%
mutate(cardiotoxicity_algorithm_JS = recode_factor(cardiotoxicity_algorithm_JS,`0` = glue("JS no \n cardiotoxicity \n n={n_cartiotox_JS_no}"), `1` = glue("JS \n cardiotoxicity \n n={n_cartiotox_JS_yes}"))) %>%
mutate(gr_color=c("A","A", "B", "B")) %>%
ggplot(
aes(y = freq,
# fill=gr_color,
axis1 = cardiotoxicity_algorithm_MIM, axis2 = cardiotoxicity_algorithm_JS
)) +
geom_stratum(width = 1/5,
fill = "grey",
color = "black"
) +
geom_alluvium(aes(
# fill = "pink", colow="yellow"
), width = 1/5) +
# geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
scale_x_discrete(limits = c("cardiotoxicity_algorithm_MIM", "cardiotoxicity_algorithm_JS"))+
geom_text(aes(label = cardiotoxicity_algorithm_MIM), stat = "stratum") +
geom_text(aes(label = cardiotoxicity_algorithm_JS), stat = "stratum")+
annotate("text", x =1.5, y=18, label = "reclassified") +
annotate("text", x =1.5, y=15, label = "(15/87)") +
theme_void() +
theme(legend.position="top") +
theme(legend.position = "none")
# data(vaccinations)
# vaccinations2 <- vaccinations %>%
# filter(survey != "ms460_NSA", response %in% c("Always", "Never")) %>%
# select(-start_date, -end_date) %>%
# filter(subject %in% c(1, 6, 17, 21))
# vaccinations2
# ggplot(vaccinations2, aes(x = survey, stratum = response, alluvium = subject,
# weight = freq, fill = response, label = response)) +
# geom_flow() +
# geom_stratum(alpha = 0.5) +
# geom_text(stat = "stratum", size = 3) +
# theme(legend.position = "none") +
# ggtitle("vaccination survey responses at three points in time")
ALL %>%
filter(n_total_visits>1) %>%
distinct(study_id, .keep_all = TRUE) %>%
count(cardiotoxicity_algorithm_MIM, cardiotoxicity_algorithm_JS, muga_reclassified, name = "freq") %>%
mutate(muga_reclassified=as.factor(muga_reclassified)) %>%
mutate(cardiotoxicity_algorithm_MIM=as.factor(cardiotoxicity_algorithm_MIM)) %>%
mutate(muga_reclassified = as.factor(muga_reclassified)) %>%
mutate(cardiotoxicity_algorithm_JS=as.factor(cardiotoxicity_algorithm_JS )) %>%
pivot_longer(1:2) %>%
mutate(figure_column=rep(c("MIM","JS"),4)) %>%
mutate(id=rep(c(1:4),each=2)) %>%
ggplot(aes(x = name, stratum = value, alluvium = id,
y = freq, fill = value, label = value)) +
geom_flow() +
geom_stratum(alpha = 0.5) +
geom_text(stat = "stratum", size = 3) +
theme(legend.position = "none") +
ggtitle("Colored flowy to fix")
# theme_void()
# mutate(cardiotoxicity_algorithm_MIM = recode_factor(cardiotoxicity_algorithm_MIM,`0` = glue("MIM no \n cardiotoxicity \n n={n_cartiotox_MIM_no}"), `1` = glue("MIM \n cardiotoxicity \n n={n_cartiotox_MIM_yes}") )) %>%
# mutate(cardiotoxicity_algorithm_JS = recode_factor(cardiotoxicity_algorithm_JS,`0` = glue("JS no \n cardiotoxicity \n n={n_cartiotox_JS_no}"), `1` = glue("JS \n cardiotoxicity \n n={n_cartiotox_JS_yes}"))) %>%
# mutate(gr_color=c("A","A", "B", "B")) %>%
# ggplot(
# aes(y = freq,
# x=figure_column
# # fill=gr_color,
# # axis1 = cardiotoxicity_algorithm_MIM, axis2 = cardiotoxicity_algorithm_JS
# )) +
# geom_stratum(width = 1/5,
# fill = "grey",
# color = "black"
# ) +
# geom_alluvium(aes(
# # fill = "pink", colow="yellow"
# ), width = 1/5) +
# # geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
# # scale_x_discrete(limits = c("cardiotoxicity_algorithm_MIM", "cardiotoxicity_algorithm_JS"))+
# # geom_text(aes(label = cardiotoxicity_algorithm_MIM), stat = "stratum") +
# # geom_text(aes(label = cardiotoxicity_algorithm_JS), stat = "stratum")+
# # annotate("text", x =1.5, y=18, label = "reclassified") +
# # annotate("text", x =1.5, y=15, label = "(15/87)") +
# theme_void() +
# theme(legend.position="top") +
# theme(legend.position = "none")
Some more afterthoughts / questions:
We can suggest correction of automated EF while switching modalities, for example to substract 10% from EF when going from JS to MIM. I am going to do Bland Altman plot next.
Suggested figure with a diagram simillar to (Huang et al. 2017)
Now its getting serious because we are getting closer to finishing and I want to share some of my data handling responsibility with others, so if we go down, we go down together :D
Difference between scanners oscillate around 10%
ALL %>%
# drop_na(heart_rate_1) %>%
ggplot() +
aes(x=heart_rate_1, y=difference_between_modalities)+
geom_point() +
geom_smooth(method="lm") +
stat_cor(p.accuracy = 0.001, r.accuracy = 0.001) +
labs(title=paste("n =",nrow(ALL))) +
geom_hline(
yintercept = 10,
linetype = "dashed",
color = "black",
size = 0.5)
Very small correlation, non-significant
ALL %>%
# drop_na(heart_rate_1) %>%
ggplot() +
aes(x=bmi_combined, y=difference_between_modalities)+
geom_point() +
geom_smooth(method="lm") +
stat_cor(p.accuracy = 0.001, r.accuracy = 0.001) +
labs(title=paste("n =",nrow(ALL))) +
geom_hline(
yintercept = 10,
linetype = "dashed",
color = "black",
size = 0.5)
Moderate, significant correlation
library("gtsummary")
theme_gtsummary_compact()
var_label(ALL) <- list(
age_years = "Age (years)",
gender = "Gender",
race = "Race",
bmi_combined = "Body mass index (kg/m2)",
bsa = "Body surface area (m2)",
ethnicity = "Ethnicity",
hypertension = "Hypertension",
hyperlipidemia = "Hyperlipidemia",
coronary_artery_disease = "Coronary artery disease",
diabetes_mellitus = "Diabetes mellitus",
atrial_fibrillation = "Atrial fibrillation",
peripheral_vascular_disease = "Peripherial vascular disease",
chronic_kidney_disease = "Chronic kidney disease",
stroke_or_tia = "Stroke or TIA",
type_of_cancer = "Type of Cancer",
scanner = "Scanner",
number_of_frames = "Number of frames",
MIM = "MIM LVEF (%)",
JS = "JS LVEF (%)",
ed_volume_1 = "ED volume",
es_volume_1 = "ES volume",
heart_rate_1 = "Heart rate during MUGA (beats/min.)",
beats_accepted = "Beats accepted",
beats_rejected = "Beats rejected",
difference_betweem_modialieits_more_eq_10 = "Difference in MUGA software >=10%",
black = "Black race",
n_total_visits = "Total number of visits",
cad_combined_of_4 = "Coronary artery disease",
pulm_disease_combined_of_2 = "Pulmonary disease",
tobacco_use = "Tobacco use",
high_r_r_value = "High R-R value",
low_r_r_value = "Low R-R value",
ed_time = "ED time",
es_time = "ES time",
frame_time = "Frame time",
heart_rate = "Heart rate during vitals",
dose_mci = "Radioisotope dose (mCi)",
anthracycyclines = "Anthracyclines",
trastuzumab = "Trastuzumab",
alkylating_agents = "Alkylating agents",
antimicrotubule_agents = "Antimicrotubule agents",
antimetabolites = "Antimetabolites",
immuno_checkpoint_inhibitors = "Immunologic checkpoint inhibitors",
other = "Other chemotherapy"
)
tbl_demo_all_vars <-
c(
"age_years",
"gender",
"race",
"ethnicity",
"bmi_combined",
"bsa",
"hypertension",
"diabetes_mellitus",
"hyperlipidemia",
"cad_combined_of_4",
"stroke_or_tia",
"atrial_fibrillation",
"chronic_kidney_disease",
"pulm_disease_combined_of_2",
"tobacco_use",
"type_of_cancer",
"anthracycyclines",
"trastuzumab",
"alkylating_agents",
"antimicrotubule_agents",
"antimetabolites",
"immuno_checkpoint_inhibitors",
"other"
)
# ALL %>%
# # fill(age_years) %>%
# mutate(difference_betweem_modialieits_more_eq_10 = recode_factor(difference_betweem_modialieits_more_eq_10,`0` = "< 10%", `1` = ">= 10%")) %>%
# # filter(n_total_visits>1) %>%
# distinct(study_id, .keep_all = TRUE) %>%
# # select(variables_tables) %>%
# tbl_summary(
# include = tbl_demo_all_vars,
# by = difference_betweem_modialieits_more_eq_10,
# sort=list(everything() ~ "frequency"), missing="no",
# value = list(everything() ~ 1)
# ) %>%
# add_p() %>%
# add_overall() %>%
# modify_spanning_header(c("stat_1", "stat_2") ~ "**Difference in LVEF between MUGA modalities**") %>%
# bold_p()
# # add_stat_label()
ALL %>%
# fill(age_years) %>%
# mutate(difference_betweem_modialieits_more_eq_10 = recode_factor(difference_betweem_modialieits_more_eq_10,`0` = "< 10%", `1` = ">= 10%")) %>%
# filter(n_total_visits>1) %>%
distinct(study_id, .keep_all = TRUE) %>%
tbl_summary(
include = tbl_demo_all_vars
,
# by = difference_betweem_modialieits_more_eq_10,
sort=list(everything() ~ "frequency"), missing="no",
value = list(everything() ~ 1)
)
Characteristic | N = 3101 |
---|---|
Age (years) | 54 (44, 63) |
Gender | |
Female | 221 (71%) |
Male | 89 (29%) |
Race | |
Other Race | 148 (48%) |
Black or African American | 126 (41%) |
White | 26 (8.4%) |
Asian | 10 (3.2%) |
Ethnicity | |
NOT Hispanic or Latino | 181 (58%) |
Hispanic or Latino | 127 (41%) |
Unknown / Not Reported | 2 (0.6%) |
Body mass index (kg/m2) | 28 (24, 32) |
Body surface area (m2) | 1.80 (1.60, 1.92) |
Hypertension | 134 (44%) |
Diabetes mellitus | 56 (18%) |
Hyperlipidemia | 52 (17%) |
Coronary artery disease | 22 (7.1%) |
Stroke or TIA | 4 (1.3%) |
Atrial fibrillation | 8 (2.6%) |
Chronic kidney disease | 8 (2.6%) |
Pulmonary disease | 33 (11%) |
Tobacco use | |
Never | 180 (58%) |
Former | 89 (29%) |
Current | 39 (13%) |
Type of Cancer | |
Breast | 142 (46%) |
Lymphoma | 82 (27%) |
Sarcoma | 25 (8.1%) |
Gastric | 16 (5.2%) |
Other | 11 (3.6%) |
Ovarian | 9 (2.9%) |
Uterine | 7 (2.3%) |
Bone | 6 (1.9%) |
Multiple myeloma | 6 (1.9%) |
Lung | 4 (1.3%) |
Anthracyclines | 198 (64%) |
Trastuzumab | 64 (21%) |
Alkylating agents | 134 (43%) |
Antimicrotubule agents | 138 (45%) |
Antimetabolites | 33 (11%) |
Immunologic checkpoint inhibitors | 2 (0.6%) |
Other chemotherapy | 182 (63%) |
1
Median (IQR); n (%)
|
# add_p() %>%
# add_overall() %>%
# modify_spanning_header(c("stat_1", "stat_2") ~ "**Difference in LVEF between MUGA modalities**") %>%
# bold_p()
# add_stat_label()
tbl_tach_all_vars <- c(
"difference_betweem_modialieits_more_eq_10",
"heart_rate_1",
"bmi_combined",
"scanner",
"number_of_frames",
"dose_mci",
"MIM",
"JS"
)
theme_gtsummary_compact()
ALL %>%
# fill(age_years) %>%
mutate(difference_betweem_modialieits_more_eq_10 = recode_factor(difference_betweem_modialieits_more_eq_10,`0` = "< 10%", `1` = "≥ 10%")) %>%
# filter(n_total_visits>1) %>%
# select(variables_tables) %>%
tbl_summary(
include = tbl_tach_all_vars,
by = difference_betweem_modialieits_more_eq_10,
sort=list(everything() ~ "frequency"), missing="no"
) %>%
add_p() %>%
add_overall() %>%
modify_spanning_header(c("stat_1", "stat_2") ~ "**Difference in LVEF between MUGA modalities**") %>%
bold_p()
Characteristic | Overall, N = 5821 | Difference in LVEF between MUGA modalities | p-value2 | |
---|---|---|---|---|
< 10%, N = 2701 | = 10%, N = 3121 | |||
Heart rate during MUGA (beats/min.) | 76 (67, 87) | 73 (65, 87) | 77 (69, 86) | 0.029 |
Body mass index (kg/m2) | 28 (24, 32) | 28 (24, 32) | 29 (25, 34) | 0.006 |
Scanner | 0.12 | |||
Forte | 365 (63%) | 158 (59%) | 207 (67%) | |
Brightview | 132 (23%) | 67 (25%) | 65 (21%) | |
SKYLight | 80 (14%) | 43 (16%) | 37 (12%) | |
Number of frames | 0.046 | |||
16 | 346 (60%) | 149 (56%) | 197 (64%) | |
32 | 231 (40%) | 119 (44%) | 112 (36%) | |
Radioisotope dose (mCi) | 21.30 (20.80, 21.80) | 21.30 (20.90, 21.80) | 21.30 (20.80, 21.80) | 0.4 |
MIM LVEF (%) | 56 (49, 61) | 57 (51, 64) | 54 (48, 59) | <0.001 |
JS LVEF (%) | 66 (59, 72) | 63 (57, 70) | 67 (61, 73) | <0.001 |
1
Median (IQR); n (%)
2
Wilcoxon rank sum test; Pearson's Chi-squared test
|
# add_stat_label()
theme_gtsummary_compact()
# ALL %>%
# # fill(age_years) %>%
# mutate(muga_reclassified_crossover_MIM = recode_factor(muga_reclassified_crossover_MIM,`0` = "No reclassification", `1` = "Reclassification")) %>%
# filter(n_total_visits>1) %>%
# distinct(study_id, .keep_all = TRUE) %>%
# select(variables_tables) %>%
# tbl_summary(
# by = muga_reclassified_crossover_MIM,
# label = age_years ~ "Age (years)",
# missing="no",
# value = list(everything() ~ 1)
# ) %>%
# add_p() %>%
# add_overall() %>%
# modify_spanning_header(c("stat_1", "stat_2") ~ "**Reclassification between software crossover and MIM**") %>%
# bold_p()
a <- lm(
difference_between_modalities ~ number_of_frames + bmi.x + heart_rate_1
+ ed_volume_1 + beats_accepted + gender + age_years
,
data = ALL
) %>%
tbl_regression()
ALL$age_cut <-cut(ALL$age_years, c(0,65,100))
ALL$bmi_cut <-cut(ALL$bmi_combined, c(0,20,25,30,35,100))
ALL$heart_cut <- cut(ALL$heart_rate_1, c(0,75,300))
var_label(ALL) <- list(
age_cut = "Age (years)",
bmi_cut = "BMI",
heart_cut = "HR")
xx <- ALL %>%
filter(n_total_visits>1)
theme_gtsummary_compact()
# ALL$black
# ALL %>%
# distinct(study_id, .keep_all = TRUE) %>%
# count(race,ethnicity)
# hist(ALL$age_years)
# cut()
# ALL %>%
# count(bmi_cut)
glm(
muga_reclassified ~
# race +
# black +\
age_years +
# age_cut +
gender +
# bmi_combined +
bmi_cut +
heart_rate_1 +
# heart_cut +
# + beats_rejected +
scanner +
# es_volume_1 +
number_of_frames,
data = xx,
family = "binomial"
) %>%
# tab_model()
tbl_regression(exponentiate=TRUE)%>% bold_p() %>%
add_glance_source_note(
include = c(nobs)
)
Characteristic | OR1 | 95% CI1 | p-value |
---|---|---|---|
Age (years) | 0.95 | 0.93, 0.97 | <0.001 |
Gender | |||
Female | — | — | |
Male | 0.62 | 0.08, 2.84 | 0.6 |
BMI | |||
(0,20] | — | — | |
(20,25] | 0.08 | 0.01, 0.37 | 0.003 |
(25,30] | 0.80 | 0.34, 1.96 | 0.6 |
(30,35] | 1.27 | 0.49, 3.38 | 0.6 |
(35,100] | 2.57 | 1.08, 6.46 | 0.037 |
Heart rate during MUGA (beats/min.) | 0.98 | 0.96, 1.00 | 0.054 |
Scanner | |||
Brightview | — | — | |
Forte | 0.19 | 0.01, 1.12 | 0.13 |
SKYLight | 0.61 | 0.23, 1.53 | 0.3 |
Number of frames | |||
16 | — | — | |
32 | 0.31 | 0.02, 1.77 | 0.3 |
nobs = 356 | |||
1
OR = Odds Ratio, CI = Confidence Interval
|
# glance(a)
# add_glance_table(
# # include = c(AIC)
# )
# add_n()
# hist(ALL$bmi_combined)
#
# explanatory = c(
# # "Age",
# "age_cut",
# "gender","bmi_cut","heart_cut","scanner","number_of_frames")
# dependent = "muga_reclassified"
#
# plot <- ALL %>%
# or_plot(dependent, explanatory, remove_ref=TRUE, table_text_size=4, title_text_size=14, dependent_label = "Multivariate analysis: odds of reclassification",
# breaks = c(0.1,0.5,1,5),
# # column_space=10,
# plot_opts=list(xlab("OR, 95% CI")
# ,theme(axis.title = element_text(size=10))) )
# xx %>%
# distinct(study_id, .keep_all = TRUE) %>%
# # group_by(study_id) %>%
# count(muga_reclassified)
A lot of these variables are used in regression models, NA need to be checked and fixed and strange values need to be collected.
Link to tables: https://docs.google.com/spreadsheets/d/1dUNGFJh649CwbixpODYFLA3Iki4r54tW1AuOjejEB0k/edit?usp=sharing