knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  fig.align = "center",
  dev = "png",
  cache = TRUE,
  error = FALSE,
  include = TRUE,
  message = FALSE)

library("labelled")
library("knitr")  
library("gdtools")
library("rlang")
library("htmltools")
library("kableExtra")
library("reactable")
library("ggthemes")
library("naniar")
library("BlandAltmanLeh")
library("reactable")
library("ggsignif")
library("sjPlot")
library("readxl")  
library("janitor")
library("skimr")
library("lubridate")
library("tableone")
library("finalfit")
library("scales")
library("ggsci")
library("glue")
library("blandr")
library("nortest")
library("CBCgrps")
library("gt")
library("here")
library("cowplot")
library("googlesheets4")
library("ggpubr")
library("htmltools")
# remotes::install_github("ddsjoberg/gtsummary")
# library("gtsummary")
# library("gtsummary")

library("gtsummary")

library("ggalluvial")
library("car")
library("broom")

library("labelled")
# remotes::install_github("jthomasmock/espnscrapeR")
library("espnscrapeR")


# install.packages("espnscrapeR")

library("tidyverse")

rm(list = ls())

n_wrap_cols <- 7
height_wrap <- 17
width_wrap <- 10
wrap_font_size <- 9

f_num <- 0
t_num <- 0

square_figure_size <- c(5, 5)

def_cardiotoxicity_decline <- 10

def_cardiotoxicity_cutoff <- 50

col_MIM <- "#00bfc4"
col_muga <- "#f8766d"

check_cross <- function(x, y) {
  k <- length(x)
  if ((min(x)>def_cardiotoxicity_cutoff & min(y)>def_cardiotoxicity_cutoff) | k==1)
    return(FALSE)
  diff <- numeric((k / 2) * (k - 1))
  z <- 0
  for (i in 1:(k - 1)) {
    for (j in (i + 1):k) {
      z <- z + 1
      diff[z] <- min(x[j] - x[i], y[j] - y[i], x[j] - y[i], y[j] - x[i])
    }
  }
  return(any(diff < (-def_cardiotoxicity_decline)))
} 

# which.min(x[j] - x[i], y[j] - y[i], x[j] - y[i], y[j] - x[i])


check_cross_update <- function(x, y) {
  k <- length(x)
  if (k==1)
    return(FALSE)
  diff <- numeric((k / 2) * (k - 1))
  z <- 0
  for (i in 1:(k - 1)) {
    for (j in (i + 1):k) {
      z <- z + 1
      v1 <- c(x[j], y[j], x[j], y[j])
      v2 <- c(x[i], y[i], y[i], x[i])
      v <- v1-v2
      diff[z] <- min(v)
      w <- which.min(v)
      if(v1[w]>def_cardiotoxicity_cutoff) diff[z] <- 0
    }
  }
  return(any(diff < (-def_cardiotoxicity_decline)))
} 

check_both <- function(x, y=NULL) {
  k <- length(x)
  if (k==1)
    return(FALSE)
  diff <- numeric((k / 2) * (k - 1))
  z <- 0
  for (i in 1:(k - 1)) {
    for (j in (i + 1):k) {
      z <- z + 1
      if (is.null(y)) {
        v1=x[j]
        v2=x[i]
      } else {
        v1 <- c(x[j], y[j], x[j], y[j])
        v2 <- c(x[i], y[i], y[i], x[i])
      }
      v <- v1-v2
      diff[z] <- min(v)
      w <- which.min(v)
      if(v1[w]>=def_cardiotoxicity_cutoff) diff[z] <- 0
    }
  }
  return(any(diff < (-def_cardiotoxicity_decline)))
} 

check <- function(x) {
  k <- length(x)

  if (min(x)>def_cardiotoxicity_cutoff | k == 1)
    return(FALSE)
  diff <- numeric((k / 2) * (k - 1))
  z <- 0
  for (i in 1:(k - 1)) {
    for (j in (i + 1):k) {
      z <- z + 1
      diff[z] <- x[j] - x[i]
    }
  }
  return(any(diff < (-def_cardiotoxicity_decline)))
} 

f_add <- function() {
  f_num <<- f_num + 1
}

t_add <- function() {
  t_num <<- t_num + 1
}

get_n_obs <- function() {
  return (ALL %>% nrow())
}

get_n_patients <- function() {
  return (ALL %>%  distinct(study_id) %>% nrow)
}

fix_table_header <- function(x) {
  # "Overall" column present?
  if ("stat_0" %in% colnames(x$table_body)) {  
    modify_header(
      x,
      stat_by = "**{level}**<br>N = {n}",
      stat_0 = "**Overall**<br>N = {n}"
    )
  } else {
    modify_header(
      x,
      stat_by = "**{level}**<br>N = {n}"
    )
  }
}

# source("loading_initial_files.R")

na_strings <- c("NA", "N A", "N / A", "N/A", "N/ A", "n/a","Not Available", "NOt available", "n.a", "n.a.","unknown","Unknown", "none", "")

load_file <- here("data","DefiningCancerTherap_DATA_LABELS_2021-04-05_1207.csv")

ALL <- load_file %>%
  read_csv(na = na_strings) %>%
  clean_names() %>%
  remove_empty()

n_obs_initial <- get_n_obs()
n_patients_initial <- get_n_patients()

ALL <- ALL %>%
  select(-contains("time_of_muga"),-contains("complete"),-contains("evaluator")) %>%
  select(
    -"first_name",
    -"last_name",
    -"zip_code",
    -"other_comments",
    -"date_echo_was_checked",
    -"other_comments",
    -"reason_for_exam_1", 
    -"date_visit_was_checked", 
    -"type_of_echocardiogram",-"echo_images_are_available_on_workstation",-"source_of_death_data_choice_other"
  ) %>%
  # select(
  #   -insurance,
  #   -baseline_data_available,
  #   -if_other_please_describe,
  #   -(
  #     anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin:total_number_of_pembrolizumab_doses
  #   )
  # ) %>%
  # #   select(-(coronary_artery_disease_choice_no:cancer_choice_unknown)) %>%
  # #   # select(-(cocaine_use:did_patient_have_a_visit_or_follow_up)) %>%
mutate_if(is.character, funs(str_replace_all(., "^Yes", "1"))) %>%
  mutate_if(is.character, funs(str_replace_all(., "^No", "0"))) %>%
  mutate_if(is.character, funs(str_replace_all(., "^Unchecked", "0"))) %>%
  mutate_if(is.character, funs(str_replace_all(., "^Checked", "1"))) %>%
  rename(
    MIM = automatically_calculated_ef,
    JS = automatically_calculated_ef_1,
    visit_number = event_name,
    MIM_date = date_of_muga_scan,
    JS_date = date_of_muga_scan_1
  ) %>% 
  mutate(dose_mci = parse_number(as.character(dose))) %>%
  mutate(
    full_wrap_name = str_c("Patient ", ALL$study_id),
    difference_in_ef = (JS - MIM),
    difference_more_5_prc = ifelse(difference_in_ef > 5, 1, 0),
    type_of_cancer = as.factor(type_of_cancer),
    abs_difference_in_ef = abs(difference_in_ef),
    visit_number = fct_relevel(visit_number, as.character(c(1:50))),
    study_id = fct_relevel(as.factor(study_id), as.character(c(1:1500))),
    did_patient_have_a_visit_or_follow_up = as.factor(did_patient_have_a_visit_or_follow_up),
    vitals_available = as.factor(vitals_available)
  ) %>% 
  mutate(full_wrap_name=as.factor(full_wrap_name)) %>% 
  mutate(full_wrap_name=fct_reorder(full_wrap_name, as.numeric(study_id))) %>% 
  
  mutate(difference_between_modalities=JS-MIM) %>% 
  mutate(difference_betweem_modialieits_more_eq_5=ifelse(difference_between_modalities>=5,1,0)) %>% 
  mutate(difference_betweem_modialieits_more_eq_10=ifelse(difference_between_modalities>=10,1,0)) %>% 
  mutate(difference_betweem_modialieits_more_eq_15=ifelse(difference_between_modalities>=15,1,0)) %>% 

  mutate(visit_number = recode(visit_number, "Death" = "3")) %>%
  mutate(type_of_cancer = recode(type_of_cancer, "Head and Neck" = "Other")) %>%
  mutate_at(vars(contains("date")), as_date) %>% 
  mutate_at(vars(gender,race:baseline_data_available), as.factor) %>%
  mutate_at(vars(cancer:type_of_cancer, day_of_the_week_performed_1), as.factor) %>%
  mutate_at(vars(anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin:other), as.factor) %>%
  mutate_at(vars(coronary_artery_disease_choice_no:ivda, number_of_frames), as.factor) %>%
  mutate_at(vars(event_type_choice_routine_follow_up:what_was_the_cause_of_hospitalization_choice_other), as.factor) %>% 
  mutate_at(vars(discharge_summary_in_epic, muga_scan_performed, day_of_the_week_performed, scanner), as.factor) %>% 
  mutate(heart_rate_1=as.numeric(heart_rate_1)) %>% 
  mutate_at(vars(x9_fascicular_block_choice_none:x9_fascicular_block_choice_left_posterior_fascicular_block, echo_performed, did_patient_die), as.factor) %>% 
  mutate_at(vars(what_was_the_cause_of_death_choice_cardiac_myocardial_infarction:source_of_death_data_choice_social_security_death_index, 
                 manual_correction, manual_correction_1, muga_scan_performed_1, difference_more_5_prc), as.factor) %>% 
  mutate_at(vars(contains("difference_betweem_modialieits")), as_factor) %>% 
  mutate(black = ifelse(race=="Black or African American",1,0)) %>% 
  mutate(docitaxel_abraxin_paclitaxel_combined_of_3 = ifelse((docetaxel_taxotere == 1 | abraxane_paclitaxel_albumin_stabilized_nanoparticle_formulation == 1),1,0)) %>% 
  mutate_at(vars(docitaxel_abraxin_paclitaxel_combined_of_3),as.factor) %>%
  relocate(MIM, .after = visit_number) %>%
  relocate(JS, .after = visit_number) %>%
  relocate(type_of_cancer, .after = cancer) %>%
  relocate(reason_for_exam, .after = cancer) %>%
  relocate(docitaxel_abraxin_paclitaxel_combined_of_3, .before = docetaxel_taxotere) 

ALL <- ALL %>% 
  mutate(
    coronary_artery_disease=replace_na(coronary_artery_disease, 0),
    prior_myocardial_infarction=replace_na(prior_myocardial_infarction, 0),
    prior_pci_balloon_angioplasty_or_stent=replace_na(prior_pci_balloon_angioplasty_or_stent, 0),
    coronary_artery_bypass_surgery=replace_na(coronary_artery_bypass_surgery, 0),
    asthma=replace_na(asthma, 0),
    copd=replace_na(copd, 0),
      ) %>% 
  mutate(cad_combined_of_4=ifelse((coronary_artery_disease == 1 | prior_myocardial_infarction == 1 | 
                                     prior_pci_balloon_angioplasty_or_stent == 1 | coronary_artery_bypass_surgery == 1),1,0)) %>% 
  mutate(pulm_disease_combined_of_2 = ifelse((asthma ==1 | copd == 1),1,0)) %>% 
  relocate(pulm_disease_combined_of_2, .before = asthma) %>%
  relocate(cad_combined_of_4, .before = coronary_artery_disease)

ALL <- ALL %>%  # remove non-cancer patients
  group_by(study_id) %>% 
  fill(cancer, type_of_cancer, mrn, gender, date_of_birth, race, ethnicity) %>% 
  mutate(cancer=replace_na(cancer,1)) %>% 
  filter(cancer != 0)

n_obs_no_cancer <- get_n_obs()
n_patients_no_cancer <- get_n_patients()

ALL <- ALL %>% # remove patents with no MUGA on follow up
  drop_na(JS_date)

n_obs_has_study_on_follow_up <- get_n_obs()
n_patients_has_study_on_follow_up <- get_n_patients()

ALL <- ALL %>% # remove patents with EF not loading on follow up
  drop_na(MIM)

n_obs_has_ef_study_on_follow_up <- get_n_obs()
n_patients_has_ef_study_on_follow_up <- get_n_patients()

ALL <- ALL %>% # patients after exclusions from here on 
  add_count(study_id, name="n_total_visits") %>% 
  relocate(n_total_visits, .after=visit_number)

ALL <- ALL %>%
  group_by(study_id) %>%
  mutate(cardiotoxicity_algorithm_MIM=check_both(MIM)) %>%
  mutate(cardiotoxicity_algorithm_JS=check_both(JS)) %>%
  mutate(cardiotoxicity_algorithm_crossover=check_both(JS, MIM)) %>% 
  ungroup() %>%

  mutate(muga_reclassified=ifelse(cardiotoxicity_algorithm_JS!=cardiotoxicity_algorithm_MIM,1,0)) %>%

  mutate(cardiotox_any_pure=ifelse((cardiotoxicity_algorithm_MIM==1 | cardiotoxicity_algorithm_JS==1),1,0)) %>% 
  mutate_at(vars(contains("algorithm")), as.numeric) 

ALL_DIST <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) 

n_obs_total_follow <- ALL %>% filter(n_total_visits>1) %>% nrow
n_patients_total_follow <- ALL_DIST %>% nrow

n_total <- n_patients_total_follow

# ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% pull(cardiotoxicity_algorithm_MIM) %>% sum()
# ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% pull(cardiotoxicity_algorithm_JS) %>% sum()
# ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% pull(muga_reclassified) %>% sum()
# 
# ALL_DIST %>% 
#   select(cardiotoxicity_algorithm_MIM, cardiotoxicity_algorithm_JS, muga_reclassified) %>% write_csv("taa.csv")
  
  


sign_number_perc <- 3

#pure
n_cardiot_JS <- ALL_DIST %>% summarize(sum(cardiotoxicity_algorithm_JS)) %>% pull()
n_cardiot_MIM <- ALL_DIST %>% summarize(sum(cardiotoxicity_algorithm_MIM)) %>% pull()
n_cardiot_any <-ALL_DIST %>% summarize(sum(cardiotox_any_pure)) %>% pull()

cardiot_JS_perc <- round(n_cardiot_JS/n_total,3)*100
cardiot_MIM_perc <- round(n_cardiot_MIM/n_total,sign_number_perc)*100
cardiot_any_perc <- round(n_cardiot_any/n_total,sign_number_perc)*100

#crossover
n_cardiotoxicity_any_crossover <- sum(ALL_DIST$cardiotoxicity_algorithm_crossover)

cardiotoxicity_any_crossover_perc <- round(n_cardiotoxicity_any_crossover/n_total,sign_number_perc)*100

ALL <- ALL %>% mutate(muga_reclassified_crossover_MIM = ifelse(cardiotoxicity_algorithm_crossover != cardiotoxicity_algorithm_MIM,1,0))

ALL <- ALL %>% mutate(muga_reclassified_crossover_JS = ifelse(cardiotoxicity_algorithm_crossover != cardiotoxicity_algorithm_JS,1,0))

ALL_DIST <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE)

f_add()

ALL <- ALL %>% 
  mutate(age_years = time_length(difftime(JS_date,date_of_birth), "year"))

BMI <- read_csv(here("data", "bmi.csv")) %>% 
  mutate(study_id=as.factor(study_id)) %>% 
  mutate(visit_number=as.factor(visit_number))

ALL <- ALL %>% 
  left_join(BMI, by=c("study_id","visit_number"))

ALL <- ALL %>% 
 # mutate(bmi_combined = ifelse(is_na(bmi.x),bmi,bmi.y)) %>% 
 # pull(bmi_combined)
  mutate(
    bmi_combined = case_when(
      is.na(bmi.x) ~ bmi.y,
      !is.na(bmi.x) ~ bmi.x
    )
  )

# ALL %>%
#    mutate(across(where(c(anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin,herceptin_trastuzumab)), ~na_if(., "0")))
# # 
# # ALL %>% view()
# ALL <- ALL %>% 
# mutate(across((anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin:herceptin_trastuzumab), ~replace_na(.x, 0)))

# ALL$anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin
ALL <- ALL %>% 
  mutate(
    across(anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin:lynparza_olaparib, ~replace_na(.x, 0)) 
  )

# Or if you're pipe shy:

ALL <- ALL %>%
  mutate(anthracycyclines = anthracycline_or_equivalent_such_as_daunorubicin_doxorubicin_epirubicin_idarubicin) %>%
  mutate(trastuzumab = herceptin_trastuzumab) %>%
  mutate(alkylating_agents = cyclophosphamide) %>% 
  mutate(antimicrotubule_agents = ifelse(docetaxel_taxotere == 1 | paclitaxel_taxol == 1 | abraxane_paclitaxel_albumin_stabilized_nanoparticle_formulation ==1, 1,0)) %>% 
  mutate(antimetabolites = ifelse(tamoxifen_nolvadex_or_soltamox== 1 | methotrexate==1 | cytarabine==1 |x5_fu_fluorouracil_injection==1 | cladribine==1,1,0)) %>% 
  mutate(immuno_checkpoint_inhibitors =ifelse(pembrolizumab==1 | nivolumab==1 | ipilimumab==1 | atezolizumab==1 | avelumab==1 | durvalumab==1,1,0)) %>% 
  mutate(other = ifelse(etoposide ==1 | other ==1,1,0))


#   mutate(ankylating_agents = ifelse(cisplatin ==1 | carboplatin == 1))
# ALL$abraxane_paclitaxel_albumin_stabilized_nanoparticle_formulation
# tabyl(ALL$etoposide)
# ALL$other

# ALL %>% view()
# 
# ALL %>% view()

Cardiotoxicity set at EF=50 %

Patient flow (Figure 1)

Initially we start with 719 Epic visits. Table and flowchart below summarizes patient numbers:

Table with flow

t_add()
theme_gtsummary_compact()

tribble(
  ~ Class,
  ~ Initial,
  ~ "N Cancer",
  ~ "N Follow up",
  ~ "N follow up MUGA EF laded",
  ~ "N in patients >=2 MUGAs",
  "Number of obs.",
  n_obs_initial,
  n_obs_no_cancer,
  n_obs_has_study_on_follow_up,
  n_obs_has_ef_study_on_follow_up,
  n_obs_total_follow,
  "Number of patients",
  n_patients_initial,
  n_patients_no_cancer,
  n_patients_has_study_on_follow_up,
  n_patients_has_ef_study_on_follow_up,
  n_patients_total_follow
) %>%
    gt(
    rowname_col = "aaaaaaaaaaaaaaaaa",
  ) %>% 
  gt_theme_538()

Class	Initial	N Cancer	N Follow up	N follow up MUGA EF laded	N in patients >=2 MUGAs
Number of obs.	719	687	591	582	359
Number of patients	322	312	312	310	87

  # fmt_number(
  #   columns = vars(perc_with_cardiotoxicity),
  #   decimals = 1
  # ) %>% 
  # tab_header("Table 1, Patient numbers") %>% 
  #   cols_label(
  #   country_name = "Name",
  #   year = "Year",
  #   population = "Population"
  # )
  # 
  # kbl(caption = glue("Table {t_num}, Patient numbers")) %>%
  # kable_classic(full_width = F, html_font = "Cambria")

# read PNG i wyswietl go potem, zamiast bezposrednio go ladowac tutaj (nizej)

#stworz funkcje ktora tworzy wykres i ma f_add
# ggsave("plot2.png", scale = 1, dpi=600)

Flowchart

Figure 1. Flowchart of the study

Comparison of MUGA software (Figure 2)

Scatterplot and Bland Altmann plot

Here is a plot with correlation of MUGA software and comparison between them:

Red dashed line is MIM EF = Jetstream EF. We see out best fit line is shifted to the left in relation to red dashed line, meaning that Jetstream EF is on average higher than MIM. Pretty good correlation with R = 0.876, with few ourliers, which we would need to investigate more. However, correlation studies show the relationship between one variable and another, not the differences, and are not recommended as a method for assessing the comparability between methods. (Giavarina 2015) (Bland and Altman 1986)

f_add()

theme_set(theme_pubr())

scatter <- ALL %>% 
  ggplot +
  aes(MIM,JS) +
  geom_point(
    size = 2,
    alpha = 0.5,
    shape = 20,
    # add = "reg.l
  ) +
  stat_cor(p.accuracy = 0.001, r.accuracy = 0.001)+
labs(
    # title="Correlation plot between - MUGA modalities",
    caption = " ",
    y = "MUGA LVEF - JS",
    x = "MUGA LVEF - MIM "
  ) +
    scale_y_continuous(
    breaks = breaks_width(20),
    limits = c(15, 90),
    labels = scales::percent_format(accuracy = 1, scale = 1)
  ) +
  scale_x_continuous(
    breaks = breaks_width(20),
    limits = c(15, 90),
    labels = scales::percent_format(accuracy = 1, scale = 1)
  ) + 
    geom_abline(
    slope = 1,
    intercept = 0,
    colour = "red",
    linetype = 2,
    size = 0.75
  ) +
  stat_smooth(
    size = 1,
    color = "black",
    alpha = 0.5,
    method = "lm"
  )

bland_stats <- blandr.statistics (ALL$JS, ALL$MIM, sig.level = 0.95)

# summary(bland_stats)
text_up <- glue(
  "+1.96 SD, mean = {mid} (95%CI {low}%-{up}%)",
  up = round(bland_stats$upperLOA_upperCI, 1),
  mid = round(bland_stats$upperLOA, 1),
  low = round(bland_stats$upperLOA_lowerCI, 1)
)
text_down <- glue(
  "+1.96 SD, mean = {mid} (95%CI {low}%-{up}%)",
  up = round(bland_stats$lowerLOA_upperCI, 1),
  mid = round(bland_stats$lowerLOA, 1),
  low = round(bland_stats$lowerLOA_lowerCI, 1)
)
text_me <- glue(
  "+1.96 SD, mean = {mid} (95%CI {low}%-{up}%)",
  up = round(bland_stats$biasUpperCI, 1),
  mid = round(bland_stats$bias, 1),
  low = round(bland_stats$biasLowerCI, 1)
)

bland <-
  blandr.draw(
    ALL$JS,
    ALL$MIM,
    method1name = "Jetstream",
    method2name = "MIM",
    plotTitle = "",
    annotate = TRUE,
    normalHigh = TRUE
  ) +
  labs(
    y = "Difference MUGA LVEF between JS and MIM ",
    x = "Mean LVEF JS and MIM"
    # title="Correlation plot between - MUGA modalities",
    # caption = glue("Figure {f_num}. Scatter plot and Bland-Altmann plot")
  )+
  scale_y_continuous(breaks=breaks_width(20),labels = scales::percent_format(accuracy = 1, scale = 1)) +
  scale_x_continuous(breaks=breaks_width(20),labels = scales::percent_format(accuracy = 1, scale = 1)) +
  annotate("text", x = 41.5, y = 21.5, label = text_up) +
  annotate("text", x = 41, y = 11.5, label = text_me) +
  annotate("text", x = 41, y = 1.5, label = text_down)

plot_grid(scatter, bland, labels = "AUTO")

The Bland–Altman plot is a method for comparing two measurements of the same variable. The concept is that X-axis is the mean of your two measurements, and the Y-axis is the difference between the two measurements. The chart can then highlight anomalies, for example, if one method always gives too high a result, then all points are above or below the zero line. It can also reveal that one method overestimates high values and underestimates low values. If the points on the Bland–Altman plot are scattered all over the place, above and below zero, then it suggests that there is no consistent bias of one approach versus the other. It is, therefore, a good first step for two measurement techniques of a variable. It shows that on average Jestrtram EF is 9.91% higher than MIM.

Boxplot and trends (Figure 3)

f_add()

END <- ALL %>%
  select(study_id, visit_number, MIM, JS)

END <- pivot_longer(
  END,
  cols = c(MIM, JS),
  names_to = "muga_type",
  values_to = "ejection_fraction"
)

END$visit_number <- as.numeric(END$visit_number)

p <- ggboxplot(
  END,
  x = "muga_type",
  y = "ejection_fraction",
  color = "muga_type",
  palette = c(col_MIM, col_muga, alpha = 1),
  # palette = "jco",
  
  add = "jitter",
  add.params = list(alpha = 0.6, size = 1.5)
)
figure_boxplot <- p +
    geom_hline(
    yintercept = def_cardiotoxicity_cutoff,
    linetype = "dashed",
    color = "black",
    size = 0.75)+
  geom_signif(
    comparisons = list(c("MIM", "JS")),
    map_signif_level = FALSE,
    textsize = 4,
    annotation = c("p < 0.001") #XXXXXXX
  ) +
  labs(
    # title ="Comparison of ejection fraction between MUGA modalities",
    # caption = glue(
    #   "Figure {f_num}. Comparison of ejection fractions between MUGA softwares"
    # ),
    # subtitle = "between MUGA modalities",
    x = "MUGA Software",
    y = "MUGA LVEF"
  ) +
  scale_y_continuous(breaks = breaks_width(20),
                     labels = scales::percent_format(accuracy = 1, scale = 1)) +
  theme(legend.position = "none") +
  scale_x_discrete(labels = c("MIM", "JS"))

figure_trand <- ALL %>%
  filter(n_total_visits > 2) %>%
  select(study_id,
         visit_number,
         MIM,
         JS,
         MIM_date,
         JS_date) %>%
  group_by(study_id) %>%
  mutate(date_diff_month = (MIM_date - MIM_date[1]) / 30.5) %>%
  ungroup() %>%
  mutate(date_diff_month = round(parse_number(as.character(date_diff_month)))) %>%
  group_by(date_diff_month) %>%
  mutate(date_diff_month = plyr::round_any(date_diff_month, 6)) %>% 
  pivot_longer(cols = c(JS, MIM)) %>%
  group_by(name, date_diff_month) %>% 
  summarise(
    median=median(value),
    sd=sd(value),
    n=n(),
    se=ifelse((standard.error=TRUE), sd/sqrt(n)
              , sd)
  ) %>%  
  filter(n>1) %>% 
   
  ggplot() +
  aes(x = date_diff_month, y = median, color = name) +
  geom_line(size = 1.5) +
  geom_point(size = 2) +
  labs(
    title = "",
    # caption = glue("Figure {f_num}. Errorbar represents standard error"),
    x = "Months from baseline",
    y = "LVEF"
  ) +
  scale_y_continuous(label = label_percent(scale = 1, accuracy = 1)) +
  scale_x_continuous(breaks = seq(0, 54, by = 6)) +
  geom_errorbar(aes(ymin = median-se, ymax = median+se), width = 0.8) +
  geom_ribbon(aes(ymin = median-sd, ymax = median+sd, fill=name), alpha=0.15, outline.type = "full", color=NA) +
  # geom_ribbon(aes(ymin = MIM, ymax = JS)) +
  geom_hline(
    yintercept = def_cardiotoxicity_cutoff,
    linetype = "dashed",
    color = "black",
    size = 0.5
  ) +
  theme_pubr() +
  theme(legend.title = element_blank())


plot_grid(figure_boxplot, figure_trand, labels="AUTO")

Clearly, Jet Stream has higher median, we knew that.

Data acquisition graph (Figure 4)

theme_set(theme_pubr())
my_comparisons <- list( c("Forte", "SKYLight"), c("Brightview", "Forte"), c("Brightview", "SKYLight"))

cameras_figure <-
  ALL %>% 
  drop_na(scanner) %>% 
  ggplot() +
  aes(y=difference_between_modalities, x=scanner, fill=scanner) +
  geom_boxplot() +
  # labs(title=paste("n =",nrow(ALL))) +
  labs(y="Difference in LVEF between softwares") +
  scale_y_continuous(labels = percent_format(accuracy = 1, scale = 1)) +
  stat_compare_means(comparisons = my_comparisons) +
    geom_hline(
    yintercept = 10,
    linetype = "dashed",
    color = "black",
    size = 0.5) +
  theme(legend.position = "none") +
    scale_fill_manual(values=c("gray95", "gray55", "gray27")) +
  labs(x="")

my_comparisons2 <- list(c("16", "32"))
frames_figure <-
  ALL %>% 
  drop_na(number_of_frames) %>% 
  ggplot() +
  aes(y=difference_between_modalities, x=number_of_frames, fill=number_of_frames) +
  geom_boxplot() +
  labs(y="Difference in LVEF between softwares") +
  scale_y_continuous(labels = percent_format(accuracy = 1, scale = 1)) +
  # labs(title=paste("n =",nrow(ALL))) +
  stat_compare_means(comparisons = my_comparisons2) +
    geom_hline(
    yintercept = 10,
    linetype = "dashed",
    color = "black",
    size = 0.5) +
  theme(legend.position = "none") +
    scale_fill_manual(values=c("gray74", "gray58", "gray34")) +
  labs(x="Number of Frames")

ALLI <-
  readxl::read_xlsx("Interobserver variability_AR-GS.xlsx",
                    na = na_strings,
                    skip = 1)
ALLI <- clean_names(ALLI)
ALLI <- ALLI[, colSums(is.na(ALLI)) < nrow(ALLI)]

ALLI <- ALLI %>%
  filter(!is.na(mrn)) %>%
  filter(str_length(abstractor) < 3) %>%
  select(case_no, abstractor, ef_5, ef_17) %>%
  rename(JS_ef = ef_5) %>%
  rename(MIM_ef = ef_17) %>%
  mutate(MIM_ef = as.numeric(MIM_ef)) %>%
  mutate(JS_ef = as.numeric(JS_ef)) %>%
  mutate(MIM_ef = round(MIM_ef))

ALLI <- ALLI %>% pivot_wider(
  names_from = abstractor, values_from = c(JS_ef, MIM_ef)
)

ALLS <- read.csv(here("data", "surrender_pivoted.csv"))

f_add()
interreader <- ALLS %>%
  ggplot(aes(x = AR, y = GS, palette = "#d2d2d2")) +
    geom_abline(
    slope = 1,
    intercept = 0,
    colour = "red",
    linetype = 2,
    size = 0.5
  ) +
  geom_point(size = 1.5, alpha = 0.5) +
  labs(
    x = "Observer 1, LVEF",
    y = "Observer 2, LVEF"
    # caption = glue("Figure {f_num}. inter-observer variation"
                   
  ) +
  stat_smooth(
    size = 1,
    color = "black",
    alpha = 0.5,
    method = "lm"
  ) +
  stat_cor(p.accuracy = 0.001, r.accuracy = 0.01) +
  scale_y_continuous(breaks = breaks_width(10),
                     labels = scales::percent_format(accuracy = 1, scale = 1)) +
  scale_x_continuous(breaks = breaks_width(10),
                     labels = scales::percent_format(accuracy = 1, scale = 1)) +

  theme_pubr()


intra2 <- read_csv(here("data","intraobserver_variability_pivoted.csv")) %>% 
  clean_names() %>% 
  remove_empty()

intrareader <- intra2 %>%
  ggplot(aes(x = ms1, y = ms2, palette = "#d2d2d2")) +
    geom_abline(
    slope = 1,
    intercept = 0,
    colour = "red",
    linetype = 2,
    size = 0.5
  ) +
  geom_point(size = 1.5, alpha = 0.5) +
  labs(
    x = "Measurement 1, LVEF",
    y = "Measurement 2, LVEF"
    # caption = glue("Figure {f_num}. inter-observer variation"
                   
  ) +
  stat_smooth(
    size = 1,
    color = "black",
    alpha = 0.5,
    method = "lm"
  ) +
  stat_cor(p.accuracy = 0.001, r.accuracy = 0.01) +
  scale_y_continuous(breaks = breaks_width(10),
                     labels = scales::percent_format(accuracy = 1, scale = 1)) +
  scale_x_continuous(breaks = breaks_width(10),
                     labels = scales::percent_format(accuracy = 1, scale = 1)) +

  theme_pubr()

plot_grid(cameras_figure, frames_figure, interreader, intrareader,labels = "AUTO")

An amazing plot

Medians for scanner and frames

ALL %>% 
  drop_na(scanner) %>% 
  group_by(scanner) %>% 
  summarise(median(difference_between_modalities)) %>% 
  gt() %>% 
  gt_theme_538()

scanner	median(difference_between_modalities)
Brightview	9.845
Forte	10.560
SKYLight	9.330

ALL %>% 
  drop_na(number_of_frames) %>% 
  group_by(number_of_frames) %>% 
  summarise(median(difference_between_modalities)) %>% 
  gt() %>% 
  gt_theme_538()

number_of_frames	median(difference_between_modalities)
16	10.585
32	9.810

Individual trends (Figure 5)

END <- ALL %>%
  group_by(study_id, n()) %>%
  summarise(n(), study_id, visit_number, MIM, JS, full_wrap_name, n_total_visits) %>%
  filter(`n()` > 1) %>%
  ungroup()

END <- pivot_longer(
  END,
  cols = c(MIM, JS),
  names_to = "muga_type",
  values_to = "ejection_fraction"
)

END %>%
  filter(n_total_visits > 2) %>%
  filter(study_id %in% c(84,90,141,284)) %>% 
  ggplot(aes(x = visit_number, y = ejection_fraction, color = muga_type)) +
  geom_line(aes(group = muga_type), size = 1.5) +
  scale_y_continuous(labels = percent_format(scale=1) ) +
  geom_point(size = 2) +
  geom_hline(
    yintercept = def_cardiotoxicity_cutoff,
    linetype = "dashed",
    color = "black",
    size = 0.5
  ) + 
  facet_wrap("full_wrap_name", scales = 'free_x') +
  labs(
    y="LVEF",
    x=""
  ) +
  theme_pubr()

Multiple Trends

I was thinking how to look at trends and came up with an idea to do plots for each patient with >=2 imaging visits (n=87):

END <- ALL %>%
  group_by(study_id, n()) %>%
  summarise(n(), study_id, visit_number, MIM, JS, full_wrap_name) %>%
  filter(`n()` > 1) %>%
  ungroup()

END <- pivot_longer(
  END,
  cols = c(MIM, JS),
  names_to = "muga_type",
  values_to = "ejection_fraction"
)

f_add()

figure <-
  ggplot(END,
         aes(x = visit_number, y = ejection_fraction, color = muga_type)) +
  geom_line(aes(group = muga_type)) +
  geom_point() +
  geom_hline(
    yintercept = def_cardiotoxicity_cutoff,
    linetype = "dashed",
    color = "black",
    size = 0.5
  ) +
  scale_y_continuous(breaks = c(20, 50, 80)
                     ,
                     labels = scales::percent_format(scale = 1)) +
  scale_x_discrete(guide = guide_axis(n.dodge = 1.5))
figure +
  labs(
    title = glue(
      "Automated ejection fraction in MUGA study, n={n_patients_total_follow}"
    ),
    subtitle = glue("by MUGA test type and visit number, dashed line represents Ejection Fraction = {def_cardiotoxicity_cutoff}%")
    # caption = glue("Figure {f_num}")
  ) +
  facet_wrap("full_wrap_name", scales = 'free_x', ncol = n_wrap_cols) +
  theme_fivethirtyeight() +
  theme(
    legend.position = "top",
    axis.text = element_text(size = wrap_font_size),
    strip.text.x = element_text(size = wrap_font_size)
  )

Defining cardiotoxicity, (Tables)

The problem is in patients whose lines are not parallel, because different MUGA softwares could lead to different EFs and different therapeutic decisions - to stop or not to stop chemo.

We could identify patients where the lines are crossing and see what are the characteristics of these patients. Some of them are very parallel and some cross over a lot.

Eventually I came up with simplest - I think - idea to categorize patients into those that have cardiotoxicity and that that do not. According to one definition (Schwartz et al. 1987):

** Decline in LVEF > 10% to final LVEF < 50% **

So I decided to see how many of our patients that have >=2 visits fulfill this criteria of cardiotoxicity in both MUGA softwares, or in various combinations of MUGA softwares. Here is the table:

Cardiotoxicity comparison

t_add()

tribble(
  ~ modality,
  ~ "N patients with cardiotxitity",
  ~ "% patients with cardiotoxicity",
  "MIM",
  n_cardiot_MIM,
  cardiot_MIM_perc,
  "JS",
  n_cardiot_JS,
  cardiot_JS_perc,
  "Any software crossover",
  n_cardiotoxicity_any_crossover,
  cardiotoxicity_any_crossover_perc
) %>%
  gt(
    rowname_col = "aaaaaaaaaaaaaaaaa",
    
  ) %>% fmt_number(
    columns = vars("% patients with cardiotoxicity"),
    decimals = 1
  ) %>% 
  tab_header("Table 2, Cardiotoxicity ") %>% 
  gt_theme_538()

Table 2, Cardiotoxicity

modality	N patients with cardiotxitity	% patients with cardiotoxicity
MIM	23	26.4
JS	10	11.5
Any software crossover	38	43.7

First 2 rows are straightforward. They show how many patients would fulfill the cardiotoxicity definition if we followed them only through 1 MUGA modality, such are MIM in 1st row and JS in 2nd row. It is as if we followed line of only 1 color in Figure 5. We see that there is many more patients with cardiotoxicity in MIM VS JS (28.7% VS 10.3%). That is a little bit surprising to me because we can see on Figure 3 that there is bigger variability in JS so all thongs being equal there should be more decline in EF. However if might be more difficult for JETSTRAM to cross EF=50 threshold as it starts from higher lever, which could explain it.

Next 2 rows show numbers and percentages of patients that fulfill cardiotoxicity definition if we change MUGA modalities over time. Row 3 is MIM -> JS and row 4 the oposite change. I calculated it by checking the highest and lowest EF of any visit for each patient for each modality, and later calculating difference taking highest MIM and lowest JS in row 3 and highest JS and lowest MIM in row 4. If this difference is >10% and at the same time lower EF is < 50 (per cardiotoxicity definition decline in LVEF of at least 10% to an LVEF of less than 50). The results are very interesting but not very suprising. The percentage of patients with cardiotoxicity is much higher if we go JS -> MIM than the opposite way (43.7% vs 4.6%). This is explained by generally higher JS EF’s so if we start with it we would have much bigger drop if consecutive modality is MIM.

5th row shows how many pts fulfull cardiotoxicity definition if we can go either way so MIM -> JS or JS -> MIM, but it turns out to be the same as row 4, JS -> MIM, because it already covers all patients fulfilling cardiotoxicity definition (MIM->JS does not add any more).

I think that this could be the main point of our paper. The cardiotoxicity definition is elegant, we already cite it in our manuscript so I think it could be the best way of highlighting the problem. The variability of patients fulfilling the cardiotoxicity definition between modalities is shocking, as low as 4.6% with MIM->JS modality and as high as 43.7% with JESTREAM->MIM. I think it could be a very good selling point. We can also show p-values between groups:

Comparison between modialities

t_add()
p_pure <-
  prop.test(
    c(n_cardiot_JS, n_cardiot_MIM),
    c(n_patients_total_follow, n_patients_total_follow)
  )
p_crossover1 <-
  prop.test(
    c(n_cardiot_MIM, n_cardiotoxicity_any_crossover),
    c(n_patients_total_follow, n_patients_total_follow)
  )
p_crossover2 <-
  prop.test(
    c(n_cardiot_JS, n_cardiotoxicity_any_crossover),
    c(n_patients_total_follow, n_patients_total_follow)
  )



n_reclassified_pure <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% pull(muga_reclassified) %>% sum()
n_reclassified_cross_mim <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% pull(muga_reclassified_crossover_MIM) %>% sum()
n_reclassified_cross_js <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% pull(muga_reclassified_crossover_JS) %>% sum()

tribble(
  ~ "Software 1",
  ~ "Software 2",
  ~ "Number reclassified",
  ~ "p-value",
  ~ "CI",
  glue("MIM = {n_cardiot_MIM}"),glue("JS = {n_cardiot_JS}"),
  n_reclassified_pure,
  round(p_pure$p.value, 6),
  round(p_pure$conf.int, 2),
  glue("MIM = {n_cardiot_MIM}"),
  glue("Any software crossover = {n_cardiotoxicity_any_crossover}"),
  n_reclassified_cross_mim,
  round(p_crossover1$p.value, 6),
  round(p_crossover1$conf.int, 2),
  glue("JS = {n_cardiot_JS}"),
  glue("Any software crossover = {n_cardiotoxicity_any_crossover}"),
  n_reclassified_cross_js,
  round(p_crossover2$p.value, 6),
  # 909090,
  round(p_crossover2$conf.int, 2)
) %>% 
  gt() %>% 
   fmt_number(
    columns = vars("p-value"),
    decimals = 3,
    # n_sigfig=1,
    drop_trailing_dec_mark=FALSE
  ) %>% 
  tab_header("Table 3, Comparison between softwares ") %>% 
  gt_theme_538()

Table 3, Comparison between softwares

Software 1	Software 2	Number reclassified	p-value	CI
MIM = 23	JS = 10	15	0.020	-0.28, -0.02
MIM = 23	Any software crossover = 38	15	0.026	-0.32, -0.02
JS = 10	Any software crossover = 38	28	0.000	-0.46, -0.19

# mutate(
#       nr = row_number(),
#       p = case_when(
#         p < 0.001 ~ "<0.001",
#         TRUE ~ form(p, 3)
#       ),

# form <- function(x, nsmall = 2)
#   format(x, digits = 0, nsmall = nsmall, scientific = FALSE)

# CI musi sie budowac na roznicy miedzu proporcjami

# wywalic CI

#recznie zmutowac na caracter
  
  # kbl(caption = glue("Table 2, Comparison between softwares")) %>%
  # kable_classic(full_width = F, html_font = "Cambria")

\(\color{red}{\text{IT DOES NOT ADD UP (MIM AND JS) I AM FIXING IT}}\)

In row 1 and 3 there is a clear difference in the number of cardiotoxicity diagnoses when using different softwares / software cross-overs, as evidenced by p<0.05. I used Test of Equal or Given Proportions for that.

Trends with cardiotoxicity

Below I show Figure 6 with added red rectangle to indicate patients with cardiotoxicity defined by MUGA_any_software_crossover (most inclusive definition).

rect_list <- ALL_DIST %>%
  filter(cardiotoxicity_algorithm_crossover==1) %>% 
  pull(study_id)

END <- ALL %>% 
  filter(n_total_visits>1) %>% 
  summarise(n(),study_id,visit_number,MIM,JS)

END <- pivot_longer(END,cols = c(MIM,JS),
                    names_to = "muga_type",
                    values_to = "ejection_fraction")

END$visit_number <- as.integer(END$visit_number)
END$visit_number <- as.factor(END$visit_number)

# END$study_id <- as.integer(END$study_id)


figure+
  labs(
  title =glue("Automated ejection fraction in MUGA study, n={n_patients_total_follow}"),
  subtitle = glue("by MUGA test type and visit number, dashed line represents EF={def_cardiotoxicity_cutoff}%\nRed rectangle indicates patients with cardiotoxicity (MUGA_any_software_crossover)
  ")
  # ,  caption = "Figure 6"
  ) +
  geom_rect(data = subset(END, study_id %in% rect_list),
                          fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
            ymin = -Inf,ymax = Inf, size=1.5) +
  #   geom_rect(data = subset(END, study_id %in% rect__blue_list), 
  #                         fill = NA, colour = "blue", alpha=0.3, xmin = -Inf,xmax = Inf,
  #           ymin = -Inf,ymax = Inf, size=0.3) +
  facet_wrap("study_id", scales = 'free_x', ncol = n_wrap_cols) +
  theme_fivethirtyeight() +
  theme(legend.position = "top")

theme_set(theme_pubr())

I think that additionally showing something like Figure 5 or 6 would could help us persuade readers that there is a lot of software crossover and variability in modalities and will be helpful to in pushing out point.

Single trend figure

I also created a graph based on Fig 2 in publication (Moey, Liles, and Carabello 2019) It shows aggregate percentage decrease from baseline by month from baseline.

Sankey diagram (Figure 6)

n_cartiotox_MIM_yes <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% filter(cardiotoxicity_algorithm_MIM==1) %>% nrow()
n_cartiotox_MIM_no <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% filter(cardiotoxicity_algorithm_MIM==0) %>% nrow()

n_cartiotox_JS_yes <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% filter(cardiotoxicity_algorithm_JS==1) %>% nrow()
n_cartiotox_JS_no <- ALL %>% filter(n_total_visits>1) %>% distinct(study_id, .keep_all = TRUE) %>% filter(cardiotoxicity_algorithm_JS==0) %>% nrow()


ALL %>% 
  filter(n_total_visits>1) %>% 
  distinct(study_id, .keep_all = TRUE) %>% 
  count(cardiotoxicity_algorithm_MIM, cardiotoxicity_algorithm_JS, muga_reclassified, name = "freq") %>%
  mutate(muga_reclassified=as.factor(muga_reclassified)) %>% 
  mutate(cardiotoxicity_algorithm_MIM=as.factor(cardiotoxicity_algorithm_MIM)) %>%
  mutate(muga_reclassified = as.factor(muga_reclassified)) %>%
  mutate(cardiotoxicity_algorithm_MIM = recode_factor(cardiotoxicity_algorithm_MIM,`0` = glue("MIM no \n cardiotoxicity \n n={n_cartiotox_MIM_no}"), `1` = glue("MIM  \n cardiotoxicity \n n={n_cartiotox_MIM_yes}") )) %>%
  mutate(cardiotoxicity_algorithm_JS = recode_factor(cardiotoxicity_algorithm_JS,`0` = glue("JS no \n cardiotoxicity \n n={n_cartiotox_JS_no}"), `1` = glue("JS \n cardiotoxicity \n n={n_cartiotox_JS_yes}"))) %>%
  mutate(gr_color=c("A","A", "B", "B")) %>% 

  ggplot(
       aes(y = freq, 
           # fill=gr_color,
           axis1 = cardiotoxicity_algorithm_MIM, axis2 = cardiotoxicity_algorithm_JS
            )) +
  geom_stratum(width = 1/5, 
               fill = "grey",
               color = "black"
               ) +
  geom_alluvium(aes(
    # fill = "pink", colow="yellow"
    ), width = 1/5) +
  # geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
  scale_x_discrete(limits = c("cardiotoxicity_algorithm_MIM", "cardiotoxicity_algorithm_JS"))+
  geom_text(aes(label = cardiotoxicity_algorithm_MIM), stat = "stratum") +
  geom_text(aes(label = cardiotoxicity_algorithm_JS), stat = "stratum")+
  annotate("text", x =1.5, y=18, label = "reclassified") +
  annotate("text", x =1.5, y=15, label = "(15/87)") +
  theme_void() +
  theme(legend.position="top") +
  theme(legend.position = "none")

# data(vaccinations)
# vaccinations2 <- vaccinations %>% 
#   filter(survey != "ms460_NSA", response %in% c("Always", "Never")) %>% 
#   select(-start_date, -end_date) %>% 
#   filter(subject %in% c(1, 6, 17, 21))
# vaccinations2
# ggplot(vaccinations2, aes(x = survey, stratum = response, alluvium = subject,
#   weight = freq, fill = response, label = response)) +
#   geom_flow() +
#   geom_stratum(alpha = 0.5) +
#   geom_text(stat = "stratum", size = 3) +
#   theme(legend.position = "none") +
#   ggtitle("vaccination survey responses at three points in time")



ALL %>% 
  filter(n_total_visits>1) %>% 
  distinct(study_id, .keep_all = TRUE) %>% 
  count(cardiotoxicity_algorithm_MIM, cardiotoxicity_algorithm_JS, muga_reclassified, name = "freq") %>%
  mutate(muga_reclassified=as.factor(muga_reclassified)) %>% 
  mutate(cardiotoxicity_algorithm_MIM=as.factor(cardiotoxicity_algorithm_MIM)) %>%
  mutate(muga_reclassified = as.factor(muga_reclassified)) %>%
  mutate(cardiotoxicity_algorithm_JS=as.factor(cardiotoxicity_algorithm_JS )) %>% 
  pivot_longer(1:2) %>% 
  mutate(figure_column=rep(c("MIM","JS"),4)) %>% 
  mutate(id=rep(c(1:4),each=2)) %>% 
  ggplot(aes(x = name, stratum = value, alluvium = id,
  y = freq, fill = value, label = value)) +
  geom_flow() +
  geom_stratum(alpha = 0.5) +
  geom_text(stat = "stratum", size = 3) +
  theme(legend.position = "none") +
  ggtitle("Colored flowy to fix")

  # theme_void()
    
  # mutate(cardiotoxicity_algorithm_MIM = recode_factor(cardiotoxicity_algorithm_MIM,`0` = glue("MIM no \n cardiotoxicity \n n={n_cartiotox_MIM_no}"), `1` = glue("MIM  \n cardiotoxicity \n n={n_cartiotox_MIM_yes}") )) %>%
  # mutate(cardiotoxicity_algorithm_JS = recode_factor(cardiotoxicity_algorithm_JS,`0` = glue("JS no \n cardiotoxicity \n n={n_cartiotox_JS_no}"), `1` = glue("JS \n cardiotoxicity \n n={n_cartiotox_JS_yes}"))) %>%
  # mutate(gr_color=c("A","A", "B", "B")) %>% 

  # ggplot(
  #      aes(y = freq,
  #          x=figure_column
  #          # fill=gr_color,
  #          # axis1 = cardiotoxicity_algorithm_MIM, axis2 = cardiotoxicity_algorithm_JS
  #           )) +
  # geom_stratum(width = 1/5,
  #              fill = "grey",
  #              color = "black"
  #              ) +
  # geom_alluvium(aes(
  #   # fill = "pink", colow="yellow"
  #   ), width = 1/5) +
  # # geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
  # # scale_x_discrete(limits = c("cardiotoxicity_algorithm_MIM", "cardiotoxicity_algorithm_JS"))+
  # # geom_text(aes(label = cardiotoxicity_algorithm_MIM), stat = "stratum") +
  # # geom_text(aes(label = cardiotoxicity_algorithm_JS), stat = "stratum")+
  # # annotate("text", x =1.5, y=18, label = "reclassified") +
  # # annotate("text", x =1.5, y=15, label = "(15/87)") +
  # theme_void() +
  # theme(legend.position="top") +
  # theme(legend.position = "none")

Some more afterthoughts / questions:

We can suggest correction of automated EF while switching modalities, for example to substract 10% from EF when going from JS to MIM. I am going to do Bland Altman plot next.

Suggested figure with a diagram simillar to (Huang et al. 2017)

Now its getting serious because we are getting closer to finishing and I want to share some of my data handling responsibility with others, so if we go down, we go down together :D

Graphs

Combined with all data acquisition

Difference between scanners oscillate around 10%

HR

ALL %>%
  # drop_na(heart_rate_1) %>%
  ggplot() +
  aes(x=heart_rate_1, y=difference_between_modalities)+
  geom_point() +
  geom_smooth(method="lm") +
  stat_cor(p.accuracy = 0.001, r.accuracy = 0.001) +
  labs(title=paste("n =",nrow(ALL))) +
    geom_hline(
    yintercept = 10,
    linetype = "dashed",
    color = "black",
    size = 0.5)

Very small correlation, non-significant

BMI

ALL %>%
  # drop_na(heart_rate_1) %>%
  ggplot() +
  aes(x=bmi_combined, y=difference_between_modalities)+
  geom_point() +
  geom_smooth(method="lm") +
  stat_cor(p.accuracy = 0.001, r.accuracy = 0.001) +
  labs(title=paste("n =",nrow(ALL))) +
    geom_hline(
    yintercept = 10,
    linetype = "dashed",
    color = "black",
    size = 0.5)

Moderate, significant correlation

Tables

Table with all patients

library("gtsummary")
theme_gtsummary_compact()

var_label(ALL) <- list(
                       age_years = "Age (years)",
                       gender = "Gender",
                       race = "Race",
                       bmi_combined = "Body mass index (kg/m2)",
                       bsa = "Body surface area (m2)",
                       ethnicity = "Ethnicity",
                       hypertension = "Hypertension",
                       hyperlipidemia = "Hyperlipidemia",
                       coronary_artery_disease = "Coronary artery disease",
                       diabetes_mellitus = "Diabetes mellitus",
                       atrial_fibrillation = "Atrial fibrillation",
                       peripheral_vascular_disease = "Peripherial vascular disease",
                       chronic_kidney_disease = "Chronic kidney disease",
                       stroke_or_tia = "Stroke or TIA",
                       type_of_cancer = "Type of Cancer",
                       scanner = "Scanner",
                       number_of_frames = "Number of frames",
                       MIM = "MIM LVEF (%)",
                       JS = "JS LVEF (%)",
                       ed_volume_1 = "ED volume",
                       es_volume_1 = "ES volume",
                       heart_rate_1 = "Heart rate during MUGA (beats/min.)",
                       beats_accepted = "Beats accepted",
                       beats_rejected = "Beats rejected",
                       difference_betweem_modialieits_more_eq_10 = "Difference in MUGA software >=10%",
                       black = "Black race",
                       n_total_visits = "Total number of visits",
                       cad_combined_of_4 = "Coronary artery disease",
                       pulm_disease_combined_of_2 = "Pulmonary disease",
                       tobacco_use = "Tobacco use",
                       high_r_r_value = "High R-R value",
                       low_r_r_value = "Low R-R value",
                       ed_time = "ED time",
                       es_time = "ES time",
                       frame_time = "Frame time",
                       heart_rate = "Heart rate during vitals",
                       dose_mci = "Radioisotope dose (mCi)",
                       anthracycyclines = "Anthracyclines",
                       trastuzumab = "Trastuzumab",
                       alkylating_agents = "Alkylating agents",
                       antimicrotubule_agents = "Antimicrotubule agents",
                       antimetabolites = "Antimetabolites",
                       immuno_checkpoint_inhibitors = "Immunologic checkpoint inhibitors",
                       other = "Other chemotherapy"
                       )

tbl_demo_all_vars <-
  c(
    "age_years",
    "gender",
    "race",
    "ethnicity",
    "bmi_combined",
    "bsa",
    
    "hypertension",
    "diabetes_mellitus",
    "hyperlipidemia",
    "cad_combined_of_4",
    "stroke_or_tia",
    "atrial_fibrillation",
    "chronic_kidney_disease",
    "pulm_disease_combined_of_2",
    "tobacco_use",
    "type_of_cancer",
    "anthracycyclines",
    "trastuzumab",
    "alkylating_agents",
    "antimicrotubule_agents",
    "antimetabolites",
    "immuno_checkpoint_inhibitors",
    "other"
  )



# ALL %>%
#   # fill(age_years) %>% 
#   mutate(difference_betweem_modialieits_more_eq_10 = recode_factor(difference_betweem_modialieits_more_eq_10,`0` = "< 10%", `1` = ">= 10%")) %>%
#   # filter(n_total_visits>1) %>%
#   distinct(study_id, .keep_all = TRUE) %>% 
#   # select(variables_tables) %>% 
#   tbl_summary(
#     include = tbl_demo_all_vars,
#     by = difference_betweem_modialieits_more_eq_10,
#               sort=list(everything() ~ "frequency"), missing="no",
#               value = list(everything() ~ 1)
#               ) %>% 
#   add_p() %>%
#   add_overall() %>%
#   modify_spanning_header(c("stat_1", "stat_2") ~ "**Difference in LVEF between MUGA modalities**") %>%
#   bold_p()
#   # add_stat_label() 



ALL %>%
  # fill(age_years) %>% 
  # mutate(difference_betweem_modialieits_more_eq_10 = recode_factor(difference_betweem_modialieits_more_eq_10,`0` = "< 10%", `1` = ">= 10%")) %>%
  # filter(n_total_visits>1) %>%
  distinct(study_id, .keep_all = TRUE) %>% 
  tbl_summary(
    include = tbl_demo_all_vars
    ,
    # by = difference_betweem_modialieits_more_eq_10,
              sort=list(everything() ~ "frequency"), missing="no",
              value = list(everything() ~ 1)
              )

Characteristic	N = 310¹
Age (years)	54 (44, 63)
Gender
Female	221 (71%)
Male	89 (29%)
Race
Other Race	148 (48%)
Black or African American	126 (41%)
White	26 (8.4%)
Asian	10 (3.2%)
Ethnicity
NOT Hispanic or Latino	181 (58%)
Hispanic or Latino	127 (41%)
Unknown / Not Reported	2 (0.6%)
Body mass index (kg/m2)	28 (24, 32)
Body surface area (m2)	1.80 (1.60, 1.92)
Hypertension	134 (44%)
Diabetes mellitus	56 (18%)
Hyperlipidemia	52 (17%)
Coronary artery disease	22 (7.1%)
Stroke or TIA	4 (1.3%)
Atrial fibrillation	8 (2.6%)
Chronic kidney disease	8 (2.6%)
Pulmonary disease	33 (11%)
Tobacco use
Never	180 (58%)
Former	89 (29%)
Current	39 (13%)
Type of Cancer
Breast	142 (46%)
Lymphoma	82 (27%)
Sarcoma	25 (8.1%)
Gastric	16 (5.2%)
Other	11 (3.6%)
Ovarian	9 (2.9%)
Uterine	7 (2.3%)
Bone	6 (1.9%)
Multiple myeloma	6 (1.9%)
Lung	4 (1.3%)
Anthracyclines	198 (64%)
Trastuzumab	64 (21%)
Alkylating agents	134 (43%)
Antimicrotubule agents	138 (45%)
Antimetabolites	33 (11%)
Immunologic checkpoint inhibitors	2 (0.6%)
Other chemotherapy	182 (63%)
¹ Median (IQR); n (%)

  # add_p() %>%
  # add_overall() %>%
  # modify_spanning_header(c("stat_1", "stat_2") ~ "**Difference in LVEF between MUGA modalities**") %>%
  # bold_p()
  # add_stat_label()

All MUGAS - technical table

tbl_tach_all_vars <- c(
  "difference_betweem_modialieits_more_eq_10",
  "heart_rate_1",
  "bmi_combined",
  "scanner",
  "number_of_frames",
  "dose_mci",
  "MIM",
  "JS"
)
theme_gtsummary_compact()
ALL %>%
  # fill(age_years) %>% 
  mutate(difference_betweem_modialieits_more_eq_10 = recode_factor(difference_betweem_modialieits_more_eq_10,`0` = "< 10%", `1` = "≥ 10%")) %>%
  # filter(n_total_visits>1) %>% 
  # select(variables_tables) %>% 
  tbl_summary(
    include = tbl_tach_all_vars,
    by = difference_betweem_modialieits_more_eq_10,
              sort=list(everything() ~ "frequency"), missing="no"
              ) %>% 
  add_p() %>% 
  add_overall() %>% 
  modify_spanning_header(c("stat_1", "stat_2") ~ "**Difference in LVEF between MUGA modalities**") %>% 
  bold_p()

Characteristic	Overall, N = 582¹	Difference in LVEF between MUGA modalities		p-value²
Characteristic	Overall, N = 582¹	< 10%, N = 270¹	= 10%, N = 312¹	p-value²
Heart rate during MUGA (beats/min.)	76 (67, 87)	73 (65, 87)	77 (69, 86)	0.029
Body mass index (kg/m2)	28 (24, 32)	28 (24, 32)	29 (25, 34)	0.006
Scanner				0.12
Forte	365 (63%)	158 (59%)	207 (67%)
Brightview	132 (23%)	67 (25%)	65 (21%)
SKYLight	80 (14%)	43 (16%)	37 (12%)
Number of frames				0.046
16	346 (60%)	149 (56%)	197 (64%)
32	231 (40%)	119 (44%)	112 (36%)
Radioisotope dose (mCi)	21.30 (20.80, 21.80)	21.30 (20.90, 21.80)	21.30 (20.80, 21.80)	0.4
MIM LVEF (%)	56 (49, 61)	57 (51, 64)	54 (48, 59)	<0.001
JS LVEF (%)	66 (59, 72)	63 (57, 70)	67 (61, 73)	<0.001
¹ Median (IQR); n (%) ² Wilcoxon rank sum test; Pearson's Chi-squared test

  # add_stat_label()

theme_gtsummary_compact()

# ALL %>%
#   # fill(age_years) %>%
#   mutate(muga_reclassified_crossover_MIM = recode_factor(muga_reclassified_crossover_MIM,`0` = "No reclassification", `1` = "Reclassification")) %>%
#   filter(n_total_visits>1) %>%
#   distinct(study_id, .keep_all = TRUE) %>%
#   select(variables_tables) %>%
#   tbl_summary(
#     by = muga_reclassified_crossover_MIM,
#               label = age_years ~ "Age (years)",
#               missing="no",
#     value = list(everything() ~ 1)
#               ) %>%
#   add_p() %>%
#   add_overall() %>%
#   modify_spanning_header(c("stat_1", "stat_2") ~ "**Reclassification between software crossover and MIM**") %>%
#   bold_p()

Modelling

a <- lm(
  difference_between_modalities ~ number_of_frames + bmi.x + heart_rate_1
  + ed_volume_1 + beats_accepted + gender + age_years
  ,
  data = ALL
) %>%
  tbl_regression()


ALL$age_cut <-cut(ALL$age_years, c(0,65,100))

ALL$bmi_cut <-cut(ALL$bmi_combined, c(0,20,25,30,35,100))
ALL$heart_cut <- cut(ALL$heart_rate_1, c(0,75,300))

var_label(ALL) <- list(
                       age_cut = "Age (years)",
                       bmi_cut = "BMI",
                       heart_cut = "HR")


xx <- ALL %>% 
  filter(n_total_visits>1)


theme_gtsummary_compact()
# ALL$black

# ALL %>% 
#   distinct(study_id, .keep_all = TRUE) %>% 
#   count(race,ethnicity)

# hist(ALL$age_years)

# cut()
# ALL %>% 
#   count(bmi_cut)

  glm(
  muga_reclassified ~
  # race +
    # black +\
    age_years +
    # age_cut +
    gender +
    # bmi_combined +
    bmi_cut +
    heart_rate_1 +
    # heart_cut +
  # + beats_rejected + 

    scanner +
    # es_volume_1 + 
    number_of_frames,
  data = xx,
  family = "binomial"
) %>% 
  # tab_model()
  tbl_regression(exponentiate=TRUE)%>% bold_p() %>% 
  add_glance_source_note(
    include = c(nobs)
  )

Characteristic	OR¹	95% CI¹	p-value
Age (years)	0.95	0.93, 0.97	<0.001
Gender
Female	—	—
Male	0.62	0.08, 2.84	0.6
BMI
(0,20]	—	—
(20,25]	0.08	0.01, 0.37	0.003
(25,30]	0.80	0.34, 1.96	0.6
(30,35]	1.27	0.49, 3.38	0.6
(35,100]	2.57	1.08, 6.46	0.037
Heart rate during MUGA (beats/min.)	0.98	0.96, 1.00	0.054
Scanner
Brightview	—	—
Forte	0.19	0.01, 1.12	0.13
SKYLight	0.61	0.23, 1.53	0.3
Number of frames
16	—	—
32	0.31	0.02, 1.77	0.3
nobs = 356
¹ OR = Odds Ratio, CI = Confidence Interval

# glance(a)

    
#   add_glance_table(
#     # include = c(AIC)
# )
  # add_n()
# hist(ALL$bmi_combined)
# 
# explanatory = c(
#                 # "Age",
#                 "age_cut",
#                 "gender","bmi_cut","heart_cut","scanner","number_of_frames")
# dependent = "muga_reclassified"
# 
# plot <- ALL %>%
#   or_plot(dependent, explanatory, remove_ref=TRUE, table_text_size=4, title_text_size=14, dependent_label = "Multivariate analysis: odds of reclassification",
#           breaks = c(0.1,0.5,1,5), 
#           # column_space=10,
#           plot_opts=list(xlab("OR, 95% CI") 
#                          ,theme(axis.title = element_text(size=10)))          )

# xx %>% 
#   distinct(study_id, .keep_all = TRUE) %>%  
#   # group_by(study_id) %>% 
#   count(muga_reclassified)

Importmant problems with data

A lot of these variables are used in regression models, NA need to be checked and fixed and strange values need to be collected.

Links

Link to tables: https://docs.google.com/spreadsheets/d/1dUNGFJh649CwbixpODYFLA3Iki4r54tW1AuOjejEB0k/edit?usp=sharing

References

Bland, J. M., and D. G. Altman. 1986. “Statistical methods for assessing agreement between two methods of clinical measurement.” Lancet (London, England) 1 (8476): 307–10.

Giavarina, Davide. 2015. “Understanding Bland Altman Analysis.” Biochemia Medica 25 (2): 141–51. https://doi.org/10.11613/BM.2015.015.

Huang, Hans, Prabhjot S. Nijjar, Jeffrey R. Misialek, Anne Blaes, Nicholas P. Derrico, Felipe Kazmirczak, Igor Klem, Afshin Farzaneh-Far, and Chetan Shenoy. 2017. “Accuracy of Left Ventricular Ejection Fraction by Contemporary Multiple Gated Acquisition Scanning in Patients with Cancer: Comparison with Cardiovascular Magnetic Resonance.” Journal of Cardiovascular Magnetic Resonance: Official Journal of the Society for Cardiovascular Magnetic Resonance 19 (1): 34. https://doi.org/10.1186/s12968-017-0348-4.

Moey, Melissa Y. Y., Darla K. Liles, and Blase A. Carabello. 2019. “Concomitant Use of Renin-Angiotensin-Aldosterone System Inhibitors Prevent Trastuzumab-Induced Cardiotoxicity in Her2+ Breast Cancer Patients: An Institutional Retrospective Study.” Cardio-Oncology 5 (1): 9. https://doi.org/10.1186/s40959-019-0043-8.

Schwartz, Ronald G., William B. McKenzie, Jonathan Alexander, Philip Sager, Anthony D’Souza, Amita Manatunga, Peter E. Schwartz, et al. 1987. “Congestive Heart Failure and Left Ventricular Dysfunction Complicating Doxorubicin Therapy: Seven-Year Experience Using Serial Radionuclide Angiocardiography.” The American Journal of Medicine 82 (6): 1109–18. https://doi.org/10.1016/0002-9343(87)90212-9.

MUGA manuscript markdown

MT

12 April, 2021, 21:02