1 Setup

# Resolve the prefix for data paths: when the working directory is the
# "programs" folder, files live one level up; otherwise use paths as-is.
path <- if (basename(getwd()) == "programs") "../" else ""

library(tidyverse)
library(RocheRWDS)
library(dplyr)
library(DT)
library(tableone)
library(survival)
library(survminer)
library(DiagrammeR)
library(irr)
library(ICC)
library(raters)
library(rel)
library(psych)
library(BlandAltmanLeh)

2 Get data

# Read the coded lines-of-therapy file. Every column type is declared
# explicitly so identifiers stay character and dates are parsed as dates.
d_lines <- paste0(path, "import/lines.csv") %>%
  readr::read_csv(
    col_types = cols(
      PatientID          = col_character(),
      start_induct       = col_date(format = ""),
      end_induct         = col_date(format = ""),
      selectdrugs_induct = col_character(),
      Transplant         = col_date(format = ""),
      start_cons         = col_date(format = ""),
      end_cons           = col_date(format = ""),
      selectdrugs_cons   = col_character(),
      start_maint        = col_date(format = ""),
      end_maint          = col_date(format = ""),
      selectdrugs_maint  = col_character(),
      coder              = col_character(),
      line_notes         = col_character()
    )
  )

 # Size of the lines table: row count, distinct patients and the earliest
 # induction start date.
 knitr::kable(
   summarise(
     d_lines,
     Rows = n(),
     Patients = n_distinct(PatientID),
     EarliestLineStart = min(start_induct)
   ),
   caption = "Patient count on lines table"
 )
Patient count on lines table
Rows Patients EarliestLineStart
357 100 2011-02-01

2.1 Clean data

# Number each coder's lines per patient in order of induction start
# (LineNumber) and record how many lines that coder assigned to the
# patient (TotalLines).
d_lines <- d_lines %>%
  arrange(PatientID, start_induct) %>%
  group_by(PatientID, coder) %>%
  mutate(
    LineNumber = row_number(),
    TotalLines = n()
  ) %>%
  # Drop the grouping so later steps do not silently run per patient/coder.
  ungroup()

# Preview the first three rows of the lines table with the added counters.
knitr::kable(
  head(d_lines, 3),
  caption = "First 3 rows of the line table"
)
First 3 rows of the line table
PatientID start_induct end_induct selectdrugs_induct Transplant start_cons end_cons selectdrugs_cons start_maint end_maint selectdrugs_maint coder line_notes LineNumber TotalLines
F00E4B82A2B05 2016-08-15 2016-11-14 bortezomib, dexamethasone, lenalidomide NA 2017-10-15 2017-10-15 No consolidation 2017-10-15 2017-10-15 No maintenance wassnere Add notes here 1 1
F00E4B82A2B05 2016-08-15 2016-09-25 bortezomib, dexamethasone NA 2017-11-08 2017-11-08 No consolidation 2017-11-08 2017-11-08 No maintenance byonj Add notes here 1 2
F00E4B82A2B05 2016-09-26 2017-06-22 bortezomib, dexamethasone, lenalidomide NA 2017-11-08 2017-11-08 No consolidation 2017-11-08 2017-11-08 No maintenance byonj Add notes here 2 2

Both PDCO colleagues identified an issue with FE0CCE4BF9D2B:

I ran into an issue on a pat. with double transplant: when recording the second transplant, induction came up again in the record (double record for the induction regimen for pat.).

This patient appears to have received a double auto transplant. Unable to code both transplants in the same line of therapy, but both transplants should be considered as part of the same line of therapy.

How to embed transplants in lines is a discussion point for Flatiron.

2.2 Summarise

  # Count the distinct patients coded by each coder.
  knitr::kable(
    summarise(
      group_by(d_lines, coder),
      `Line table` = n_distinct(PatientID)
    ),
    caption = "100 in both line table"
  )
100 in both line table
coder Line table
byonj 100
wassnere 100

3 Induction Analysis

  # Keep only each coder's first line of therapy per patient.
  d_lines_induction <- ungroup(filter(d_lines, LineNumber == 1))

  # Put the two coders' first lines side by side, one row per patient
  # (suffix _w = coder "wassnere", suffix _b = coder "byonj"), then derive
  # agreement flags and date/duration differences (all computed as B minus W).
  d_lines_induction <- d_lines_induction %>%
    filter(coder == "wassnere") %>%
    select(
      PatientID,
      start_induct_w = start_induct,
      end_induct_w = end_induct,
      selectdrugs_induct_w = selectdrugs_induct,
      TotalLines_w = TotalLines
    ) %>%
    left_join(
      d_lines_induction %>%
        filter(coder == "byonj") %>%
        select(
          PatientID,
          start_induct_b = start_induct,
          end_induct_b = end_induct,
          selectdrugs_induct_b = selectdrugs_induct,
          TotalLines_b = TotalLines
        ),
      by = "PatientID"
    ) %>%
    mutate(
      # induction length in days, per coder
      duration_b = as.numeric(end_induct_b - start_induct_b),
      duration_w = as.numeric(end_induct_w - start_induct_w),
      # 1/0 flag: do both coders name the same 1L regimen?
      same_induction = as.numeric(selectdrugs_induct_b == selectdrugs_induct_w),
      # display label for the flag above
      same_inductionC = ifelse(
        same_induction == 1,
        "Same 1L induction",
        "Different 1L induction"
      ),
      start_date_diff = as.numeric(start_induct_b - start_induct_w),
      duration_diff = duration_b - duration_w,
      # 1/0 flag: do both coders record the same total number of lines?
      same_numlines = as.numeric(TotalLines_b == TotalLines_w),
      same_numLinesC = ifelse(
        same_numlines == 1,
        "Same number of lines",
        "Different number of lines"
      )
    )

3.1 Induction First Line Name

Check if the induction 1L has the same name.

# Tabulate agreement on the 1L induction regimen name, formatted "n (pct%)".
t_sameinduction <- d_lines_induction %>%
  rename(`1L induction` = same_inductionC) %>%
  count(`1L induction`) %>%
  mutate(n = paste0(n, " (", round(100 * n / sum(n)), "%)"))

3.1.1 Number with the same 1L name

# Render the 1L agreement table.
knitr::kable(t_sameinduction, caption = "Number with the same 1L")
Number with the same 1L
1L induction n
Different 1L induction 14 (14%)
Same 1L induction 86 (86%)

3.1.2 Inter-rater reliability using Fleiss’ Kappa

Method 1 = kappam.fleiss function Method 2 = ckap function

# Inter-rater reliability of the 1L induction regimen name: one column per
# coder, passed to kappam.fleiss().
induction_1L <- d_lines_induction %>%
  select(selectdrugs_induct_w, selectdrugs_induct_b)
result <- kappam.fleiss(induction_1L)

result
##  Fleiss' Kappa for m Raters
## 
##  Subjects = 100 
##    Raters = 2 
##     Kappa = 0.828 
## 
##         z = 18.4 
##   p-value = 0

Without needing to look at the p-value, the Kappa statistic is 0.8281049 using the kappam.fleiss function, suggesting a high level of agreement between the two raters.

# Second kappa estimate via ckap(): unweighted, "Fleiss" standard error,
# 95% confidence level, R = 0.
result <- ckap(
  data = induction_1L,
  weight = c("unweighted"),
  std.err = c("Fleiss"),
  conf.level = 0.95,
  R = 0
)

result
## Call:
## ckap(data = induction_1L, weight = c("unweighted"), std.err = c("Fleiss"), 
##     conf.level = 0.95, R = 0)
## 
##       Estimate   StdErr  LowerCB UpperCB
## Const 0.828242 0.041323 0.746248  0.9102
## 
## Maximum kappa = 0.89
## Kappa/maximum kappa = 0.93
## Confidence level = 95%
## Observations = 2
## Sample size = 100

This is repeated using ckap, where the Kappa statistic is 0.8282419 with a 95% confidence interval of 0.746 to 0.91.

3.2 Induction Number of Lines

# Tabulate agreement on the total number of lines, formatted "n (pct%)".
t_same_numlines <- d_lines_induction %>%
  count(same_numLinesC) %>%
  mutate(n = paste0(n, " (", round(100 * n / sum(n)), "%)"))

3.2.1 Number with the same number of total lines

# Render the total-lines agreement table.
knitr::kable(
  t_same_numlines,
  caption = "Number with the same number of total lines"
)
Number with the same number of total lines
same_numLinesC n
Different number of lines 23 (23%)
Same number of lines 77 (77%)

3.2.2 Inter-rater reliability

# Inter-rater reliability of the total number of lines per patient: one
# column per coder, passed to kappam.fleiss().
total_lines <- d_lines_induction %>%
  select(TotalLines_w, TotalLines_b)
result <- kappam.fleiss(total_lines)

result
##  Fleiss' Kappa for m Raters
## 
##  Subjects = 100 
##    Raters = 2 
##     Kappa = 0.628 
## 
##         z = 9.81 
##   p-value = 0

Without needing to look at the p-value, the Kappa statistic is 0.6282528 using the kappam.fleiss function, suggesting a moderate-to-substantial level of agreement between the two raters — noticeably lower than the agreement on the 1L induction name.

# Second kappa estimate via ckap(): unweighted, "Fleiss" standard error,
# 95% confidence level, R = 0.
result <- ckap(
  data = total_lines,
  weight = c("unweighted"),
  std.err = c("Fleiss"),
  conf.level = 0.95,
  R = 0
)

result
## Call:
## ckap(data = total_lines, weight = c("unweighted"), std.err = c("Fleiss"), 
##     conf.level = 0.95, R = 0)
## 
##       Estimate   StdErr  LowerCB UpperCB
## Const 0.629689 0.060728 0.509191  0.7502
## 
## Maximum kappa = 0.86
## Kappa/maximum kappa = 0.74
## Confidence level = 95%
## Observations = 2
## Sample size = 100

This is repeated using ckap, where the Kappa statistic is 0.6296893 with a 95% confidence interval of 0.509 to 0.75.

3.3 Timing of lines

Among those with same 1L induction line name:

# Restrict to patients for whom both coders recorded the same 1L regimen.
induction_data <- filter(d_lines_induction, same_induction == 1)

3.3.1 First line start

Difference in days between the two raters in the 86 patients (`nrow(induction_data)`) with the same 1L.

#' Summarise a numeric vector as formatted mean, median and range
#'
#' @param x Numeric vector to summarise.
#' @param round Number of decimal places used when formatting the results.
#' @param na.rm Drop missing values before summarising? Defaults to `TRUE`;
#'   with `FALSE`, an `NA` in `x` makes `quantile()` raise an error.
#' @return A list with three elements: `mean` (string holding the mean and
#'   the 95% confidence interval from a one-sample `t.test()`), `median`
#'   (string holding the median and the 25th/75th percentiles) and `range`
#'   (numeric vector of length 2 as returned by `range()`).
jb_getstats <- function(x, round = 2, na.rm = TRUE) {
  # `round` is both an argument and a base function; copy the argument to an
  # unambiguous local and call the function through its namespace.
  digits <- round

  if (na.rm) {
    x <- x[!is.na(x)]
  }

  ttest <- t.test(x, conf.level = 0.95)
  mean_txt <- paste0(
    base::round(ttest$estimate, digits),
    " (95%CI ",
    paste(base::round(ttest$conf.int, digits), collapse = ", "),
    ")"
  )

  quartiles <- quantile(x, probs = c(0.25, 0.75))
  median_txt <- paste0(
    base::round(median(x), digits), " (IQR ",
    base::round(quartiles[[1]], digits), ", ",
    base::round(quartiles[[2]], digits), ")"
  )

  list(
    mean = mean_txt,
    median = median_txt,
    range = range(x)
  )
}

# Summary statistics of the coder B minus coder W difference in 1L start date.
result <- jb_getstats(induction_data[["start_date_diff"]])
  • Mean: -14.09 (95%CI -39.4, 11.21)
  • Median: 0 (IQR 0, 0)
  • Range: -1095, 8

Histogram of difference in days between 1L start.

# Histogram (10-day bins) of the between-coder difference in 1L start date,
# restricted to patients with the same 1L regimen.
ggplot(induction_data, aes(x = start_date_diff)) +
  geom_histogram(binwidth = 10) +
  labs(
    title = "Difference in start day of 1L between PDCO coders",
    subtitle = paste0("In the ", nrow(induction_data), " with the same 1L"),
    x = "Difference in 1L start (Days)",
    y = "Count"
  ) +
  theme_classic()

OUTLIER # F86D752B756B6: Coder W seems to have copied the wrong line start and end from the abstracted data.

 

3.3.2 1L Induction Start Date, ICC (TBD)

Only done for duration at the moment.

#Check for ICC

3.4 1L Induction duration

Among those with same 1L induction line name:

# Both coders' 1L induction durations (days), one column per coder.
duration_1L <- induction_data %>% select(duration_b, duration_w)

3.4.1 1L Induction duration, ICC

# Intraclass correlation of 1L induction duration between the two coders,
# computed by irr::icc() with its default settings on a two-column table.
irr::icc(
  select(induction_data, duration_b, duration_w)
)
##  Single Score Intraclass Correlation
## 
##    Model: oneway 
##    Type : consistency 
## 
##    Subjects = 86 
##      Raters = 2 
##      ICC(1) = 0.962
## 
##  F-Test, H0: r0 = 0 ; H1: r0 > 0 
##    F(85,86) = 52 , p = 1.3e-50 
## 
##  95%-Confidence Interval for ICC Population Values:
##   0.943 < ICC < 0.975

3.4.2 1L Induction duration, Bland-Altman Plot

# The statistics object is not used yet; kept for a possible ggplot2 version.
# temp <- bland.altman.stats(induction_data$duration_b, induction_data$duration_w)

# Bland-Altman plot of the two coders' 1L durations.
bland.altman.plot(
  induction_data[["duration_b"]],
  induction_data[["duration_w"]],
  main = "Bland Altman Plot for Difference in 1L Duration",
  xlab = "Mean Duration",
  ylab = "Differences in Days",
  conf.int = 0.95
)

## NULL

3.4.3 1L Induction duration, Histogram

# Summary statistics of the coder B minus coder W difference in 1L duration.
result <- jb_getstats(induction_data[["duration_diff"]])
  • Mean: -0.7 (95%CI -17.9, 16.5)
  • Median: 0 (IQR 0, 0)
  • Range: -365, 525
# Histogram (10-day bins) of the between-coder difference in 1L duration,
# restricted to patients with the same 1L regimen. The plot is drawn from
# `induction_data`, the same object the subtitle count is taken from, so
# the plotted rows and the reported n cannot drift apart.
ggplot(induction_data, aes(x = duration_diff)) +
  geom_histogram(binwidth = 10) +
  labs(
    title = "Difference in duration of 1L between PDCO coders",
    subtitle = paste0("In the ", nrow(induction_data), " with the same 1L"),
    x = "Difference in duration (Days)",
    y = "Count"
  ) +
  theme_classic()

OUTLIER # F15146C46C327: Coders agree on startdate, but coder W thinks ends >1 year later than coder B

OUTLIER # F5F02439862C4: Coders agree on startdate, but coder B thinks ends >1 year later than coder W

OUTLIER # F941EA1E93C3F: Coder B assigned (-) followup time.

 

4 All treatment sequence

Among all patients, check how many have the same line names across all lines. Coders have coded up to 6 lines.

# Split the lines table into one data frame per (line number, coder) pair,
# named d_lines_induction_<line>_<coder>, for line numbers 1 to 6.
for (i in 1:6) {
  for (j in c("byonj", "wassnere")) {
    assign(
      paste0("d_lines_induction_", i, "_", j),
      filter(ungroup(d_lines), LineNumber == i & coder == j)
    )
  }
}

# Build one row per patient holding both coders' induction regimen for each
# of lines 1 to 6 (NA where the joined table has no row for that patient),
# then flag agreement per line and across the whole sequence.
d_lines_induction_new <- d_lines_induction_1_byonj %>%
  select(PatientID, induct_1_byonj = selectdrugs_induct) %>%
  left_join(
    select(d_lines_induction_1_wassnere,
           PatientID, induct_1_wassnere = selectdrugs_induct),
    by = "PatientID"
  ) %>%
  left_join(
    select(d_lines_induction_2_byonj,
           PatientID, induct_2_byonj = selectdrugs_induct),
    by = "PatientID"
  ) %>%
  left_join(
    select(d_lines_induction_2_wassnere,
           PatientID, induct_2_wassnere = selectdrugs_induct),
    by = "PatientID"
  ) %>%
  left_join(
    select(d_lines_induction_3_byonj,
           PatientID, induct_3_byonj = selectdrugs_induct),
    by = "PatientID"
  ) %>%
  left_join(
    select(d_lines_induction_3_wassnere,
           PatientID, induct_3_wassnere = selectdrugs_induct),
    by = "PatientID"
  ) %>%
  left_join(
    select(d_lines_induction_4_byonj,
           PatientID, induct_4_byonj = selectdrugs_induct),
    by = "PatientID"
  ) %>%
  left_join(
    select(d_lines_induction_4_wassnere,
           PatientID, induct_4_wassnere = selectdrugs_induct),
    by = "PatientID"
  ) %>%
  left_join(
    select(d_lines_induction_5_byonj,
           PatientID, induct_5_byonj = selectdrugs_induct),
    by = "PatientID"
  ) %>%
  left_join(
    select(d_lines_induction_5_wassnere,
           PatientID, induct_5_wassnere = selectdrugs_induct),
    by = "PatientID"
  ) %>%
  left_join(
    select(d_lines_induction_6_byonj,
           PatientID, induct_6_byonj = selectdrugs_induct),
    by = "PatientID"
  ) %>%
  left_join(
    select(d_lines_induction_6_wassnere,
           PatientID, induct_6_wassnere = selectdrugs_induct),
    by = "PatientID"
  ) %>%
  mutate(
    # Per-line flag: 1 = same regimen, 0 = different. For lines 2 to 6, a
    # line missing for both coders also counts as agreement; a line missing
    # for only one coder yields NA, which is treated as disagreement below.
    same_induction_1L = as.numeric(induct_1_byonj == induct_1_wassnere),
    same_induction_2L = as.numeric(
      induct_2_byonj == induct_2_wassnere |
        (is.na(induct_2_byonj) & is.na(induct_2_wassnere))
    ),
    same_induction_3L = as.numeric(
      induct_3_byonj == induct_3_wassnere |
        (is.na(induct_3_byonj) & is.na(induct_3_wassnere))
    ),
    same_induction_4L = as.numeric(
      induct_4_byonj == induct_4_wassnere |
        (is.na(induct_4_byonj) & is.na(induct_4_wassnere))
    ),
    same_induction_5L = as.numeric(
      induct_5_byonj == induct_5_wassnere |
        (is.na(induct_5_byonj) & is.na(induct_5_wassnere))
    ),
    same_induction_6L = as.numeric(
      induct_6_byonj == induct_6_wassnere |
        (is.na(induct_6_byonj) & is.na(induct_6_wassnere))
    ),

    # The sequence agrees only when every per-line flag is exactly 1;
    # `%in% 1` is FALSE for both 0 and NA.
    all_same_induction = ifelse(
      same_induction_1L %in% 1 &
        same_induction_2L %in% 1 &
        same_induction_3L %in% 1 &
        same_induction_4L %in% 1 &
        same_induction_5L %in% 1 &
        same_induction_6L %in% 1,
      "Same induction for all sequence",
      "Different induction"
    )
  )
# Tabulate, formatted "n (pct%)", how many patients have matching induction
# regimens on every line.
all_same_induction <- d_lines_induction_new %>%
  rename(`All induction` = all_same_induction) %>%
  count(`All induction`) %>%
  mutate(n = paste0(n, " (", round(100 * n / sum(n)), "%)"))

4.1 Number with the same induction line across all line numbers

# Render the whole-sequence agreement table. The caption describes
# agreement across all lines; it previously reused the 1L-only caption.
knitr::kable(
  all_same_induction,
  caption = "Number with the same induction across all lines"
)
Number with the same induction across all lines
All induction n
Different induction 31 (31%)
Same induction for all sequence 69 (69%)

 

5 Version

# Report the git state of the project for reproducibility (the rendered
# output lists the commit hash, current branch and remote).
RWDShelpers::gitStatus()
Git details
Git hash is: bc5dc92
Currently in the branch: master
Your branch is up-to-date with ‘origin/master’.
The remote is at: git@github.roche.com:RWDS/rwds_1052.git
