Setup
# Resolve the prefix for relative paths: when the working directory is the
# "programs" folder, step up one level; otherwise use paths as given.
path <- if (basename(getwd()) == "programs") "../" else ""
library(tidyverse)
library(RocheRWDS)
library(dplyr)
library(DT)
library(tableone)
library(survival)
library(survminer)
library(DiagrammeR)
library(irr)
library(ICC)
library(raters)
library(rel)
library(psych)
library(BlandAltmanLeh)
Get data
# Import the coded lines-of-therapy table with an explicit column spec:
# identifiers, regimen strings, coder and notes as character; every
# start/end/transplant column as a date.
d_lines <- readr::read_csv(
  file = paste0(path,"import/lines.csv"),
  col_types = readr::cols(
    PatientID = readr::col_character(),
    start_induct = readr::col_date(format = ""),
    end_induct = readr::col_date(format = ""),
    selectdrugs_induct = readr::col_character(),
    Transplant = readr::col_date(format = ""),
    start_cons = readr::col_date(format = ""),
    end_cons = readr::col_date(format = ""),
    selectdrugs_cons = readr::col_character(),
    start_maint = readr::col_date(format = ""),
    end_maint = readr::col_date(format = ""),
    selectdrugs_maint = readr::col_character(),
    coder = readr::col_character(),
    line_notes = readr::col_character()
  )
)
# Headline counts for the imported table: number of rows, number of distinct
# patients, and the earliest induction start date.
knitr::kable(
  summarise(
    d_lines,
    Rows = n(),
    Patients = n_distinct(PatientID),
    EarliestLineStart = min(start_induct)
  ),
  caption = "Patient count on lines table"
)
Patient count on lines table
| 357 |
100 |
2011-02-01 |
Clean data
# Add per-patient, per-coder line counters.
#   LineNumber: position of the line within (PatientID, coder), ordered by
#               induction start date.
#   TotalLines: number of lines that coder recorded for that patient.
# Rows are sorted first (arrange() ignores grouping anyway), and the grouping
# is dropped at the end so d_lines is a plain tibble for every later step
# instead of silently carrying (PatientID, coder) groups around.
d_lines <- d_lines %>%
  arrange(PatientID, start_induct) %>%
  group_by(PatientID, coder) %>%
  mutate(
    LineNumber = row_number(),
    TotalLines = n()
  ) %>%
  ungroup()
# Show the first three rows as a sanity check of the import and counters.
knitr::kable(
  head(d_lines, 3),
  caption = "First 3 rows of the line table"
)
First 3 rows of the line table
| F00E4B82A2B05 |
2016-08-15 |
2016-11-14 |
bortezomib, dexamethasone, lenalidomide |
NA |
2017-10-15 |
2017-10-15 |
No consolidation |
2017-10-15 |
2017-10-15 |
No maintenance |
wassnere |
Add notes here |
1 |
1 |
| F00E4B82A2B05 |
2016-08-15 |
2016-09-25 |
bortezomib, dexamethasone |
NA |
2017-11-08 |
2017-11-08 |
No consolidation |
2017-11-08 |
2017-11-08 |
No maintenance |
byonj |
Add notes here |
1 |
2 |
| F00E4B82A2B05 |
2016-09-26 |
2017-06-22 |
bortezomib, dexamethasone, lenalidomide |
NA |
2017-11-08 |
2017-11-08 |
No consolidation |
2017-11-08 |
2017-11-08 |
No maintenance |
byonj |
Add notes here |
2 |
2 |
Both PDCO colleagues identified an issue with FE0CCE4BF9D2B:
I ran into an issue with a patient with a double transplant: when recording the second transplant, induction came up again in the record (double record of the induction regimen for this patient).
This patient appears to have received a double auto transplant. Unable to code both transplants in the same line of therapy, but both transplants should be considered as part of the same line of therapy.
How to embed transplants in lines is a discussion point for Flatiron.
Summarise
# Number of distinct patients each coder has in the line table.
knitr::kable(
  summarise(
    group_by(d_lines, coder),
    `Line table` = n_distinct(PatientID)
  ),
  caption = "100 in both line table"
)
100 in both line table
| byonj |
100 |
| wassnere |
100 |
Induction Analysis
# Keep only each coder's first line of therapy per patient.
d_lines_induction <- d_lines %>%
  ungroup() %>%
  filter(LineNumber == 1)

# One row per patient: coder "wassnere" columns (suffix _w) on the left,
# coder "byonj" columns (suffix _b) joined on PatientID, followed by the
# derived agreement measures.
d_lines_induction <- d_lines_induction %>%
  filter(coder == "wassnere") %>%
  select(
    PatientID,
    start_induct_w = start_induct,
    end_induct_w = end_induct,
    selectdrugs_induct_w = selectdrugs_induct,
    TotalLines_w = TotalLines
  ) %>%
  left_join(
    d_lines_induction %>%
      filter(coder == "byonj") %>%
      select(
        PatientID,
        start_induct_b = start_induct,
        end_induct_b = end_induct,
        selectdrugs_induct_b = selectdrugs_induct,
        TotalLines_b = TotalLines
      ),
    by = "PatientID"
  ) %>%
  mutate(
    # induction duration in days, per coder
    duration_b = as.numeric(end_induct_b - start_induct_b),
    duration_w = as.numeric(end_induct_w - start_induct_w),
    # 1/0 flag: identical 1L regimen string for both coders
    same_induction = ifelse(selectdrugs_induct_b == selectdrugs_induct_w, 1, 0),
    # display label for the flag
    same_inductionC = ifelse(
      same_induction == 1,
      "Same 1L induction",
      "Different 1L induction"
    ),
    # byonj minus wassnere, in days
    start_date_diff = as.numeric(start_induct_b - start_induct_w),
    duration_diff = duration_b - duration_w,
    # 1/0 flag and label: both coders recorded the same number of lines
    same_numlines = ifelse(TotalLines_b == TotalLines_w, 1, 0),
    same_numLinesC = ifelse(
      same_numlines == 1,
      "Same number of lines",
      "Different number of lines"
    )
  )
Induction First Line Name
Check if the induction 1L has the same name.
# Tabulate patients by whether both coders gave the same 1L induction name,
# formatted as "count (percent%)".
t_sameinduction <- d_lines_induction %>%
  count(`1L induction` = same_inductionC) %>%
  mutate(n = paste0(n," (",round(100*n/sum(n)),"%)"))
Number with the same 1L name
# Render the 1L name agreement table.
knitr::kable(
  t_sameinduction,
  caption = "Number with the same 1L"
)
Number with the same 1L
| Different 1L induction |
14 (14%) |
| Same 1L induction |
86 (86%) |
Inter-rater reliability using Fleiss’ Kappa
Method 1 = kappam.fleiss function; Method 2 = ckap function.
# Inter-rater reliability of the 1L induction name: one row per patient, one
# column per coder, passed to Fleiss' kappa.
induction_1L <- d_lines_induction %>%
  select(selectdrugs_induct_w, selectdrugs_induct_b)
result <- irr::kappam.fleiss(induction_1L)
result
## Fleiss' Kappa for m Raters
##
## Subjects = 100
## Raters = 2
## Kappa = 0.828
##
## z = 18.4
## p-value = 0
Without needing to look at the p-value, the Kappa statistic is 0.8281049 using the kappam.fleiss function, suggesting a high level of agreement between the two raters.
# Second kappa estimate for the 1L induction name, this time via ckap():
# unweighted, "Fleiss" standard error, 95% confidence level.
# NOTE(review): R = 0 presumably means no bootstrap replicates; confirm
# against the ckap() documentation.
# The printed result echoes this call verbatim, so keep the expression as is
# if the rendered output must stay unchanged.
result <- ckap(
data = induction_1L, weight = c("unweighted"),
std.err = c("Fleiss"), conf.level = 0.95, R = 0
)
result
## Call:
## ckap(data = induction_1L, weight = c("unweighted"), std.err = c("Fleiss"),
## conf.level = 0.95, R = 0)
##
## Estimate StdErr LowerCB UpperCB
## Const 0.828242 0.041323 0.746248 0.9102
##
## Maximum kappa = 0.89
## Kappa/maximum kappa = 0.93
## Confidence level = 95%
## Observations = 2
## Sample size = 100
This is repeated using ckap, where the Kappa statistic is 0.8282419 with a 95% confidence interval of 0.746 to 0.91.
Induction Number of Lines
# Tabulate patients by whether both coders recorded the same total number of
# lines, formatted as "count (percent%)".
t_same_numlines <- d_lines_induction %>%
  count(same_numLinesC) %>%
  mutate(n = paste0(n," (",round(100*n/sum(n)),"%)"))
Number with the same number of total lines
# Render the total-lines agreement table.
knitr::kable(
  t_same_numlines,
  caption = "Number with the same number of total lines"
)
Number with the same number of total lines
| Different number of lines |
23 (23%) |
| Same number of lines |
77 (77%) |
Inter-rater reliability
# Inter-rater reliability of the total number of lines: one row per patient,
# one column per coder, passed to Fleiss' kappa.
total_lines <- d_lines_induction %>%
  select(TotalLines_w, TotalLines_b)
result <- irr::kappam.fleiss(total_lines)
result
## Fleiss' Kappa for m Raters
##
## Subjects = 100
## Raters = 2
## Kappa = 0.628
##
## z = 9.81
## p-value = 0
Without needing to look at the p-value, the Kappa statistic is 0.6282528 using the kappam.fleiss function, suggesting a substantial level of agreement between the two raters.
# Second kappa estimate for the total number of lines via ckap():
# unweighted, "Fleiss" standard error, 95% confidence level.
# NOTE(review): R = 0 presumably means no bootstrap replicates; confirm
# against the ckap() documentation.
# The printed result echoes this call verbatim, so keep the expression as is
# if the rendered output must stay unchanged.
result <- ckap(
data = total_lines, weight = c("unweighted"),
std.err = c("Fleiss"), conf.level = 0.95, R = 0
)
result
## Call:
## ckap(data = total_lines, weight = c("unweighted"), std.err = c("Fleiss"),
## conf.level = 0.95, R = 0)
##
## Estimate StdErr LowerCB UpperCB
## Const 0.629689 0.060728 0.509191 0.7502
##
## Maximum kappa = 0.86
## Kappa/maximum kappa = 0.74
## Confidence level = 95%
## Observations = 2
## Sample size = 100
This is repeated using ckap, where the Kappa statistic is 0.6296893 with a 95% confidence interval of 0.509 to 0.75.
Timing of lines
Among those with same 1L induction line name:
# Restrict the timing analyses to patients whose 1L induction name was coded
# identically by both coders.
induction_data <- filter(d_lines_induction, same_induction == 1)
First line start
Difference in days between the two raters in the nrow(induction_data) patients with the same 1L.
# Summarise a numeric vector as display-ready strings.
#
# Arguments:
#   x      Numeric vector (e.g. a difference in days). Missing values are
#          dropped before any statistic is computed.
#   round  Number of decimal places used in the formatted strings.
#
# Returns a list with:
#   mean    "<mean> (95%CI <lower>, <upper>)", from a one-sample t.test()
#   median  "<median> (IQR <Q1>, <Q3>)"
#   range   numeric vector c(min, max)
#
# Errors from t.test() (e.g. fewer than two non-missing values) propagate.
jb_getstats <- function(x, round = 2) {
  # t.test() drops NAs on its own, but median()/quantile()/range() do not:
  # without this step a single NA made quantile() fail and left the mean and
  # the median describing different sets of observations.
  x <- x[!is.na(x)]
  digits <- round

  t_result <- t.test(x, conf.level = 0.95)
  mean_text <- paste0(
    round(t_result$estimate, digits),
    " (95%CI ", paste(round(t_result$conf.int, digits), collapse = ", "), ")"
  )

  quartiles <- quantile(x, probs = c(0.25, 0.75))
  median_text <- paste0(
    round(median(x), digits), " (IQR ",
    round(quartiles[[1]], digits), ", ",
    round(quartiles[[2]], digits), ")"
  )

  list(
    mean = mean_text,
    median = median_text,
    range = range(x)
  )
}
# Summary statistics for the between-coder difference in 1L start date (days).
result <- jb_getstats(induction_data[["start_date_diff"]])
- Mean: -14.09 (95%CI -39.4, 11.21)
- Median: 0 (IQR 0, 0)
- Range: -1095, 8
Histogram of difference in days between 1L start.
# Histogram (10-day bins) of the between-coder difference in 1L start date,
# restricted to patients with the same 1L name.
ggplot(induction_data, aes(x = start_date_diff)) +
  geom_histogram(binwidth = 10) +
  labs(
    title = "Difference in start day of 1L between PDCO coders",
    subtitle = paste0("In the ", nrow(induction_data), " with the same 1L"),
    x = "Difference in 1L start (Days)",
    y = "Count"
  ) +
  theme_classic()
OUTLIER # F86D752B756B6: Coder W seems to have copied the wrong line start and end from the abstracted data.
1L Induction Start Date, ICC (TBD)
Only done for duration at the moment.
#Check for ICC
1L Induction duration
Among those with same 1L induction line name:
# Both coders' 1L durations side by side.
# NOTE(review): not referenced by any later code visible in this report.
duration_1L <- select(induction_data, duration_b, duration_w)
1L Induction duration, ICC
# Intraclass correlation of 1L duration between the two coders; icc() expects
# one column per rater, and its defaults are used.
irr::icc(
  select(induction_data, duration_b, duration_w)
)
## Single Score Intraclass Correlation
##
## Model: oneway
## Type : consistency
##
## Subjects = 86
## Raters = 2
## ICC(1) = 0.962
##
## F-Test, H0: r0 = 0 ; H1: r0 > 0
## F(85,86) = 52 , p = 1.3e-50
##
## 95%-Confidence Interval for ICC Population Values:
## 0.943 < ICC < 0.975
1L Induction duration, Bland-Altman Plot
# Bland-Altman plot of 1L duration, coder B against coder W, with 95%
# confidence intervals.
# TODO: possibly move to ggplot2 later, building on
#   bland.altman.stats(induction_data$duration_b, induction_data$duration_w)
bland.altman.plot(
  group1 = induction_data$duration_b,
  group2 = induction_data$duration_w,
  conf.int = .95,
  main = "Bland Altman Plot for Difference in 1L Duration",
  xlab = "Mean Duration",
  ylab = "Differences in Days"
)

## NULL
1L Induction duration, Histogram
# Summary statistics for the between-coder difference in 1L duration (days).
result <- jb_getstats(induction_data[["duration_diff"]])
- Mean: -0.7 (95%CI -17.9, 16.5)
- Median: 0 (IQR 0, 0)
- Range: -365, 525
# Histogram (10-day bins) of the between-coder difference in 1L duration,
# restricted to patients with the same 1L name.
ggplot(
  filter(d_lines_induction, same_induction == 1),
  aes(x = duration_diff)
) +
  geom_histogram(binwidth = 10) +
  labs(
    title = "Difference in duration of 1L between PDCO coders",
    subtitle = paste0("In the ", nrow(induction_data), " with the same 1L"),
    x = "Difference in duration (Days)",
    y = "Count"
  ) +
  theme_classic()
OUTLIER # F15146C46C327: Coders agree on the start date, but coder W thinks the line ends >1 year later than coder B does.
OUTLIER # F5F02439862C4: Coders agree on the start date, but coder B thinks the line ends >1 year later than coder W does.
OUTLIER # F941EA1E93C3F: Coder B assigned a negative follow-up time.
All treatment sequence
Among all patients, check how many have the same line names across all lines. Coders have coded up to 6 lines.
# Create one data frame per (line number, coder) pair, named
# d_lines_induction_<line>_<coder>, holding that coder's rows for that line
# number. The wide join that follows refers to these objects by name.
for (i in 1:6) {
  for (j in c("byonj", "wassnere")) {
    assign(
      paste0("d_lines_induction_", i, "_", j),
      d_lines %>%
        ungroup() %>%
        filter(LineNumber == i, coder == j)
    )
  }
}
# Build a wide table with one row per patient and one induction-regimen column
# per (line number, coder), then flag agreement line by line and overall.
# The chain starts from coder byonj's line-1 rows, so only patients present
# there appear in the result; every other table is left-joined on PatientID
# and contributes NA where that coder recorded no such line.
d_lines_induction_new <-
left_join(d_lines_induction_1_byonj %>%
select(PatientID, induct_1_byonj = selectdrugs_induct),
d_lines_induction_1_wassnere %>%
select(PatientID, induct_1_wassnere = selectdrugs_induct),
by='PatientID') %>%
left_join(.,
d_lines_induction_2_byonj %>%
select(PatientID, induct_2_byonj = selectdrugs_induct),
by='PatientID') %>%
left_join(.,
d_lines_induction_2_wassnere %>%
select(PatientID, induct_2_wassnere = selectdrugs_induct),
by='PatientID') %>%
left_join(.,
d_lines_induction_3_byonj %>%
select(PatientID, induct_3_byonj = selectdrugs_induct),
by='PatientID') %>%
left_join(.,
d_lines_induction_3_wassnere %>%
select(PatientID, induct_3_wassnere = selectdrugs_induct),
by='PatientID') %>%
left_join(.,
d_lines_induction_4_byonj %>%
select(PatientID, induct_4_byonj = selectdrugs_induct),
by='PatientID') %>%
left_join(.,
d_lines_induction_4_wassnere %>%
select(PatientID, induct_4_wassnere = selectdrugs_induct),
by='PatientID') %>%
left_join(.,
d_lines_induction_5_byonj %>%
select(PatientID, induct_5_byonj = selectdrugs_induct),
by='PatientID') %>%
left_join(.,
d_lines_induction_5_wassnere %>%
select(PatientID, induct_5_wassnere = selectdrugs_induct),
by='PatientID') %>%
left_join(.,
d_lines_induction_6_byonj %>%
select(PatientID, induct_6_byonj = selectdrugs_induct),
by='PatientID') %>%
left_join(.,
d_lines_induction_6_wassnere %>%
select(PatientID, induct_6_wassnere = selectdrugs_induct),
by='PatientID') %>%
mutate(
# Line 1: 1 when the two regimen strings are equal, 0 when they differ,
# NA when either is missing.
same_induction_1L=ifelse(induct_1_byonj==induct_1_wassnere, 1, 0),
# Lines 2-6: 1 when the strings are equal OR neither coder recorded the line
# (both NA). When only one coder recorded the line the condition evaluates to
# NA (NA | FALSE), so the flag is NA rather than 0.
same_induction_2L=ifelse(induct_2_byonj==induct_2_wassnere|(is.na(induct_2_byonj) &is.na(induct_2_wassnere)), 1, 0),
same_induction_3L=ifelse(induct_3_byonj==induct_3_wassnere|(is.na(induct_3_byonj) &is.na(induct_3_wassnere)), 1, 0),
same_induction_4L=ifelse(induct_4_byonj==induct_4_wassnere|(is.na(induct_4_byonj) &is.na(induct_4_wassnere)), 1, 0),
same_induction_5L=ifelse(induct_5_byonj==induct_5_wassnere|(is.na(induct_5_byonj) &is.na(induct_5_wassnere)), 1, 0),
same_induction_6L=ifelse(induct_6_byonj==induct_6_wassnere|(is.na(induct_6_byonj) &is.na(induct_6_wassnere)), 1, 0),
# Overall label: any per-line flag equal to 0 or NA marks the patient as
# "Different induction"; the is.na() terms are what make the NA flags above
# count as disagreement instead of propagating NA into the label.
all_same_induction=ifelse((same_induction_1L==0|is.na(same_induction_1L))
|(same_induction_2L==0|is.na(same_induction_2L))
|(same_induction_3L==0|is.na(same_induction_3L))
|(same_induction_4L==0|is.na(same_induction_4L))
|(same_induction_5L==0|is.na(same_induction_5L))
|(same_induction_6L==0|is.na(same_induction_6L)), "Different induction", "Same induction for all sequence")
)
# Tabulate patients by whether both coders agree on the induction regimen for
# every line, formatted as "count (percent%)".
all_same_induction <- d_lines_induction_new %>%
  count(`All induction` = all_same_induction) %>%
  mutate(n = paste0(n," (",round(100*n/sum(n)),"%)"))
Number with the same induction line across all line numbers
# Render the all-lines agreement table.
# NOTE(review): the caption repeats the one used for the 1L-only table even
# though this table covers every line number; consider rewording it.
knitr::kable(
  all_same_induction,
  caption = "Number with the same 1L"
)
Number with the same 1L
| Different induction |
31 (31%) |
| Same induction for all sequence |
69 (69%) |
Version
# Record the repository state for this run; the rendered output lists the git
# hash, current branch, sync status and remote.
# NOTE(review): the setup chunk attaches RocheRWDS, while this call is
# namespaced to RWDShelpers; confirm that package is installed where the
# report is knitted.
RWDShelpers::gitStatus()
| Git hash is: bc5dc92 |
| Currently in the branch: master |
| Your branch is up-to-date with ‘origin/master’. |
| The remote is at: git@github.roche.com:RWDS/rwds_1052.git |
