# ~~~~~~~~~~~~~~~~~~~~~~~~~~
# BMJ Statistics Notes
# Data from log-rank
# article
# ~~~~~~~~~~~~~~~~~~~~~~~~~~
# Bland, J. M., & Altman, D. G. (2004).
# The logrank test. Bmj, 328(7447), 1073.
# PDF link: https://www.bmj.com/content/bmj/328/7447/1073.full.pdf
# Here is the dataset from the article along
# with code to replicate analysis shown in article.
# Data Set: bmj_logrank
# Data Dictionary:
# (1) weeks time to death or censoring in weeks
# (2) event censoring indicator (0=censor; 1=death)
# (3) group A = astrocytoma; G = Glioblastoma
## load the data file = bmj_km
load(url("http://www.duke.edu/~sgrambow/crp241data/bmj_logrank.RData"))
# Inspect and summarize the data
# Inspect the raw data
View(bmj_logrank)
# Examine structure of the data
str(bmj_logrank)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 51 obs. of 3 variables:
## $ weeks: num 6 13 21 30 31 37 38 47 49 50 ...
## $ event: num 1 1 1 1 0 1 1 0 1 1 ...
## $ group: chr "A" "A" "A" "A" ...
# weeks and event are numeric
# group is character
# numerical summary of the data
summary(bmj_logrank)
## weeks event group
## Min. : 6.00 Min. :0.0000 Length:51
## 1st Qu.: 22.50 1st Qu.:1.0000 Class :character
## Median : 38.00 Median :1.0000 Mode :character
## Mean : 54.59 Mean :0.8235
## 3rd Qu.: 79.50 3rd Qu.:1.0000
## Max. :219.00 Max. :1.0000
# median of weeks variable = 36
# 82% of subject experienced event of death
# obtain KM table and plot
# Obtain survival estimates using survfit() function
library(survival)
fit.logrank <- survfit(Surv(weeks, event) ~group, data=bmj_logrank)
# - Print table of survival estimates
# -- Default output only shows UNIQUE EVENT times
summary(fit.logrank)
## Call: survfit(formula = Surv(weeks, event) ~ group, data = bmj_logrank)
##
## group=A
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 6 20 1 0.950 0.0487 0.859 1.000
## 13 19 1 0.900 0.0671 0.778 1.000
## 21 18 1 0.850 0.0798 0.707 1.000
## 30 17 1 0.800 0.0894 0.643 0.996
## 37 15 1 0.747 0.0981 0.577 0.966
## 38 14 1 0.693 0.1046 0.516 0.932
## 49 12 1 0.636 0.1107 0.452 0.894
## 50 11 1 0.578 0.1147 0.392 0.853
## 63 10 1 0.520 0.1169 0.335 0.808
## 79 9 1 0.462 0.1173 0.281 0.760
## 86 5 1 0.370 0.1251 0.191 0.718
## 98 4 1 0.277 0.1233 0.116 0.663
## 202 2 1 0.139 0.1158 0.027 0.713
## 219 1 1 0.000 NaN NA NA
##
## group=G
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 10 31 2 0.9355 0.0441 0.85288 1.000
## 12 29 1 0.9032 0.0531 0.80492 1.000
## 13 28 1 0.8710 0.0602 0.76060 0.997
## 14 27 1 0.8387 0.0661 0.71874 0.979
## 15 26 1 0.8065 0.0710 0.67871 0.958
## 16 25 1 0.7742 0.0751 0.64015 0.936
## 17 24 1 0.7419 0.0786 0.60284 0.913
## 18 23 1 0.7097 0.0815 0.56660 0.889
## 20 22 1 0.6774 0.0840 0.53132 0.864
## 24 21 2 0.6129 0.0875 0.46333 0.811
## 25 19 1 0.5806 0.0886 0.43051 0.783
## 28 18 1 0.5484 0.0894 0.39843 0.755
## 30 17 1 0.5161 0.0898 0.36706 0.726
## 33 16 1 0.4839 0.0898 0.33638 0.696
## 35 14 1 0.4493 0.0898 0.30375 0.665
## 37 13 1 0.4147 0.0893 0.27202 0.632
## 40 12 2 0.3456 0.0867 0.21134 0.565
## 46 9 1 0.3072 0.0852 0.17842 0.529
## 48 8 1 0.2688 0.0827 0.14705 0.491
## 76 6 1 0.2240 0.0802 0.11109 0.452
## 81 5 1 0.1792 0.0756 0.07838 0.410
## 82 4 1 0.1344 0.0687 0.04934 0.366
## 91 3 1 0.0896 0.0586 0.02486 0.323
## 112 2 1 0.0448 0.0432 0.00678 0.296
## 181 1 1 0.0000 NaN NA NA
# -- Have to ask R to show censored event times
summary(fit.logrank,censor=TRUE)
## Call: survfit(formula = Surv(weeks, event) ~ group, data = bmj_logrank)
##
## group=A
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 6 20 1 0.950 0.0487 0.859 1.000
## 13 19 1 0.900 0.0671 0.778 1.000
## 21 18 1 0.850 0.0798 0.707 1.000
## 30 17 1 0.800 0.0894 0.643 0.996
## 31 16 0 0.800 0.0894 0.643 0.996
## 37 15 1 0.747 0.0981 0.577 0.966
## 38 14 1 0.693 0.1046 0.516 0.932
## 47 13 0 0.693 0.1046 0.516 0.932
## 49 12 1 0.636 0.1107 0.452 0.894
## 50 11 1 0.578 0.1147 0.392 0.853
## 63 10 1 0.520 0.1169 0.335 0.808
## 79 9 1 0.462 0.1173 0.281 0.760
## 80 8 0 0.462 0.1173 0.281 0.760
## 82 7 0 0.462 0.1173 0.281 0.760
## 86 5 1 0.370 0.1251 0.191 0.718
## 98 4 1 0.277 0.1233 0.116 0.663
## 149 3 0 0.277 0.1233 0.116 0.663
## 202 2 1 0.139 0.1158 0.027 0.713
## 219 1 1 0.000 NaN NA NA
##
## group=G
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 10 31 2 0.9355 0.0441 0.85288 1.000
## 12 29 1 0.9032 0.0531 0.80492 1.000
## 13 28 1 0.8710 0.0602 0.76060 0.997
## 14 27 1 0.8387 0.0661 0.71874 0.979
## 15 26 1 0.8065 0.0710 0.67871 0.958
## 16 25 1 0.7742 0.0751 0.64015 0.936
## 17 24 1 0.7419 0.0786 0.60284 0.913
## 18 23 1 0.7097 0.0815 0.56660 0.889
## 20 22 1 0.6774 0.0840 0.53132 0.864
## 24 21 2 0.6129 0.0875 0.46333 0.811
## 25 19 1 0.5806 0.0886 0.43051 0.783
## 28 18 1 0.5484 0.0894 0.39843 0.755
## 30 17 1 0.5161 0.0898 0.36706 0.726
## 33 16 1 0.4839 0.0898 0.33638 0.696
## 34 15 0 0.4839 0.0898 0.33638 0.696
## 35 14 1 0.4493 0.0898 0.30375 0.665
## 37 13 1 0.4147 0.0893 0.27202 0.632
## 40 12 2 0.3456 0.0867 0.21134 0.565
## 46 9 1 0.3072 0.0852 0.17842 0.529
## 48 8 1 0.2688 0.0827 0.14705 0.491
## 70 7 0 0.2688 0.0827 0.14705 0.491
## 76 6 1 0.2240 0.0802 0.11109 0.452
## 81 5 1 0.1792 0.0756 0.07838 0.410
## 82 4 1 0.1344 0.0687 0.04934 0.366
## 91 3 1 0.0896 0.0586 0.02486 0.323
## 112 2 1 0.0448 0.0432 0.00678 0.296
## 181 1 1 0.0000 NaN NA NA
# generates median and 95% CI for median
summary(fit.logrank)$table
## records n.max n.start events *rmean *se(rmean) median 0.95LCL 0.95UCL
## group=A 20 20 20 14 93.90089 17.786893 79 49 NA
## group=G 31 31 31 28 46.74322 7.731678 33 24 48
# Create the Kaplan Meier plot that estimates the true
# survival curve. see this link for info on plot options:
# https://cran.r-project.org/web/packages/survminer/vignettes/Informative_Survival_Plots.html
# https://www.r-bloggers.com/survival-analysis-basics/
# KM plot
library(survminer)
## Warning: package 'survminer' was built under R version 4.0.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.3
## Loading required package: ggpubr
ggsurvplot(fit.logrank, data = bmj_logrank,
risk.table = TRUE,
surv.median.line = "hv",
title="Survival curve showing probability of death ",
xlab = "time in weeks")
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.

# Compare survival curves by group using log-rank test
survdiff(Surv(weeks, event) ~ group, data=bmj_logrank)
## Call:
## survdiff(formula = Surv(weeks, event) ~ group, data = bmj_logrank)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## group=A 20 14 22.5 3.20 7.5
## group=G 31 28 19.5 3.69 7.5
##
## Chisq= 7.5 on 1 degrees of freedom, p= 0.006
# the output provides the numbers from the article
# in terms of expected events, test statistic, and
# p-value.
# Note there are some small discrepancies in numbers,
# likely due to roundoff.