library(broom)
## Warning: package 'broom' was built under R version 3.2.3
library(survival)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
leukemia.surv <- survfit(Surv(time, status) ~ x, data = aml)
td <- tidy(leukemia.surv)
td
## time n.risk n.event n.censor estimate std.error conf.high conf.low
## 1 9 11 1 0 0.90909091 0.09534626 1.0000000 0.75413385
## 2 13 10 1 1 0.81818182 0.14213381 1.0000000 0.61924899
## 3 18 8 1 0 0.71590909 0.19508758 1.0000000 0.48842629
## 4 23 7 1 0 0.61363636 0.24873417 0.9991576 0.37686706
## 5 28 6 0 1 0.61363636 0.24873417 0.9991576 0.37686706
## 6 31 5 1 0 0.49090909 0.33446777 0.9455850 0.25485995
## 7 34 4 1 0 0.36818182 0.44181673 0.8752607 0.15487712
## 8 45 3 0 1 0.36818182 0.44181673 0.8752607 0.15487712
## 9 48 2 1 0 0.18409091 0.83378775 0.9435258 0.03591790
## 10 161 1 0 1 0.18409091 0.83378775 0.9435258 0.03591790
## 11 5 12 2 0 0.83333333 0.12909944 1.0000000 0.64703699
## 12 8 10 2 0 0.66666667 0.20412415 0.9946254 0.44684608
## 13 12 8 1 0 0.58333333 0.24397502 0.9409980 0.36161371
## 14 16 7 0 1 0.58333333 0.24397502 0.9409980 0.36161371
## 15 23 6 1 0 0.48611111 0.30472470 0.8833192 0.26751825
## 16 27 5 1 0 0.38888889 0.37796447 0.8157357 0.18539653
## 17 30 4 1 0 0.29166667 0.47559487 0.7408220 0.11483115
## 18 33 3 1 0 0.19444444 0.62678317 0.6642237 0.05692155
## 19 43 2 1 0 0.09722222 0.94491118 0.6195486 0.01525653
## 20 45 1 1 0 0.00000000 Inf NA NA
## strata
## 1 x=Maintained
## 2 x=Maintained
## 3 x=Maintained
## 4 x=Maintained
## 5 x=Maintained
## 6 x=Maintained
## 7 x=Maintained
## 8 x=Maintained
## 9 x=Maintained
## 10 x=Maintained
## 11 x=Nonmaintained
## 12 x=Nonmaintained
## 13 x=Nonmaintained
## 14 x=Nonmaintained
## 15 x=Nonmaintained
## 16 x=Nonmaintained
## 17 x=Nonmaintained
## 18 x=Nonmaintained
## 19 x=Nonmaintained
## 20 x=Nonmaintained
ggplot(td, aes(time, estimate, color = strata)) +
geom_line()
Now I rearrange the data like the example and perform the estimate:
processed <- td %>%
select(t = time, n = n.risk, d = n.event, onoff = strata, survival_estimate = estimate) %>%
group_by(onoff) %>%
arrange(t) %>%
mutate(our_estimate = cumprod((n - d) / n))
And the calculations match the survival package:
ggplot(processed, aes(survival_estimate, our_estimate)) +
geom_point() +
geom_abline(color = "red")