df.3.1 <- read_excel("Table 3.1.xlsx")
## New names:
## * Uncensor -> Uncensor...10
## * Uncensor -> Uncensor...12
head(df.3.1, 10) %>%
flextable() %>%
set_caption("First 10 Observations")
Patient | Age | Gender | Stage-II | Stage-III A | Stage-III B | Stage-IV A | Treatment | Remission Time | Uncensor...10 | Survival Time | Uncensor...12 |
1 | 59 | 0 | 0 | 0 | 1 | 0 | 1 | 33.7 | 0 | 33.7 | 0 |
2 | 50 | 0 | 0 | 0 | 1 | 0 | 1 | 3.8 | 1 | 3.9 | 1 |
3 | 76 | 1 | 0 | 0 | 1 | 0 | 1 | 6.3 | 1 | 10.5 | 1 |
4 | 66 | 0 | 0 | 0 | 1 | 0 | 1 | 2.3 | 1 | 5.4 | 1 |
5 | 33 | 1 | 0 | 0 | 1 | 0 | 1 | 6.4 | 1 | 19.5 | 1 |
6 | 23 | 0 | 0 | 0 | 1 | 0 | 1 | 23.8 | 0 | 23.8 | 0 |
7 | 40 | 0 | 0 | 0 | 1 | 0 | 1 | 1.8 | 1 | 7.9 | 1 |
8 | 34 | 1 | 0 | 0 | 1 | 0 | 1 | 5.5 | 1 | 16.9 | 0 |
9 | 34 | 1 | 0 | 0 | 1 | 0 | 1 | 16.6 | 0 | 16.6 | 0 |
10 | 38 | 0 | 1 | 0 | 0 | 0 | 1 | 33.7 | 0 | 33.7 | 0 |
surv.model <- Surv(df.3.1$`Survival Time`, event = df.3.1$Uncensor...12)
km <- survfit(surv.model ~ Treatment, data = df.3.1)
summary(km)
## Call: survfit(formula = surv.model ~ Treatment, data = df.3.1)
##
## Treatment=0
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 6.9 19 1 0.947 0.0512 0.852 1.000
## 7.7 18 1 0.895 0.0704 0.767 1.000
## 8.0 16 1 0.839 0.0854 0.687 1.000
## 8.3 13 1 0.774 0.1003 0.601 0.998
## 24.4 3 1 0.516 0.2211 0.223 1.000
##
## Treatment=1
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 3.9 11 1 0.909 0.0867 0.754 1.000
## 5.4 10 1 0.818 0.1163 0.619 1.000
## 7.9 9 1 0.727 0.1343 0.506 1.000
## 10.5 8 1 0.636 0.1450 0.407 0.995
## 19.5 4 1 0.477 0.1755 0.232 0.981
The survival times are the same with Table 3.2 in the book.
ggsurvplot(km)
The survival curves are the same as figure 3.1 in the book.
The following equation is used to calculate the variance of \(S(t)\).
\[ Var[\hat{S}(t)] = [\hat{S}(t)]^2 \sum_{r} \frac{1}{(n-r)(n-r + 1)} \]
Where \(n\) is number of observations and \(r\) is number of deaths.
surv.data <- tidy(km)
surv.var <- surv.data %>%
filter(n.event == 1) %>% #Looking only at uncensored observations
group_by(strata) %>%
mutate(S.Var = estimate^2 * cumsum(1/((max(n.risk)-cumsum(n.event))*(max(n.risk)-cumsum(n.event) + 1))))
surv.var %>%
select(time, strata, S.Var) %>%
flextable() %>%
set_caption("Surival Function Variences")
time | strata | S.Var |
6.9 | Treatment=0 | 0.002624289 |
7.7 | Treatment=0 | 0.004956991 |
8.0 | Treatment=0 | 0.006943539 |
8.3 | Treatment=0 | 0.008414419 |
24.4 | Treatment=0 | 0.005008582 |
3.9 | Treatment=1 | 0.007513148 |
5.4 | Treatment=1 | 0.013523666 |
7.9 | Treatment=1 | 0.018031555 |
10.5 | Treatment=1 | 0.021036814 |
19.5 | Treatment=1 | 0.017256762 |
print(km)
## Call: survfit(formula = surv.model ~ Treatment, data = df.3.1)
##
## n events median 0.95LCL 0.95UCL
## Treatment=0 19 5 NA 24.4 NA
## Treatment=1 11 5 19.5 10.5 NA
For treatment 0 the median is undefined since the is still more then 50% of patients alive.
## [1] "Placebo"
## [1] 1 1 2 2 3 4 4 5 5 8 8 8 8 11 11 12 12 15 17 22 23
## [1] "6-MP"
## [1] 6 6 6 7 10 13 16 22 23 6+ 9+ 10+ 11+ 17+ 19+ 20+ 25+ 32+ 32+
## [20] 34+ 35+
km <- survfit(surv.model.3.3 ~ treatment, data = df.3.3)
summary(km)
## Call: survfit(formula = surv.model.3.3 ~ treatment, data = df.3.3)
##
## treatment=6-MP
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 6 21 3 0.857 0.0764 0.720 1.000
## 7 17 1 0.807 0.0869 0.653 0.996
## 10 15 1 0.753 0.0963 0.586 0.968
## 13 12 1 0.690 0.1068 0.510 0.935
## 16 11 1 0.627 0.1141 0.439 0.896
## 22 7 1 0.538 0.1282 0.337 0.858
## 23 6 1 0.448 0.1346 0.249 0.807
##
## treatment=Placebo
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 21 2 0.9048 0.0641 0.78754 1.000
## 2 19 2 0.8095 0.0857 0.65785 0.996
## 3 17 1 0.7619 0.0929 0.59988 0.968
## 4 16 2 0.6667 0.1029 0.49268 0.902
## 5 14 2 0.5714 0.1080 0.39455 0.828
## 8 12 4 0.3810 0.1060 0.22085 0.657
## 11 8 2 0.2857 0.0986 0.14529 0.562
## 12 6 2 0.1905 0.0857 0.07887 0.460
## 15 4 1 0.1429 0.0764 0.05011 0.407
## 17 3 1 0.0952 0.0641 0.02549 0.356
## 22 2 1 0.0476 0.0465 0.00703 0.322
## 23 1 1 0.0000 NaN NA NA
ggsurvplot(km)
surv.data <- tidy(km)
surv.var <- surv.data %>%
filter(n.event == 1) %>% #Looking only at uncensored observations
group_by(strata) %>%
mutate(S.Var = estimate^2 * cumsum(1/((max(n.risk)-cumsum(n.event))*(max(n.risk)-cumsum(n.event) + 1))))
surv.var %>%
select(time, strata, S.Var) %>%
filter(time == 10 | time == 3) %>%
flextable() %>%
set_caption("Surival Function Variences S(10) & S(3)")
time | strata | S.Var |
10 | treatment=6-MP | 0.004446435 |
3 | treatment=Placebo | 0.002134187 |
print(km)
## Call: survfit(formula = surv.model.3.3 ~ treatment, data = df.3.3)
##
## n events median 0.95LCL 0.95UCL
## treatment=6-MP 21 9 23 16 NA
## treatment=Placebo 21 21 8 4 12
df.4.9 <- haven::read_sas("males.sas7bdat")
head(df.4.9) %>%
flextable() %>%
set_caption("First 5 Observations")
Years | Censored | Freq |
0.5 | 0 | 456 |
0.5 | 1 | 0 |
1.5 | 0 | 226 |
1.5 | 1 | 39 |
2.5 | 0 | 152 |
2.5 | 1 | 22 |
This data is different format of survival data. R handles survival data where each row is an event. So we will need to transform the data by replicated the number of rows by the associated rows frequency
duptimes <- df.4.9$Freq
idx <- rep(1:nrow(df.4.9), duptimes)
df.4.9.t <- df.4.9[idx,]
Once that transformation is done survival analysis is the same steps as the previous problems.
surv.model.4.9 <- Surv(df.4.9.t$Years, event = abs(df.4.9.t$Censored-1))
km <- survfit(surv.model.4.9 ~ 1, data = df.4.9.t)
summary(km)
## Call: survfit(formula = surv.model.4.9 ~ 1, data = df.4.9.t)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0.5 2418 456 0.811 0.00796 0.796 0.827
## 1.5 1962 226 0.718 0.00915 0.700 0.736
## 2.5 1697 152 0.654 0.00970 0.635 0.673
## 3.5 1523 171 0.580 0.01011 0.561 0.600
## 4.5 1329 135 0.521 0.01028 0.502 0.542
## 5.5 1170 125 0.466 0.01032 0.446 0.486
## 6.5 938 83 0.424 0.01035 0.405 0.445
## 7.5 722 74 0.381 0.01045 0.361 0.402
## 8.5 546 51 0.345 0.01059 0.325 0.367
## 9.5 427 42 0.311 0.01077 0.291 0.333
## 10.5 321 43 0.270 0.01105 0.249 0.292
## 11.5 233 34 0.230 0.01131 0.209 0.254
## 12.5 146 18 0.202 0.01173 0.180 0.226
## 13.5 95 9 0.183 0.01223 0.160 0.208
## 14.5 59 6 0.164 0.01313 0.140 0.192
ggsurvplot(km)
survival.table1 <- broom::tidy(km) %>% filter(n.event > 0)
survival.table1 <- survival.table1 %>% mutate(hazard = n.event / (n.risk * (lead(time) - time)))
ggplot() +
geom_line(data = survival.table1, aes(x = time, y = hazard)) +
labs(x = "Time", y = "Hazard")
survival.table1 <- survival.table1 %>% mutate(probability = estimate * hazard)
ggplot() +
geom_line(data = survival.table1, aes(x = time, y = probability)) +
labs(x = "Time", y = "Probability Density")