4.1 Consider the survival time of the 30 melanoma patients in Table 3.1.

df.3.1 <-  read_excel("Table 3.1.xlsx")
## New names:
## * Uncensor -> Uncensor...10
## * Uncensor -> Uncensor...12
head(df.3.1, 10) %>% 
  flextable() %>% 
  set_caption("First 10 Observations")

Compute and plot the PL estimates of the survivorship functions \(S(t)\) of the two treatment groups and check your results with Table 3.2 and Figure 3.1.

surv.model <- Surv(df.3.1$`Survival Time`, event = df.3.1$Uncensor...12)
km <- survfit(surv.model ~ Treatment, data = df.3.1)
summary(km)
## Call: survfit(formula = surv.model ~ Treatment, data = df.3.1)
## 
##                 Treatment=0 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   6.9     19       1    0.947  0.0512        0.852        1.000
##   7.7     18       1    0.895  0.0704        0.767        1.000
##   8.0     16       1    0.839  0.0854        0.687        1.000
##   8.3     13       1    0.774  0.1003        0.601        0.998
##  24.4      3       1    0.516  0.2211        0.223        1.000
## 
##                 Treatment=1 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   3.9     11       1    0.909  0.0867        0.754        1.000
##   5.4     10       1    0.818  0.1163        0.619        1.000
##   7.9      9       1    0.727  0.1343        0.506        1.000
##  10.5      8       1    0.636  0.1450        0.407        0.995
##  19.5      4       1    0.477  0.1755        0.232        0.981

The survival times are the same with Table 3.2 in the book.

ggsurvplot(km)

The survival curves are the same as figure 3.1 in the book.

Compute the variance of \(S(t)\) for every uncensored observation.

The following equation is used to calculate the variance of \(S(t)\).

\[ Var[\hat{S}(t)] = [\hat{S}(t)]^2 \sum_{r} \frac{1}{(n-r)(n-r + 1)} \]

Where \(n\) is number of observations and \(r\) is number of deaths.

surv.data <- tidy(km)

surv.var <- surv.data %>%
  filter(n.event == 1) %>% #Looking only at uncensored observations
  group_by(strata) %>% 
  mutate(S.Var = estimate^2 * cumsum(1/((max(n.risk)-cumsum(n.event))*(max(n.risk)-cumsum(n.event) + 1))))

surv.var %>% 
  select(time, strata, S.Var) %>% 
  flextable() %>% 
  set_caption("Surival Function Variences")

Estimate the median survival times of the two groups

print(km)
## Call: survfit(formula = surv.model ~ Treatment, data = df.3.1)
## 
##              n events median 0.95LCL 0.95UCL
## Treatment=0 19      5     NA    24.4      NA
## Treatment=1 11      5   19.5    10.5      NA

For treatment 0 the median is undefined since the is still more then 50% of patients alive.

4.4 Consider the remission data of 42 patients with acute leukemia in Example 3.3.

## [1] "Placebo"
##  [1]  1  1  2  2  3  4  4  5  5  8  8  8  8 11 11 12 12 15 17 22 23
## [1] "6-MP"
##  [1]  6   6   6   7  10  13  16  22  23   6+  9+ 10+ 11+ 17+ 19+ 20+ 25+ 32+ 32+
## [20] 34+ 35+

Compute and plot the PL estimates of \(S(t)\) at every time to relapse for the 6-MP and placebo groups.

km <- survfit(surv.model.3.3 ~ treatment, data = df.3.3)
summary(km)
## Call: survfit(formula = surv.model.3.3 ~ treatment, data = df.3.3)
## 
##                 treatment=6-MP 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     6     21       3    0.857  0.0764        0.720        1.000
##     7     17       1    0.807  0.0869        0.653        0.996
##    10     15       1    0.753  0.0963        0.586        0.968
##    13     12       1    0.690  0.1068        0.510        0.935
##    16     11       1    0.627  0.1141        0.439        0.896
##    22      7       1    0.538  0.1282        0.337        0.858
##    23      6       1    0.448  0.1346        0.249        0.807
## 
##                 treatment=Placebo 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     1     21       2   0.9048  0.0641      0.78754        1.000
##     2     19       2   0.8095  0.0857      0.65785        0.996
##     3     17       1   0.7619  0.0929      0.59988        0.968
##     4     16       2   0.6667  0.1029      0.49268        0.902
##     5     14       2   0.5714  0.1080      0.39455        0.828
##     8     12       4   0.3810  0.1060      0.22085        0.657
##    11      8       2   0.2857  0.0986      0.14529        0.562
##    12      6       2   0.1905  0.0857      0.07887        0.460
##    15      4       1   0.1429  0.0764      0.05011        0.407
##    17      3       1   0.0952  0.0641      0.02549        0.356
##    22      2       1   0.0476  0.0465      0.00703        0.322
##    23      1       1   0.0000     NaN           NA           NA
ggsurvplot(km)

Compute the variances of \(S(10)\) in the 6-MP group and of \(S(3)\) in the placebo group.

surv.data <- tidy(km)

surv.var <- surv.data %>%
  filter(n.event == 1) %>% #Looking only at uncensored observations
  group_by(strata) %>% 
  mutate(S.Var = estimate^2 * cumsum(1/((max(n.risk)-cumsum(n.event))*(max(n.risk)-cumsum(n.event) + 1))))

surv.var %>% 
  select(time, strata, S.Var) %>% 
  filter(time == 10 | time == 3) %>% 
  flextable() %>% 
  set_caption("Surival Function Variences S(10) & S(3)")

Estimate the median remission times of the two treatment groups.

print(km)
## Call: survfit(formula = surv.model.3.3 ~ treatment, data = df.3.3)
## 
##                    n events median 0.95LCL 0.95UCL
## treatment=6-MP    21      9     23      16      NA
## treatment=Placebo 21     21      8       4      12

4.9 Do a complete life-table analysis of the data given in Exercise Table 4.2, the Angina Pectoris data.

df.4.9 <- haven::read_sas("males.sas7bdat")
head(df.4.9) %>% 
  flextable() %>% 
  set_caption("First 5 Observations")

This data is different format of survival data. R handles survival data where each row is an event. So we will need to transform the data by replicated the number of rows by the associated rows frequency

duptimes <- df.4.9$Freq
idx <- rep(1:nrow(df.4.9), duptimes)
df.4.9.t <- df.4.9[idx,]

Once that transformation is done survival analysis is the same steps as the previous problems.

surv.model.4.9 <- Surv(df.4.9.t$Years, event = abs(df.4.9.t$Censored-1))
km <- survfit(surv.model.4.9 ~ 1, data = df.4.9.t)
summary(km)
## Call: survfit(formula = surv.model.4.9 ~ 1, data = df.4.9.t)
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   0.5   2418     456    0.811 0.00796        0.796        0.827
##   1.5   1962     226    0.718 0.00915        0.700        0.736
##   2.5   1697     152    0.654 0.00970        0.635        0.673
##   3.5   1523     171    0.580 0.01011        0.561        0.600
##   4.5   1329     135    0.521 0.01028        0.502        0.542
##   5.5   1170     125    0.466 0.01032        0.446        0.486
##   6.5    938      83    0.424 0.01035        0.405        0.445
##   7.5    722      74    0.381 0.01045        0.361        0.402
##   8.5    546      51    0.345 0.01059        0.325        0.367
##   9.5    427      42    0.311 0.01077        0.291        0.333
##  10.5    321      43    0.270 0.01105        0.249        0.292
##  11.5    233      34    0.230 0.01131        0.209        0.254
##  12.5    146      18    0.202 0.01173        0.180        0.226
##  13.5     95       9    0.183 0.01223        0.160        0.208
##  14.5     59       6    0.164 0.01313        0.140        0.192

Plot the three survival functions: survival, hazard and density functions.

Survival Function

ggsurvplot(km)

Hazard Function

survival.table1 <- broom::tidy(km) %>% filter(n.event > 0)
survival.table1 <- survival.table1 %>% mutate(hazard = n.event / (n.risk * (lead(time) - time)))
ggplot() +
  geom_line(data = survival.table1, aes(x = time, y = hazard)) +
  labs(x = "Time", y = "Hazard")

Probablity

survival.table1 <- survival.table1 %>% mutate(probability  = estimate * hazard)
ggplot() +
  geom_line(data = survival.table1, aes(x = time, y = probability)) +
  labs(x = "Time", y = "Probability Density")