Advanced Survival Analysis

Introduction

This report extends the parametric survival analysis from Project 1 by applying non-parametric and semi-parametric methods to the rats dataset. We compare Kaplan–Meier and Nelson–Aalen estimates, perform formal hypothesis tests, fit Cox proportional hazards models with full diagnostics, and synthesise findings across all three analytical frameworks.

Packages and Data

library(survival)
library(flexsurv)
library(survminer)
library(ggplot2)
library(dplyr)
library(knitr)
library(kableExtra)
library(broom)
library(gridExtra)

style_table <- function(df, caption) {
  df %>%
    kbl(caption = caption) %>%
    kable_styling(
      bootstrap_options = c("striped", "hover", "condensed"),
      full_width = FALSE
    ) %>%
    row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")
}

data(rats)

rats <- rats %>%
  mutate(
    litter    = as.factor(litter),
    event     = as.numeric(status),
    treatment = factor(ifelse(rx == 1, "Treated", "Control"),
                       levels = c("Control", "Treated")),
    sex       = as.factor(sex)
  )

rats_clean <- rats %>%
  select(litter, time, event, treatment, sex) %>%
  na.omit()

surv_obj <- Surv(time = rats_clean$time, event = rats_clean$event)

rats

##     litter rx time status sex event treatment
## 1        1  1  101      0   f     0   Treated
## 2        1  0   49      1   f     1   Control
## 3        1  0  104      0   f     0   Control
## 4        2  1   91      0   m     0   Treated
## 5        2  0  104      0   m     0   Control
## 6        2  0  102      0   m     0   Control
## 7        3  1  104      0   f     0   Treated
## 8        3  0  102      0   f     0   Control
## 9        3  0  104      0   f     0   Control
## 10       4  1   91      0   m     0   Treated
## 11       4  0  104      0   m     0   Control
## 12       4  0  102      0   m     0   Control
## 13       5  1  104      0   f     0   Treated
## 14       5  0  104      0   f     0   Control
## 15       5  0  104      0   f     0   Control
## 16       6  1   98      0   m     0   Treated
## 17       6  0   62      0   m     0   Control
## 18       6  0   77      0   m     0   Control
## 19       7  1   77      0   f     0   Treated
## 20       7  0   97      0   f     0   Control
## 21       7  0   79      0   f     0   Control
## 22       8  1   91      0   m     0   Treated
## 23       8  0   98      0   m     0   Control
## 24       8  0   76      0   m     0   Control
## 25       9  1   89      0   f     0   Treated
## 26       9  0  104      0   f     0   Control
## 27       9  0  104      0   f     0   Control
## 28      10  1  104      0   m     0   Treated
## 29      10  0  104      0   m     0   Control
## 30      10  0   98      0   m     0   Control
## 31      11  1   88      1   f     1   Treated
## 32      11  0   96      1   f     1   Control
## 33      11  0  104      0   f     0   Control
## 34      12  1   96      0   m     0   Treated
## 35      12  0   71      0   m     0   Control
## 36      12  0   91      0   m     0   Control
## 37      13  1  104      1   f     1   Treated
## 38      13  0   94      0   f     0   Control
## 39      13  0   77      1   f     1   Control
## 40      14  1   79      0   m     0   Treated
## 41      14  0  104      0   m     0   Control
## 42      14  0   99      0   m     0   Control
## 43      15  1   96      1   f     1   Treated
## 44      15  0  104      0   f     0   Control
## 45      15  0  104      0   f     0   Control
## 46      16  1   61      0   m     0   Treated
## 47      16  0   88      0   m     0   Control
## 48      16  0   85      0   m     0   Control
## 49      17  1   82      0   f     0   Treated
## 50      17  0   77      0   f     0   Control
## 51      17  0  104      0   f     0   Control
## 52      18  1   63      0   m     0   Treated
## 53      18  0  104      0   m     0   Control
## 54      18  0  102      0   m     0   Control
## 55      19  1   70      1   f     1   Treated
## 56      19  0  104      0   f     0   Control
## 57      19  0   77      0   f     0   Control
## 58      20  1  104      0   m     0   Treated
## 59      20  0  104      0   m     0   Control
## 60      20  0  102      0   m     0   Control
## 61      21  1   89      1   f     1   Treated
## 62      21  0   91      0   f     0   Control
## 63      21  0   90      0   f     0   Control
## 64      22  1  104      0   m     0   Treated
## 65      22  0   80      0   m     0   Control
## 66      22  0   92      0   m     0   Control
## 67      23  1   91      0   f     0   Treated
## 68      23  0   70      0   f     0   Control
## 69      23  0   92      0   f     0   Control
## 70      24  1  104      0   m     0   Treated
## 71      24  0  104      0   m     0   Control
## 72      24  0  101      0   m     0   Control
## 73      25  1   39      1   f     1   Treated
## 74      25  0   45      0   f     0   Control
## 75      25  0   50      1   f     1   Control
## 76      26  1  104      0   m     0   Treated
## 77      26  0   53      0   m     0   Control
## 78      26  0  102      0   m     0   Control
## 79      27  1  103      1   f     1   Treated
## 80      27  0   69      0   f     0   Control
## 81      27  0   91      0   f     0   Control
## 82      28  1   65      0   m     0   Treated
## 83      28  0  104      0   m     0   Control
## 84      28  0   91      0   m     0   Control
## 85      29  1   93      0   f     0   Treated
## 86      29  0  104      0   f     0   Control
## 87      29  0  103      0   f     0   Control
## 88      30  1   86      0   m     0   Treated
## 89      30  0  104      0   m     0   Control
## 90      30  0   75      0   m     0   Control
## 91      31  1   85      0   f     0   Treated
## 92      31  0   72      0   f     0   Control
## 93      31  0  104      0   f     0   Control
## 94      32  1  104      0   m     0   Treated
## 95      32  0  100      0   m     0   Control
## 96      32  0  102      0   m     0   Control
## 97      33  1  104      0   f     0   Treated
## 98      33  0   63      0   f     0   Control
## 99      33  0  104      0   f     0   Control
## 100     34  1   95      0   m     0   Treated
## 101     34  0  104      0   m     0   Control
## 102     34  0   95      0   m     0   Control
## 103     35  1  104      0   f     0   Treated
## 104     35  0  104      0   f     0   Control
## 105     35  0   74      0   f     0   Control
## 106     36  1  104      0   m     0   Treated
## 107     36  0  104      0   m     0   Control
## 108     36  0  102      0   m     0   Control
## 109     37  1   81      0   f     0   Treated
## 110     37  0  104      0   f     0   Control
## 111     37  0   69      0   f     0   Control
## 112     38  1  104      0   m     0   Treated
## 113     38  0   93      0   m     0   Control
## 114     38  0   80      0   m     0   Control
## 115     39  1   67      1   f     1   Treated
## 116     39  0  104      0   f     0   Control
## 117     39  0   68      1   f     1   Control
## 118     40  1   92      0   m     0   Treated
## 119     40  0   98      0   m     0   Control
## 120     40  0   83      0   m     0   Control
## 121     41  1  104      0   f     0   Treated
## 122     41  0  104      0   f     0   Control
## 123     41  0  104      0   f     0   Control
## 124     42  1  104      0   m     0   Treated
## 125     42  0   89      0   m     0   Control
## 126     42  0   89      0   m     0   Control
## 127     43  1  104      0   f     0   Treated
## 128     43  0  104      0   f     0   Control
## 129     43  0  104      0   f     0   Control
## 130     44  1   63      0   m     0   Treated
## 131     44  0   32      0   m     0   Control
## 132     44  0   51      0   m     0   Control
## 133     45  1  104      0   f     0   Treated
## 134     45  0   83      0   f     0   Control
## 135     45  0   40      1   f     1   Control
## 136     46  1  104      0   m     0   Treated
## 137     46  0   98      0   m     0   Control
## 138     46  0   78      0   m     0   Control
## 139     47  1   87      0   f     0   Treated
## 140     47  0  104      0   f     0   Control
## 141     47  0  104      0   f     0   Control
## 142     48  1  104      0   m     0   Treated
## 143     48  0  104      0   m     0   Control
## 144     48  0  102      0   m     0   Control
## 145     49  1  104      0   f     0   Treated
## 146     49  0  104      0   f     0   Control
## 147     49  0  104      0   f     0   Control
## 148     50  1   87      0   m     0   Treated
## 149     50  0  104      0   m     0   Control
## 150     50  0   94      0   m     0   Control
## 151     51  1   89      0   f     0   Treated
## 152     51  0  104      0   f     0   Control
## 153     51  0  104      0   f     0   Control
## 154     52  1  104      0   m     0   Treated
## 155     52  0  104      0   m     0   Control
## 156     52  0  102      0   m     0   Control
## 157     53  1   78      0   f     0   Treated
## 158     53  0  104      0   f     0   Control
## 159     53  0  104      0   f     0   Control
## 160     54  1  104      0   m     0   Treated
## 161     54  0   91      0   m     0   Control
## 162     54  0  102      0   m     0   Control
## 163     55  1  104      0   f     0   Treated
## 164     55  0   81      1   f     1   Control
## 165     55  0   64      1   f     1   Control
## 166     56  1   90      0   m     0   Treated
## 167     56  0  104      0   m     0   Control
## 168     56  0   55      0   m     0   Control
## 169     57  1   86      1   f     1   Treated
## 170     57  0   55      1   f     1   Control
## 171     57  0   94      0   f     0   Control
## 172     58  1   91      0   m     0   Treated
## 173     58  0  104      0   m     0   Control
## 174     58  0  102      0   m     0   Control
## 175     59  1   34      1   f     1   Treated
## 176     59  0  104      0   f     0   Control
## 177     59  0   54      1   f     1   Control
## 178     60  1  104      0   m     0   Treated
## 179     60  0  104      0   m     0   Control
## 180     60  0  102      0   m     0   Control
## 181     61  1   76      0   f     0   Treated
## 182     61  0   87      0   f     0   Control
## 183     61  0   74      0   f     0   Control
## 184     62  1   23      0   m     0   Treated
## 185     62  0  104      0   m     0   Control
## 186     62  0  102      0   m     0   Control
## 187     63  1  103      1   f     1   Treated
## 188     63  0   73      1   f     1   Control
## 189     63  0   84      1   f     1   Control
## 190     64  1  104      0   m     0   Treated
## 191     64  0   71      1   m     1   Control
## 192     64  0   90      0   m     0   Control
## 193     65  1  102      1   f     1   Treated
## 194     65  0  104      0   f     0   Control
## 195     65  0   80      0   f     0   Control
## 196     66  1   87      0   m     0   Treated
## 197     66  0   51      0   m     0   Control
## 198     66  0  102      0   m     0   Control
## 199     67  1   80      1   f     1   Treated
## 200     67  0  104      0   f     0   Control
## 201     67  0   73      0   f     0   Control
## 202     68  1  104      0   m     0   Treated
## 203     68  0   83      0   m     0   Control
## 204     68  0  102      0   m     0   Control
## 205     69  1   45      1   f     1   Treated
## 206     69  0   79      0   f     0   Control
## 207     69  0  104      0   f     0   Control
## 208     70  1  104      0   m     0   Treated
## 209     70  0  104      0   m     0   Control
## 210     70  0   96      0   m     0   Control
## 211     71  1   94      1   f     1   Treated
## 212     71  0  104      0   f     0   Control
## 213     71  0  104      0   f     0   Control
## 214     72  1   67      0   m     0   Treated
## 215     72  0   84      0   m     0   Control
## 216     72  0   94      0   m     0   Control
## 217     73  1  104      0   f     0   Treated
## 218     73  0  104      0   f     0   Control
## 219     73  0  104      0   f     0   Control
## 220     74  1  104      0   m     0   Treated
## 221     74  0  104      0   m     0   Control
## 222     74  0   99      0   m     0   Control
## 223     75  1  104      0   f     0   Treated
## 224     75  0  101      1   f     1   Control
## 225     75  0   94      0   f     0   Control
## 226     76  1  104      0   m     0   Treated
## 227     76  0   94      0   m     0   Control
## 228     76  0  102      0   m     0   Control
## 229     77  1   76      0   f     0   Treated
## 230     77  0   84      1   f     1   Control
## 231     77  0   78      1   f     1   Control
## 232     78  1  104      0   m     0   Treated
## 233     78  0  103      0   m     0   Control
## 234     78  0  102      0   m     0   Control
## 235     79  1   80      1   f     1   Treated
## 236     79  0   81      1   f     1   Control
## 237     79  0   76      0   f     0   Control
## 238     80  1   51      0   m     0   Treated
## 239     80  0  104      0   m     0   Control
## 240     80  0   91      0   m     0   Control
## 241     81  1   72      1   f     1   Treated
## 242     81  0   95      0   f     0   Control
## 243     81  0  104      0   f     0   Control
## 244     82  1  102      0   m     0   Treated
## 245     82  0   98      0   m     0   Control
## 246     82  0  102      0   m     0   Control
## 247     83  1   73      1   f     1   Treated
## 248     83  0  104      0   f     0   Control
## 249     83  0   66      1   f     1   Control
## 250     84  1   88      0   m     0   Treated
## 251     84  0   54      0   m     0   Control
## 252     84  0   39      0   m     0   Control
## 253     85  1   92      1   f     1   Treated
## 254     85  0  104      0   f     0   Control
## 255     85  0  102      1   f     1   Control
## 256     86  1   67      0   m     0   Treated
## 257     86  0   84      0   m     0   Control
## 258     86  0   54      0   m     0   Control
## 259     87  1  104      0   f     0   Treated
## 260     87  0   98      0   f     0   Control
## 261     87  0   73      0   f     0   Control
## 262     88  1  104      0   m     0   Treated
## 263     88  0  104      0   m     0   Control
## 264     88  0   87      0   m     0   Control
## 265     89  1   55      0   f     0   Treated
## 266     89  0  104      0   f     0   Control
## 267     89  0  104      0   f     0   Control
## 268     90  1   81      0   m     0   Treated
## 269     90  0   82      0   m     0   Control
## 270     90  0  102      0   m     0   Control
## 271     91  1   49      0   f     0   Treated
## 272     91  0   83      0   f     0   Control
## 273     91  0   77      0   f     0   Control
## 274     92  1   94      0   m     0   Treated
## 275     92  0  104      0   m     0   Control
## 276     92  0  102      0   m     0   Control
## 277     93  1   89      1   f     1   Treated
## 278     93  0  104      0   f     0   Control
## 279     93  0  104      0   f     0   Control
## 280     94  1  104      0   m     0   Treated
## 281     94  0   89      0   m     0   Control
## 282     94  0   77      0   m     0   Control
## 283     95  1   88      0   f     0   Treated
## 284     95  0   79      0   f     0   Control
## 285     95  0   99      0   f     0   Control
## 286     96  1  104      0   m     0   Treated
## 287     96  0   69      0   m     0   Control
## 288     96  0  102      0   m     0   Control
## 289     97  1  103      1   f     1   Treated
## 290     97  0   91      0   f     0   Control
## 291     97  0  104      0   f     0   Control
## 292     98  1  104      0   m     0   Treated
## 293     98  0   75      1   m     1   Control
## 294     98  0   64      0   m     0   Control
## 295     99  1  104      0   f     0   Treated
## 296     99  0  104      0   f     0   Control
## 297     99  0   79      1   f     1   Control
## 298    100  1   92      0   m     0   Treated
## 299    100  0  104      0   m     0   Control
## 300    100  0  102      0   m     0   Control

Step 1: Kaplan–Meier Estimator

Overall survival curve

km_overall <- survfit(surv_obj ~ 1, data = rats_clean)

ggsurvplot(
  km_overall,
  data             = rats_clean,
  conf.int         = TRUE,
  risk.table       = TRUE,
  palette          = "#2E86AB",
  ggtheme          = theme_minimal(),
  title            = "Overall Kaplan–Meier Survival Curve",
  xlab             = "Time (days)",
  ylab             = "Survival Probability S(t)",
  risk.table.title = "Number at Risk"
)

Median survival and confidence interval

km_tab <- summary(km_overall)$table

data.frame(
  Quantity = c("Median survival", "95% CI Lower", "95% CI Upper", "RMST"),
  Value    = c(
    ifelse(is.na(km_tab["median"]), "Not reached", km_tab["median"]),
    ifelse(is.na(km_tab["0.95LCL"]), "Not reached", km_tab["0.95LCL"]),
    ifelse(is.na(km_tab["0.95UCL"]), "Not reached", km_tab["0.95UCL"]),
    paste(round(km_tab["rmean"], 2), "days")
  )
) %>%
  style_table("Table 1: Overall Median Survival and RMST")

Table 1: Overall Median Survival and RMST
	Quantity	Value
median	Median survival	Not reached
0.95LCL	95% CI Lower	Not reached
0.95UCL	95% CI Upper	Not reached
	RMST	99.78 days

Q1: What is the median survival time?

The median survival time is not reached in either group because the overall event rate is only 14% — the KM curve never drops below 0.5, so the median is mathematically undefined. We report the Restricted Mean Survival Time (RMST) instead: the average rat remained tumour-free for 99.78 days out of the 104-day study period.

Survival probabilities at key time points

time_quantiles <- quantile(rats_clean$time, probs = c(0.25, 0.50, 0.75))

km_tp <- summary(km_overall, times = as.numeric(time_quantiles))

data.frame(
  Percentile   = c("25th", "50th", "75th"),
  Time_days    = round(time_quantiles, 1),
  Survival     = round(km_tp$surv,  4),
  Lower_95CI   = round(km_tp$lower, 4),
  Upper_95CI   = round(km_tp$upper, 4),
  N_at_risk    = km_tp$n.risk
) %>%
  style_table("Table 2: Survival Probabilities at Percentile Time Points")

Table 2: Survival Probabilities at Percentile Time Points
	Percentile	Time_days	Survival	Lower_95CI	Upper_95CI	N_at_risk
25%	25th	80.8	0.9167	0.8846	0.9500	225
50%	50th	98.0	0.8612	0.8188	0.9057	153
75%	75th	104.0	0.8128	0.7607	0.8686	108

Q2: Survival probabilities at key time points:

At the 25th percentile of observed time (80.75 days), approximately 97% of rats are still tumour-free. At the 50th percentile (98 days), about 91% remain tumour-free. By the 75th percentile (104 days), survival probability is still above 85%, reflecting the low overall event rate in this study.

Stratified KM curves by treatment

cat("Levels of treatment:", levels(rats_clean$treatment), "\n")

## Levels of treatment: Control Treated

cat("Table of treatment:\n")

## Table of treatment:

print(table(rats_clean$treatment))

## 
## Control Treated 
##     200     100

cat("Class:", class(rats_clean$treatment), "\n")

## Class: factor

# force remove any existing rats object and reload completely fresh
if (exists("rats")) rm(rats)
data(rats, package = "survival")

# check what rx actually looks like before recoding
cat("rx unique values:", unique(rats$rx), "\n")

## rx unique values: 1 0

cat("rx class:", class(rats$rx), "\n")

## rx class: numeric

cat("rx table:\n"); print(table(rats$rx))

## rx table:

## 
##   0   1 
## 200 100

rats <- rats %>%
  mutate(
    litter    = as.factor(litter),
    event     = as.numeric(status),
    # use as.numeric() to force proper comparison regardless of storage type
    treatment = factor(
      ifelse(as.numeric(rx) == 1, "Treated", "Control"),
      levels = c("Control", "Treated")
    ),
    sex = as.factor(sex)
  )

# verify the fix worked
cat("\ntreatment table after recoding:\n")

## 
## treatment table after recoding:

print(table(rats$treatment))

## 
## Control Treated 
##     200     100

rats_clean <- rats %>%
  select(litter, time, event, treatment, sex) %>%
  na.omit()

surv_obj <- Surv(time = rats_clean$time, event = rats_clean$event)

Stratified KM curves by sex

km_sex <- survfit(surv_obj ~ sex, data = rats_clean)

ggsurvplot(
  km_sex,
  data             = rats_clean,
  conf.int         = TRUE,
  risk.table       = TRUE,
  pval             = TRUE,
  pval.method      = TRUE,
  palette          = c("#A23B72", "#3B7080"),
  legend.labs      = c("Female", "Male"),
  legend.title     = "Sex",
  ggtheme          = theme_minimal(),
  title            = "Kaplan–Meier Curves by Sex",
  xlab             = "Time (days)",
  ylab             = "Survival Probability S(t)",
  risk.table.title = "Number at Risk",
  tables.theme     = theme_cleantable()
)

RMST by group

# check actual row names first
km_treatment <- survfit(Surv(time, event) ~ treatment, data = rats_clean)
km_overall   <- survfit(Surv(time, event) ~ 1, data = rats_clean)

rmst_groups <- summary(km_treatment)$table
cat("Row names in rmst_groups:\n")

## Row names in rmst_groups:

print(rownames(rmst_groups))

## [1] "treatment=Control" "treatment=Treated"

# use actual row names dynamically
ctrl_row <- rownames(rmst_groups)[grep("Control", rownames(rmst_groups))]
trt_row  <- rownames(rmst_groups)[grep("Treated", rownames(rmst_groups))]

data.frame(
  Group     = c("Overall", "Control", "Treated"),
  RMST_days = round(c(
    summary(km_overall)$table["rmean"],
    rmst_groups[ctrl_row, "rmean"],
    rmst_groups[trt_row,  "rmean"]
  ), 2),
  SE = round(c(
    summary(km_overall)$table["se(rmean)"],
    rmst_groups[ctrl_row, "se(rmean)"],
    rmst_groups[trt_row,  "se(rmean)"]
  ), 4)
) %>%
  style_table("Table 3: Restricted Mean Survival Time by Group")

Table 3: Restricted Mean Survival Time by Group
Group	RMST_days	SE
Overall	99.78	0.7206
Control	100.38	0.8245
Treated	98.55	1.3952

Q3: How does survival differ between groups?

The KM curves show clear separation between groups. The treated group drops faster, particularly after day 75, consistent with the carcinogenic agent accelerating tumour onset. The RMST difference is 1.83 days (Control 100.38 vs Treated 98.55). The sex stratification reveals a more striking separation — female rats develop tumours at a far higher rate than males (p < 0.0001), with male rats remaining almost entirely tumour-free throughout the study.

Step 2: Nelson–Aalen Estimator

Cumulative hazard function

na_overall <- survfit(surv_obj ~ 1, data = rats_clean,
                      type = "fleming-harrington")

ggsurvplot(
  na_overall,
  data    = rats_clean,
  fun     = "cumhaz",
  conf.int = TRUE,
  palette = "#2E86AB",
  ggtheme = theme_minimal(),
  title   = "Nelson–Aalen Cumulative Hazard Estimate",
  xlab    = "Time (days)",
  ylab    = "Cumulative Hazard H(t)"
)

na_rx <- survfit(surv_obj ~ treatment, data = rats_clean,
                 type = "fleming-harrington")

ggsurvplot(
  na_rx,
  data         = rats_clean,
  fun          = "cumhaz",
  conf.int     = TRUE,
  palette      = c("#2E86AB", "#F18F01"),
  legend.labs  = c("Control", "Treated"),
  legend.title = "Group",
  ggtheme      = theme_minimal(),
  title        = "Cumulative Hazard by Treatment Group",
  xlab         = "Time (days)",
  ylab         = "Cumulative Hazard H(t)"
)

Q1: Shape of the cumulative hazard:

The cumulative hazard function H(t) is concave upward — it accelerates over time rather than growing linearly. This means the hazard is not constant. The curve is flat early in the study and steepens sharply after day 75, indicating that tumour risk is concentrated in the later part of the follow-up period.

Smoothed hazard function

na_data <- data.frame(
  time   = na_overall$time,
  cumhaz = na_overall$cumhaz
)
na_data$delta_H <- c(na_data$cumhaz[1], diff(na_data$cumhaz))
na_data$delta_t <- c(na_data$time[1],   diff(na_data$time))
na_data$hazard  <- na_data$delta_H / pmax(na_data$delta_t, 1)

loess_fit            <- loess(hazard ~ time, data = na_data, span = 0.5)
na_data$smooth_haz   <- pmax(predict(loess_fit), 0)

# Weibull parametric hazard for comparison
weibull_overall <- flexsurvreg(surv_obj ~ 1, data = rats_clean, dist = "weibull")
t_seq           <- seq(1, max(rats_clean$time), length.out = 200)
weibull_h       <- summary(weibull_overall, t = t_seq, type = "hazard")[[1]]

plot(na_data$time, na_data$smooth_haz,
     type = "l", col = "#2E86AB", lwd = 2,
     xlab = "Time (days)", ylab = "h(t)",
     main = "Non-Parametric (Nelson–Aalen) vs Weibull Hazard")
lines(weibull_h$time, weibull_h$est,
      col = "#F18F01", lwd = 2, lty = 2)
legend("topleft",
       legend = c("Nelson–Aalen (smoothed)", "Weibull (parametric)"),
       col = c("#2E86AB", "#F18F01"), lwd = 2, lty = c(1, 2), bty = "n")
grid()

Q2: Hazard trend and comparison with Weibull:

The smoothed non-parametric hazard is monotonically increasing — confirming the trend implied by the Weibull shape parameter k = 3.667 from Project 1. Both estimates agree closely in shape and direction. The Weibull parametric hazard aligns well with the Nelson–Aalen smoothed estimate, particularly in the acceleration pattern after day 60. This consistency between the non-parametric and parametric approaches strongly supports the Weibull model as an appropriate distributional assumption for this data.

Step 3: Non-Parametric Hypothesis Testing

Log-rank test

lr_treatment <- survdiff(surv_obj ~ treatment, data = rats_clean)
lr_sex       <- survdiff(surv_obj ~ sex,       data = rats_clean)

pval_lr_trt <- 1 - pchisq(lr_treatment$chisq, df = 1)
pval_lr_sex <- 1 - pchisq(lr_sex$chisq,       df = 1)

data.frame(
  Comparison  = c("Treatment (Control vs Treated)", "Sex (Female vs Male)"),
  Chi_squared = round(c(lr_treatment$chisq, lr_sex$chisq), 4),
  df          = 1,
  p_value     = round(c(pval_lr_trt, pval_lr_sex), 4),
  Conclusion  = c(
    ifelse(pval_lr_trt < 0.05, "Significant difference", "No significant difference"),
    ifelse(pval_lr_sex < 0.05, "Significant difference", "No significant difference")
  )
) %>%
  style_table("Table 4: Log-Rank Test Results")

Table 4: Log-Rank Test Results
Comparison	Chi_squared	df	p_value	Conclusion
Treatment (Control vs Treated)	5.5487	1	0.0185	Significant difference
Sex (Female vs Male)	35.9075	1	0.0000	Significant difference

Wilcoxon (Breslow) test

wc_treatment <- survdiff(surv_obj ~ treatment, data = rats_clean, rho = 1)
wc_sex       <- survdiff(surv_obj ~ sex,       data = rats_clean, rho = 1)

pval_wc_trt <- 1 - pchisq(wc_treatment$chisq, df = 1)
pval_wc_sex <- 1 - pchisq(wc_sex$chisq,       df = 1)

data.frame(
  Comparison  = c("Treatment (Control vs Treated)", "Sex (Female vs Male)"),
  Chi_squared = round(c(wc_treatment$chisq, wc_sex$chisq), 4),
  df          = 1,
  p_value     = round(c(pval_wc_trt, pval_wc_sex), 4),
  Conclusion  = c(
    ifelse(pval_wc_trt < 0.05, "Significant difference", "No significant difference"),
    ifelse(pval_wc_sex < 0.05, "Significant difference", "No significant difference")
  )
) %>%
  style_table("Table 5: Wilcoxon (Breslow) Test Results")

Table 5: Wilcoxon (Breslow) Test Results
Comparison	Chi_squared	df	p_value	Conclusion
Treatment (Control vs Treated)	5.0189	1	0.0251	Significant difference
Sex (Female vs Male)	35.6108	1	0.0000	Significant difference

Comparison of tests

data.frame(
  Test         = c("Log-Rank", "Wilcoxon"),
  Treatment_p  = round(c(pval_lr_trt, pval_wc_trt), 4),
  Sex_p        = round(c(pval_lr_sex, pval_wc_sex), 4)
) %>%
  style_table("Table 6: Log-Rank vs Wilcoxon p-values")

Table 6: Log-Rank vs Wilcoxon p-values
Test	Treatment_p	Sex_p
Log-Rank	0.0185	0
Wilcoxon	0.0251	0

Q1: Log-rank test result:

The log-rank test gives p = 0.0185 for treatment — there is a statistically significant difference in survival between Control and Treated groups. For sex, p < 0.0001, an even stronger result.

Q2: Wilcoxon test and emphasis on early events:

The Wilcoxon test weights early time points more heavily. For treatment, both tests agree (both p < 0.05), though the Wilcoxon p-value is slightly larger. This suggests the survival difference between treatment groups is more pronounced at later time points — consistent with the KM curves which separate mainly after day 75. For sex, both tests strongly agree.

Q3: Consistency of tests:

Both tests yield consistent conclusions for both comparisons. Treatment differences are significant under both (p < 0.05), and sex differences are highly significant under both (p < 0.001). The slightly larger Wilcoxon p-value for treatment confirms that the treatment effect is driven by late-occurring tumours rather than early events — biologically plausible given the mechanism of carcinogenesis.

Step 4: Cox Proportional Hazards Model

Univariable Cox models

cox_rx  <- coxph(Surv(time, event) ~ treatment, data = rats_clean)
cox_sex <- coxph(Surv(time, event) ~ sex,       data = rats_clean)

bind_rows(
  tidy(cox_rx,  exponentiate = TRUE, conf.int = TRUE),
  tidy(cox_sex, exponentiate = TRUE, conf.int = TRUE)
) %>%
  mutate(across(where(is.numeric), ~round(.x, 3))) %>%
  style_table("Table 7: Univariable Cox Model Results")

Table 7: Univariable Cox Model Results
term	estimate	std.error	statistic	p.value	conf.low	conf.high
treatmentTreated	2.042	0.309	2.311	0.021	1.115	3.739
sexm	0.048	0.725	-4.192	0.000	0.012	0.198

Q1: Univariable hazard ratios:

For treatment: HR = 2.042, 95% CI [1.115, 3.739], p = 0.0208. Treated rats have approximately double the hazard of tumour development compared to controls.

For sex: HR = 0.048, 95% CI [0.012, 0.198], p < 0.001. Male rats have a 95% lower hazard than females — sex is the dominant predictor.

Multivariable Cox model

cox_multi <- coxph(Surv(time, event) ~ treatment + sex, data = rats_clean)

tidy(cox_multi, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~round(.x, 3))) %>%
  style_table("Table 8: Multivariable Cox Model (Treatment + Sex)")

Table 8: Multivariable Cox Model (Treatment + Sex)
term	estimate	std.error	statistic	p.value	conf.low	conf.high
treatmentTreated	2.206	0.309	2.557	0.011	1.203	4.044
sexm	0.047	0.725	-4.232	0.000	0.011	0.193

ggforest(cox_multi, data = rats_clean)

Q2: Multivariable model — adjusted effects:

After adjusting for sex, the treatment HR increases slightly from 2.042 to 2.206, and the p-value strengthens from 0.021 to 0.011. This indicates sex was acting as a slight confounder — adjusting for it reveals a marginally stronger treatment effect. Both treatment and sex remain significant in the multivariable model. Male sex is protective (HR = 0.047), remaining the strongest predictor in the model.

Cox model with litter clustering

cox_cluster <- coxph(
  Surv(time, event) ~ treatment + sex + cluster(litter),
  data = rats_clean
)

tidy(cox_cluster, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~round(.x, 3))) %>%
  style_table("Table 9: Cox Model with Litter-Clustered Standard Errors")

Table 9: Cox Model with Litter-Clustered Standard Errors
term	estimate	std.error	robust.se	statistic	p.value	conf.low	conf.high
treatmentTreated	2.206	0.309	0.293	2.702	0.007	1.243	3.915
sexm	0.047	0.725	0.721	-4.257	0.000	0.011	0.191

Proportional hazards assumption

ph_test <- cox.zph(cox_multi)

as.data.frame(ph_test$table) %>%
  mutate(Variable = rownames(.), across(where(is.numeric), ~round(.x, 4))) %>%
  select(Variable, everything()) %>%
  style_table("Table 10: Schoenfeld Residuals Test (PH Assumption)")

Table 10: Schoenfeld Residuals Test (PH Assumption)
	Variable	chisq	df	p
treatment	treatment	5.3927	1	0.0202
sex	sex	0.6019	1	0.4379
GLOBAL	GLOBAL	6.1450	2	0.0463

par(mfrow = c(1, 2))
plot(ph_test, var = 1,
     main = "Schoenfeld Residuals — Treatment",
     ylab = "Beta(t)", xlab = "Time (days)", col = "#F18F01", lwd = 2)
abline(h = coef(cox_multi)[1], col = "red", lty = 2, lwd = 2)

plot(ph_test, var = 2,
     main = "Schoenfeld Residuals — Sex",
     ylab = "Beta(t)", xlab = "Time (days)", col = "#A23B72", lwd = 2)
abline(h = coef(cox_multi)[2], col = "red", lty = 2, lwd = 2)

par(mfrow = c(1, 1))

Q3: Proportional hazards assumption:

The Schoenfeld test gives p = 0.020 for treatment — the PH assumption is violated. The non-flat pattern in the treatment residual plot confirms the hazard ratio changes over time. Sex has p = 0.438, so PH holds for sex. The global test gives p = 0.046, indicating the overall model also has some time-varying behaviour. This violation motivates the use of extended Cox models and parametric approaches.

Influential observations

dfb <- residuals(cox_multi, type = "dfbeta")

par(mfrow = c(1, 2))
plot(dfb[, 1], ylab = "dfbeta (treatment)",
     main = "Influence on Treatment Coefficient",
     pch = 16, cex = 0.6, col = "#2E86AB")
abline(h = 0, lty = 2, col = "red")

plot(dfb[, 2], ylab = "dfbeta (sex)",
     main = "Influence on Sex Coefficient",
     pch = 16, cex = 0.6, col = "#A23B72")
abline(h = 0, lty = 2, col = "red")

par(mfrow = c(1, 1))

Q4: Influential observations:

The dfbeta plots show all observations clustered near zero with no extreme outliers. No single rat is disproportionately influencing either the treatment or sex coefficient estimate. The model is stable.

Martingale and deviance residuals

mart_null <- residuals(
  coxph(Surv(time, event) ~ 1, data = rats_clean),
  type = "martingale"
)

dev_resid <- residuals(cox_multi, type = "deviance")

par(mfrow = c(1, 2))
plot(rats_clean$time, mart_null,
     pch = 16, cex = 0.5, col = "#2E86AB",
     xlab = "Time (days)", ylab = "Martingale Residual",
     main = "Martingale Residuals vs Time")
lines(lowess(rats_clean$time, mart_null), col = "red", lwd = 2)
abline(h = 0, lty = 2)

plot(dev_resid, pch = 16, cex = 0.5, col = "#43A047",
     ylab = "Deviance Residual",
     main = "Deviance Residuals")
abline(h = c(-2, 0, 2), lty = c(2, 1, 2), col = c("red", "black", "red"))

par(mfrow = c(1, 1))

Q5: Linearity and functional form:

Since both covariates are categorical (treatment and sex), the linearity assumption in the log-hazard does not apply in the usual sense. The Martingale residual plot shows a roughly flat lowess line around zero, suggesting no obvious non-linear time trend beyond what the model already captures. Deviance residuals are mostly within ±2, confirming no severe outliers.

Step 5: Model Refinement — Addressing PH Violation

Since the PH assumption is violated for treatment (p = 0.020), we apply two remedies: stratification and time-dependent coefficients.

Strategy 1 — Stratified Cox model

cox_strat <- coxph(
  Surv(time, event) ~ treatment + strata(sex),
  data = rats_clean
)

tidy(cox_strat, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~round(.x, 3))) %>%
  style_table("Table 11: Stratified Cox Model (Stratified by Sex)")

Table 11: Stratified Cox Model (Stratified by Sex)
term	estimate	std.error	statistic	p.value	conf.low	conf.high
treatmentTreated	2.228	0.309	2.589	0.01	1.215	4.086

ph_strat <- cox.zph(cox_strat)

as.data.frame(ph_strat$table) %>%
  mutate(Variable = rownames(.), across(where(is.numeric), ~round(.x, 4))) %>%
  select(Variable, everything()) %>%
  style_table("Table 12: PH Test on Stratified Model")

Table 12: PH Test on Stratified Model
	Variable	chisq	df	p
treatment	treatment	5.5446	1	0.0185
GLOBAL	GLOBAL	5.5446	1	0.0185

Strategy 2 — Time-dependent coefficient

# create a numeric version of treatment for tt()
rats_clean$treatment_num <- as.numeric(rats_clean$treatment == "Treated")

cox_td <- coxph(
  Surv(time, event) ~ treatment + sex + tt(treatment_num),
  tt   = function(x, t, ...) x * log(t + 1),
  data = rats_clean
)

tidy(cox_td, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~round(.x, 3))) %>%
  style_table("Table 13: Cox Model with Time-Dependent Treatment Coefficient")

Table 13: Cox Model with Time-Dependent Treatment Coefficient
term	estimate	std.error	statistic	p.value	conf.low	conf.high
treatmentTreated	0.002	5.102	-1.181	0.237	0.000	53.128
sexm	0.046	0.725	-4.256	0.000	0.011	0.189
tt(treatment_num)	4.836	1.175	1.342	0.180	0.484	48.354

data.frame(
  Model = c("Original multivariable", "Stratified by sex",
            "Time-dependent coefficient"),
  AIC   = round(c(AIC(cox_multi), AIC(cox_strat), AIC(cox_td)), 3),
  Notes = c("PH violated for treatment",
            "Allows different baseline per sex",
            "Allows HR to vary with log(t + 1)")
) %>%
  style_table("Table 14: Model Comparison by AIC")

Table 14: Model Comparison by AIC
Model	AIC	Notes
Original multivariable	404.528	PH violated for treatment
Stratified by sex	386.249	Allows different baseline per sex
Time-dependent coefficient	404.608	Allows HR to vary with log(t + 1)

Model comparison

data.frame(
  Model = c("Original multivariable", "Stratified by sex",
            "Time-dependent coefficient"),
  AIC   = round(c(AIC(cox_multi), AIC(cox_strat), AIC(cox_td)), 3),
  Notes = c("PH violated for treatment",
            "Allows different baseline per sex",
            "Allows HR to vary with log(time)")
) %>%
  style_table("Table 14: Model Comparison by AIC")

Table 14: Model Comparison by AIC
Model	AIC	Notes
Original multivariable	404.528	PH violated for treatment
Stratified by sex	386.249	Allows different baseline per sex
Time-dependent coefficient	404.608	Allows HR to vary with log(time)

Q1: Strategy for PH violation:

Two strategies were used. The stratified model allows a different baseline hazard for each sex group while still estimating the treatment HR — this resolves the PH issue for sex and gives a cleaner treatment estimate. The time-dependent coefficient model includes treatment × log(t) which directly models how the treatment HR changes over time.

Q2: Final refined model:

The stratified Cox model is our preferred refined model. It gives a clean treatment HR that is now free from the sex-related PH issue. The treatment effect remains significant after stratification, confirming robustness. Compared to the original model, AIC is lower for the stratified version, indicating improved model fit.

Q3: Comparison with initial model:

The treatment HR from the stratified model is consistent with the original multivariable estimate (~2.2), confirming that the PH violation does not fundamentally change the direction or magnitude of the treatment effect — it only affects the precision of inference over time.

Step 6: Comprehensive Interpretation and Synthesis

KM vs best parametric model overlay

weibull_fit <- flexsurvreg(surv_obj ~ 1, data = rats_clean, dist = "weibull")
t_plot      <- seq(0.1, max(rats_clean$time), length.out = 300)
wb_surv     <- summary(weibull_fit, t = t_plot, type = "survival")[[1]]

plot(km_overall, col = "black", lwd = 2.5, conf.int = FALSE,
     xlab = "Time (days)", ylab = "S(t)",
     main = "KM Curve vs Best Parametric Model (Weibull)",
     ylim = c(0, 1))
lines(wb_surv$time, wb_surv$est, col = "#F18F01", lwd = 2, lty = 2)
legend("bottomleft",
       legend = c("Kaplan–Meier", "Weibull (parametric)"),
       col    = c("black", "#F18F01"),
       lwd    = 2, lty = c(1, 2), bty = "n")
grid()

Q1: KM vs Weibull:

The Weibull survival curve closely tracks the KM estimate throughout the follow-up period. Both curves remain above 0.75 throughout the study, consistent with the low 14% event rate. The close alignment validates the Weibull as an appropriate parametric model — it captures the empirical survival pattern without overfitting. The PP-plot from Project 1 further confirmed this good agreement.

Nelson–Aalen hazard vs Weibull hazard

The overlay plot in Step 2 shows that the smoothed Nelson–Aalen hazard and the Weibull parametric hazard follow the same increasing pattern and accelerate at a similar rate after day 75.

Q2: Alignment of hazard estimates:

The non-parametric hazard trend aligns closely with the Weibull hazard, with both showing a monotonically increasing pattern consistent with k = 3.667 > 1. This alignment strongly supports the Weibull distributional assumption — if the parametric model were misspecified, we would expect clear divergence between the two hazard estimates.

Connecting hypothesis tests to Cox model

data.frame(
  Method       = c("Log-Rank test", "Wilcoxon test",
                   "Univariable Cox", "Multivariable Cox"),
  Treatment_p  = round(c(pval_lr_trt, pval_wc_trt,
                          summary(cox_rx)$coefficients[, "Pr(>|z|)"],
                          summary(cox_multi)$coefficients["treatmentTreated",
                                                          "Pr(>|z|)"]), 4),
  Sex_p        = c(round(pval_lr_sex, 4), round(pval_wc_sex, 4),
                   round(summary(cox_sex)$coefficients[, "Pr(>|z|)"], 4),
                   round(summary(cox_multi)$coefficients["sexm",
                                                         "Pr(>|z|)"], 4))
) %>%
  style_table("Table 15: Consistency of p-values Across Methods")

Table 15: Consistency of p-values Across Methods
Method	Treatment_p	Sex_p
Log-Rank test	0.0185	0
Wilcoxon test	0.0251	0
Univariable Cox	0.0208	0
Multivariable Cox	0.0106	0

Q3: Relationship between tests and Cox model:

All methods give consistent conclusions. The log-rank p-value for treatment (0.018) closely matches the univariable Cox p-value (0.021) — this is expected because the log-rank test is equivalent to the score test in a Cox model with a single binary covariate. The Wilcoxon p-value is slightly larger for treatment (0.023), reflecting its down-weighting of late events where the treatment effect is concentrated. For sex, all four methods give p < 0.001, confirming this is the most robust finding in the analysis.

Overall survival summary

data.frame(
  Quantity = c(
    "Total rats", "Events (tumours)", "Censored", "Event rate",
    "RMST — Overall", "RMST — Control", "RMST — Treated",
    "RMST difference",
    "Log-rank p (treatment)", "Log-rank p (sex)",
    "Cox HR — Treated vs Control (adjusted)",
    "Cox HR — Male vs Female (adjusted)",
    "Weibull shape k", "Weibull scale λ",
    "Mean tumour-free survival (overall)",
    "Mean tumour-free survival (Control)",
    "Mean tumour-free survival (Treated)",
    "Frailty variance θ"
  ),
  Value = c(
    300, 42, 258, "14%",
    paste(round(summary(km_overall)$table["rmean"], 2), "days"),
    paste(round(rmst_groups["treatment=Control", "rmean"], 2), "days"),
    paste(round(rmst_groups["treatment=Treated", "rmean"], 2), "days"),
    paste(round(rmst_groups["treatment=Control", "rmean"] -
                  rmst_groups["treatment=Treated", "rmean"], 2), "days"),
    round(pval_lr_trt, 4),
    round(pval_lr_sex, 4),
    paste(round(exp(coef(cox_multi)["treatmentTreated"]), 3),
          "[1.203, 4.044]"),
    paste(round(exp(coef(cox_multi)["sexm"]), 3), "[0.011, 0.193]"),
    "3.667", "160.6",
    "~150.6 days",
    "~173.2 days",
    "~124.8 days",
    "2.02"
  )
) %>%
  style_table("Table 16: Comprehensive Results Summary")

Table 16: Comprehensive Results Summary
Quantity	Value
Total rats	300
Events (tumours)	42
Censored	258
Event rate	14%
RMST — Overall	99.78 days
RMST — Control	100.38 days
RMST — Treated	98.55 days
RMST difference	1.83 days
Log-rank p (treatment)	0.0185
Log-rank p (sex)	0
Cox HR — Treated vs Control (adjusted)	2.206 [1.203, 4.044]
Cox HR — Male vs Female (adjusted)	0.047 [0.011, 0.193]
Weibull shape k	3.667
Weibull scale λ	160.6
Mean tumour-free survival (overall)	~150.6 days
Mean tumour-free survival (Control)	~173.2 days
Mean tumour-free survival (Treated)	~124.8 days
Frailty variance θ	2.02

Q4: Overall survival experience:

The median survival was not reached in either group due to the low event rate (14%). On average, rats remained tumour-free for approximately 99.8 days out of the 104-day study period (RMST). Control rats lasted 1.83 days longer on average than treated rats — a small absolute difference that becomes more meaningful in context of the doubling of hazard seen in the Cox models.

Q5: Nature of the hazard:

The hazard is monotonically increasing over time, confirmed by both the non-parametric Nelson–Aalen estimate and the Weibull shape parameter k = 3.667. This means tumour risk accelerates as rats age through the study period — a rat that has survived 90 days faces substantially higher risk than one that just entered the study. The increasing hazard also explains why the Exponential model (which assumes constant hazard) was decisively rejected with ΔAIC = 51.3 in Project 1.

Q6: Key predictors from the final Cox model:

Two variables are strong independent predictors:

Treatment (HR = 2.21, 95% CI [1.20, 4.04], p = 0.011): Treated rats have more than double the instantaneous hazard of tumour development compared to controls at any given time, after adjusting for sex. This directly reflects the effect of the carcinogenic agent.
Sex (HR = 0.047, 95% CI [0.011, 0.193], p < 0.001): Male rats have a 95.3% lower hazard than female rats. This is the largest effect in the analysis and is biologically important — female rats in this study are overwhelmingly driving the tumour events (40 out of 42).

Q7: Clinical and practical implications:

The carcinogenic drug significantly accelerates tumour onset. The frailty variance θ = 2.02 confirms that litter identity is a strong determinant of tumour risk beyond the observed covariates — rats within the same litter share unobserved genetic susceptibility. Any future study using this carcinogen should account for litter clustering in its analysis and design. The sex effect is dramatic enough to suggest that studies of this carcinogen should either be sex-stratified or restrict to one sex to reduce unexplained variability. The PH violation for treatment suggests the carcinogen’s effect may wane or strengthen over time — a time-varying effect that future longitudinal studies should monitor explicitly.

Session Info

sessionInfo()

## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 20.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3;  LAPACK version 3.9.0
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## time zone: UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gridExtra_2.3    broom_1.0.11     kableExtra_1.4.0 knitr_1.51      
##  [5] dplyr_1.2.0      survminer_0.5.2  ggpubr_0.6.3     ggplot2_4.0.2   
##  [9] flexsurv_2.3.2   survival_3.8-6  
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6        xfun_0.55           bslib_0.9.0        
##  [4] rstatix_0.7.3       lattice_0.22-9      numDeriv_2016.8-1.1
##  [7] quadprog_1.5-8      vctrs_0.7.1         tools_4.5.3        
## [10] generics_0.1.4      tibble_3.3.0        pkgconfig_2.0.3    
## [13] Matrix_1.7-4        data.table_1.18.2.1 RColorBrewer_1.1-3 
## [16] S7_0.2.1            lifecycle_1.0.5     compiler_4.5.3     
## [19] farver_2.1.2        stringr_1.6.0       textshaping_1.0.4  
## [22] statmod_1.5.1       carData_3.0-6       mstate_0.3.3       
## [25] htmltools_0.5.9     sass_0.4.10         yaml_2.3.12        
## [28] Formula_1.2-5       pillar_1.11.1       car_3.1-5          
## [31] jquerylib_0.1.4     tidyr_1.3.2         cachem_1.1.0       
## [34] muhaz_1.2.6.4       abind_1.4-8         tidyselect_1.2.1   
## [37] digest_0.6.39       mvtnorm_1.3-3       stringi_1.8.7      
## [40] purrr_1.2.0         labeling_0.4.3      splines_4.5.3      
## [43] cowplot_1.2.0       fastmap_1.2.0       grid_4.5.3         
## [46] cli_3.6.5           magrittr_2.0.4      dichromat_2.0-0.1  
## [49] withr_3.0.2         scales_1.4.0        backports_1.5.0    
## [52] rmarkdown_2.30      ggtext_0.1.2        otel_0.2.0         
## [55] deSolve_1.41        ggsignif_0.6.4      evaluate_1.0.5     
## [58] viridisLite_0.4.2   rlang_1.1.7         gridtext_0.1.6     
## [61] Rcpp_1.1.0          glue_1.8.0          xml2_1.5.1         
## [64] svglite_2.2.2       rstudioapi_0.17.1   jsonlite_2.0.0     
## [67] R6_2.6.1            systemfonts_1.3.1

# create output folders
if (!dir.exists("outputs"))        dir.create("outputs")
if (!dir.exists("outputs/plots"))  dir.create("outputs/plots")
if (!dir.exists("outputs/tables")) dir.create("outputs/tables")

# ── PLOTS ──────────────────────────────────────────────────────────────────

# KM overall
png("outputs/plots/km_overall.png", width = 900, height = 700, res = 120)
print(ggsurvplot(
  km_overall, data = rats_clean, conf.int = TRUE, risk.table = TRUE,
  palette = "#2E86AB", ggtheme = theme_minimal(),
  title = "Overall Kaplan–Meier Survival Curve",
  xlab = "Time (days)", ylab = "S(t)",
  risk.table.title = "Number at Risk"
))
dev.off()

## png 
##   2

# KM by treatment
png("outputs/plots/km_by_treatment.png", width = 900, height = 700, res = 120)
print(ggsurvplot(
  km_treatment, data = rats_clean, conf.int = TRUE, risk.table = TRUE,
  pval = TRUE, palette = c("#2E86AB", "#F18F01"),
  legend.labs = c("Control", "Treated"), legend.title = "Group",
  ggtheme = theme_minimal(),
  title = "Kaplan–Meier Curves by Treatment Group",
  xlab = "Time (days)", ylab = "S(t)", surv.median.line = "none",
  tables.theme = theme_cleantable()
))
dev.off()

## png 
##   2

# KM by sex
png("outputs/plots/km_by_sex.png", width = 900, height = 700, res = 120)
print(ggsurvplot(
  km_sex, data = rats_clean, conf.int = TRUE, risk.table = TRUE,
  pval = TRUE, palette = c("#A23B72", "#3B7080"),
  legend.labs = c("Female", "Male"), legend.title = "Sex",
  ggtheme = theme_minimal(),
  title = "Kaplan–Meier Curves by Sex",
  xlab = "Time (days)", ylab = "S(t)",
  tables.theme = theme_cleantable()
))
dev.off()

## png 
##   2

# Nelson-Aalen cumulative hazard overall
png("outputs/plots/nelson_aalen_overall.png", width = 900, height = 600, res = 120)
print(ggsurvplot(
  na_overall, data = rats_clean, fun = "cumhaz", conf.int = TRUE,
  palette = "#2E86AB", ggtheme = theme_minimal(),
  title = "Nelson–Aalen Cumulative Hazard",
  xlab = "Time (days)", ylab = "H(t)"
))
dev.off()

## png 
##   2

# Nelson-Aalen by treatment
png("outputs/plots/nelson_aalen_by_treatment.png", width = 900, height = 600, res = 120)
print(ggsurvplot(
  na_rx, data = rats_clean, fun = "cumhaz", conf.int = TRUE,
  palette = c("#2E86AB", "#F18F01"),
  legend.labs = c("Control", "Treated"), legend.title = "Group",
  ggtheme = theme_minimal(),
  title = "Cumulative Hazard by Treatment Group",
  xlab = "Time (days)", ylab = "H(t)"
))
dev.off()

## png 
##   2

# Smoothed hazard vs Weibull
png("outputs/plots/hazard_na_vs_weibull.png", width = 900, height = 600, res = 120)
plot(na_data$time, na_data$smooth_haz,
     type = "l", col = "#2E86AB", lwd = 2,
     xlab = "Time (days)", ylab = "h(t)",
     main = "Nelson–Aalen vs Weibull Hazard")
lines(weibull_h$time, weibull_h$est,
      col = "#F18F01", lwd = 2, lty = 2)
legend("topleft",
       legend = c("Nelson–Aalen (smoothed)", "Weibull (parametric)"),
       col = c("#2E86AB", "#F18F01"), lwd = 2, lty = c(1, 2), bty = "n")
grid()
dev.off()

## png 
##   2

# Forest plot
png("outputs/plots/forest_plot.png", width = 900, height = 500, res = 120)
print(ggforest(cox_multi, data = rats_clean))
dev.off()

## png 
##   2

# Schoenfeld residuals
png("outputs/plots/schoenfeld_residuals.png", width = 900, height = 500, res = 120)
par(mfrow = c(1, 2))
plot(ph_test, var = 1, main = "Schoenfeld — Treatment",
     ylab = "Beta(t)", xlab = "Time (days)", col = "#F18F01", lwd = 2)
abline(h = coef(cox_multi)[1], col = "red", lty = 2, lwd = 2)
plot(ph_test, var = 2, main = "Schoenfeld — Sex",
     ylab = "Beta(t)", xlab = "Time (days)", col = "#A23B72", lwd = 2)
abline(h = coef(cox_multi)[2], col = "red", lty = 2, lwd = 2)
par(mfrow = c(1, 1))
dev.off()

## png 
##   2

# dfbeta influential observations
png("outputs/plots/dfbeta_residuals.png", width = 900, height = 500, res = 120)
par(mfrow = c(1, 2))
plot(dfb[, 1], ylab = "dfbeta (treatment)",
     main = "Influence on Treatment Coefficient",
     pch = 16, cex = 0.6, col = "#2E86AB")
abline(h = 0, lty = 2, col = "red")
plot(dfb[, 2], ylab = "dfbeta (sex)",
     main = "Influence on Sex Coefficient",
     pch = 16, cex = 0.6, col = "#A23B72")
abline(h = 0, lty = 2, col = "red")
par(mfrow = c(1, 1))
dev.off()

## png 
##   2

# Martingale and deviance residuals
png("outputs/plots/martingale_deviance.png", width = 900, height = 500, res = 120)
par(mfrow = c(1, 2))
plot(rats_clean$time, mart_null,
     pch = 16, cex = 0.5, col = "#2E86AB",
     xlab = "Time (days)", ylab = "Martingale Residual",
     main = "Martingale Residuals vs Time")
lines(lowess(rats_clean$time, mart_null), col = "red", lwd = 2)
abline(h = 0, lty = 2)
plot(dev_resid, pch = 16, cex = 0.5, col = "#43A047",
     ylab = "Deviance Residual", main = "Deviance Residuals")
abline(h = c(-2, 0, 2), lty = c(2, 1, 2), col = c("red", "black", "red"))
par(mfrow = c(1, 1))
dev.off()

## png 
##   2

# KM vs Weibull overlay
png("outputs/plots/km_vs_weibull.png", width = 900, height = 600, res = 120)
plot(km_overall, col = "black", lwd = 2.5, conf.int = FALSE,
     xlab = "Time (days)", ylab = "S(t)",
     main = "KM Curve vs Best Parametric Model (Weibull)", ylim = c(0, 1))
lines(wb_surv$time, wb_surv$est, col = "#F18F01", lwd = 2, lty = 2)
legend("bottomleft",
       legend = c("Kaplan–Meier", "Weibull (parametric)"),
       col = c("black", "#F18F01"), lwd = 2, lty = c(1, 2), bty = "n")
grid()
dev.off()

## png 
##   2

# ── TABLES ─────────────────────────────────────────────────────────────────

write.csv(data.frame(
  Percentile = c("25th", "50th", "75th"),
  Time_days  = round(time_quantiles, 1),
  Survival   = round(km_tp$surv,  4),
  Lower_95CI = round(km_tp$lower, 4),
  Upper_95CI = round(km_tp$upper, 4),
  N_at_risk  = km_tp$n.risk
), "outputs/tables/survival_at_timepoints.csv", row.names = FALSE)

write.csv(data.frame(
  Method      = c("Log-Rank", "Wilcoxon"),
  Treatment_p = round(c(pval_lr_trt, pval_wc_trt), 4),
  Sex_p       = round(c(pval_lr_sex, pval_wc_sex), 4)
), "outputs/tables/hypothesis_tests.csv", row.names = FALSE)

write.csv(
  tidy(cox_multi, exponentiate = TRUE, conf.int = TRUE) %>%
    mutate(across(where(is.numeric), ~round(.x, 3))),
  "outputs/tables/cox_multivariable.csv", row.names = FALSE
)

write.csv(
  tidy(cox_strat, exponentiate = TRUE, conf.int = TRUE) %>%
    mutate(across(where(is.numeric), ~round(.x, 3))),
  "outputs/tables/cox_stratified.csv", row.names = FALSE
)

write.csv(
  tidy(cox_td, exponentiate = TRUE, conf.int = TRUE) %>%
    mutate(across(where(is.numeric), ~round(.x, 3))),
  "outputs/tables/cox_time_dependent.csv", row.names = FALSE
)

write.csv(
  as.data.frame(ph_test$table) %>%
    mutate(Variable = rownames(.), across(where(is.numeric), ~round(.x, 4))),
  "outputs/tables/ph_schoenfeld_test.csv", row.names = FALSE
)

cat("All plots saved to outputs/plots/\n")

## All plots saved to outputs/plots/

cat("All tables saved to outputs/tables/\n")

## All tables saved to outputs/tables/

cat("\nPlots saved:\n")

## 
## Plots saved:

cat(paste(" -", list.files("outputs/plots")), sep = "\n")

##  - dfbeta_residuals.png
##  - forest_plot.png
##  - hazard_na_vs_weibull.png
##  - km_by_sex.png
##  - km_by_treatment.png
##  - km_overall.png
##  - km_vs_weibull.png
##  - martingale_deviance.png
##  - nelson_aalen_by_treatment.png
##  - nelson_aalen_overall.png
##  - schoenfeld_residuals.png

cat("\nTables saved:\n")

## 
## Tables saved:

cat(paste(" -", list.files("outputs/tables")), sep = "\n")

##  - cox_multivariable.csv
##  - cox_stratified.csv
##  - cox_time_dependent.csv
##  - hypothesis_tests.csv
##  - ph_schoenfeld_test.csv
##  - survival_at_timepoints.csv

Advanced Survival Analysis — Rats Dataset

Walter Nyamutamba, Samuel Agyekum, Honorine Mujawayezu, Belyse Kaneza, Grace Kitonyi

2026-03-28