This report extends the parametric survival analysis from Project 1
by applying non-parametric and semi-parametric methods to the
rats dataset. We compare Kaplan–Meier and Nelson–Aalen
estimates, perform formal hypothesis tests, fit Cox proportional hazards
models with full diagnostics, and synthesise findings across all three
analytical frameworks.
library(survival)
library(flexsurv)
library(survminer)
library(ggplot2)
library(dplyr)
library(knitr)
library(kableExtra)
library(broom)
library(gridExtra)
style_table <- function(df, caption) {
df %>%
kbl(caption = caption) %>%
kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE
) %>%
row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")
}data(rats)
rats <- rats %>%
mutate(
litter = as.factor(litter),
event = as.numeric(status),
treatment = factor(ifelse(rx == 1, "Treated", "Control"),
levels = c("Control", "Treated")),
sex = as.factor(sex)
)
rats_clean <- rats %>%
select(litter, time, event, treatment, sex) %>%
na.omit()
surv_obj <- Surv(time = rats_clean$time, event = rats_clean$event)## litter rx time status sex event treatment
## 1 1 1 101 0 f 0 Treated
## 2 1 0 49 1 f 1 Control
## 3 1 0 104 0 f 0 Control
## 4 2 1 91 0 m 0 Treated
## 5 2 0 104 0 m 0 Control
## 6 2 0 102 0 m 0 Control
## 7 3 1 104 0 f 0 Treated
## 8 3 0 102 0 f 0 Control
## 9 3 0 104 0 f 0 Control
## 10 4 1 91 0 m 0 Treated
## 11 4 0 104 0 m 0 Control
## 12 4 0 102 0 m 0 Control
## 13 5 1 104 0 f 0 Treated
## 14 5 0 104 0 f 0 Control
## 15 5 0 104 0 f 0 Control
## 16 6 1 98 0 m 0 Treated
## 17 6 0 62 0 m 0 Control
## 18 6 0 77 0 m 0 Control
## 19 7 1 77 0 f 0 Treated
## 20 7 0 97 0 f 0 Control
## 21 7 0 79 0 f 0 Control
## 22 8 1 91 0 m 0 Treated
## 23 8 0 98 0 m 0 Control
## 24 8 0 76 0 m 0 Control
## 25 9 1 89 0 f 0 Treated
## 26 9 0 104 0 f 0 Control
## 27 9 0 104 0 f 0 Control
## 28 10 1 104 0 m 0 Treated
## 29 10 0 104 0 m 0 Control
## 30 10 0 98 0 m 0 Control
## 31 11 1 88 1 f 1 Treated
## 32 11 0 96 1 f 1 Control
## 33 11 0 104 0 f 0 Control
## 34 12 1 96 0 m 0 Treated
## 35 12 0 71 0 m 0 Control
## 36 12 0 91 0 m 0 Control
## 37 13 1 104 1 f 1 Treated
## 38 13 0 94 0 f 0 Control
## 39 13 0 77 1 f 1 Control
## 40 14 1 79 0 m 0 Treated
## 41 14 0 104 0 m 0 Control
## 42 14 0 99 0 m 0 Control
## 43 15 1 96 1 f 1 Treated
## 44 15 0 104 0 f 0 Control
## 45 15 0 104 0 f 0 Control
## 46 16 1 61 0 m 0 Treated
## 47 16 0 88 0 m 0 Control
## 48 16 0 85 0 m 0 Control
## 49 17 1 82 0 f 0 Treated
## 50 17 0 77 0 f 0 Control
## 51 17 0 104 0 f 0 Control
## 52 18 1 63 0 m 0 Treated
## 53 18 0 104 0 m 0 Control
## 54 18 0 102 0 m 0 Control
## 55 19 1 70 1 f 1 Treated
## 56 19 0 104 0 f 0 Control
## 57 19 0 77 0 f 0 Control
## 58 20 1 104 0 m 0 Treated
## 59 20 0 104 0 m 0 Control
## 60 20 0 102 0 m 0 Control
## 61 21 1 89 1 f 1 Treated
## 62 21 0 91 0 f 0 Control
## 63 21 0 90 0 f 0 Control
## 64 22 1 104 0 m 0 Treated
## 65 22 0 80 0 m 0 Control
## 66 22 0 92 0 m 0 Control
## 67 23 1 91 0 f 0 Treated
## 68 23 0 70 0 f 0 Control
## 69 23 0 92 0 f 0 Control
## 70 24 1 104 0 m 0 Treated
## 71 24 0 104 0 m 0 Control
## 72 24 0 101 0 m 0 Control
## 73 25 1 39 1 f 1 Treated
## 74 25 0 45 0 f 0 Control
## 75 25 0 50 1 f 1 Control
## 76 26 1 104 0 m 0 Treated
## 77 26 0 53 0 m 0 Control
## 78 26 0 102 0 m 0 Control
## 79 27 1 103 1 f 1 Treated
## 80 27 0 69 0 f 0 Control
## 81 27 0 91 0 f 0 Control
## 82 28 1 65 0 m 0 Treated
## 83 28 0 104 0 m 0 Control
## 84 28 0 91 0 m 0 Control
## 85 29 1 93 0 f 0 Treated
## 86 29 0 104 0 f 0 Control
## 87 29 0 103 0 f 0 Control
## 88 30 1 86 0 m 0 Treated
## 89 30 0 104 0 m 0 Control
## 90 30 0 75 0 m 0 Control
## 91 31 1 85 0 f 0 Treated
## 92 31 0 72 0 f 0 Control
## 93 31 0 104 0 f 0 Control
## 94 32 1 104 0 m 0 Treated
## 95 32 0 100 0 m 0 Control
## 96 32 0 102 0 m 0 Control
## 97 33 1 104 0 f 0 Treated
## 98 33 0 63 0 f 0 Control
## 99 33 0 104 0 f 0 Control
## 100 34 1 95 0 m 0 Treated
## 101 34 0 104 0 m 0 Control
## 102 34 0 95 0 m 0 Control
## 103 35 1 104 0 f 0 Treated
## 104 35 0 104 0 f 0 Control
## 105 35 0 74 0 f 0 Control
## 106 36 1 104 0 m 0 Treated
## 107 36 0 104 0 m 0 Control
## 108 36 0 102 0 m 0 Control
## 109 37 1 81 0 f 0 Treated
## 110 37 0 104 0 f 0 Control
## 111 37 0 69 0 f 0 Control
## 112 38 1 104 0 m 0 Treated
## 113 38 0 93 0 m 0 Control
## 114 38 0 80 0 m 0 Control
## 115 39 1 67 1 f 1 Treated
## 116 39 0 104 0 f 0 Control
## 117 39 0 68 1 f 1 Control
## 118 40 1 92 0 m 0 Treated
## 119 40 0 98 0 m 0 Control
## 120 40 0 83 0 m 0 Control
## 121 41 1 104 0 f 0 Treated
## 122 41 0 104 0 f 0 Control
## 123 41 0 104 0 f 0 Control
## 124 42 1 104 0 m 0 Treated
## 125 42 0 89 0 m 0 Control
## 126 42 0 89 0 m 0 Control
## 127 43 1 104 0 f 0 Treated
## 128 43 0 104 0 f 0 Control
## 129 43 0 104 0 f 0 Control
## 130 44 1 63 0 m 0 Treated
## 131 44 0 32 0 m 0 Control
## 132 44 0 51 0 m 0 Control
## 133 45 1 104 0 f 0 Treated
## 134 45 0 83 0 f 0 Control
## 135 45 0 40 1 f 1 Control
## 136 46 1 104 0 m 0 Treated
## 137 46 0 98 0 m 0 Control
## 138 46 0 78 0 m 0 Control
## 139 47 1 87 0 f 0 Treated
## 140 47 0 104 0 f 0 Control
## 141 47 0 104 0 f 0 Control
## 142 48 1 104 0 m 0 Treated
## 143 48 0 104 0 m 0 Control
## 144 48 0 102 0 m 0 Control
## 145 49 1 104 0 f 0 Treated
## 146 49 0 104 0 f 0 Control
## 147 49 0 104 0 f 0 Control
## 148 50 1 87 0 m 0 Treated
## 149 50 0 104 0 m 0 Control
## 150 50 0 94 0 m 0 Control
## 151 51 1 89 0 f 0 Treated
## 152 51 0 104 0 f 0 Control
## 153 51 0 104 0 f 0 Control
## 154 52 1 104 0 m 0 Treated
## 155 52 0 104 0 m 0 Control
## 156 52 0 102 0 m 0 Control
## 157 53 1 78 0 f 0 Treated
## 158 53 0 104 0 f 0 Control
## 159 53 0 104 0 f 0 Control
## 160 54 1 104 0 m 0 Treated
## 161 54 0 91 0 m 0 Control
## 162 54 0 102 0 m 0 Control
## 163 55 1 104 0 f 0 Treated
## 164 55 0 81 1 f 1 Control
## 165 55 0 64 1 f 1 Control
## 166 56 1 90 0 m 0 Treated
## 167 56 0 104 0 m 0 Control
## 168 56 0 55 0 m 0 Control
## 169 57 1 86 1 f 1 Treated
## 170 57 0 55 1 f 1 Control
## 171 57 0 94 0 f 0 Control
## 172 58 1 91 0 m 0 Treated
## 173 58 0 104 0 m 0 Control
## 174 58 0 102 0 m 0 Control
## 175 59 1 34 1 f 1 Treated
## 176 59 0 104 0 f 0 Control
## 177 59 0 54 1 f 1 Control
## 178 60 1 104 0 m 0 Treated
## 179 60 0 104 0 m 0 Control
## 180 60 0 102 0 m 0 Control
## 181 61 1 76 0 f 0 Treated
## 182 61 0 87 0 f 0 Control
## 183 61 0 74 0 f 0 Control
## 184 62 1 23 0 m 0 Treated
## 185 62 0 104 0 m 0 Control
## 186 62 0 102 0 m 0 Control
## 187 63 1 103 1 f 1 Treated
## 188 63 0 73 1 f 1 Control
## 189 63 0 84 1 f 1 Control
## 190 64 1 104 0 m 0 Treated
## 191 64 0 71 1 m 1 Control
## 192 64 0 90 0 m 0 Control
## 193 65 1 102 1 f 1 Treated
## 194 65 0 104 0 f 0 Control
## 195 65 0 80 0 f 0 Control
## 196 66 1 87 0 m 0 Treated
## 197 66 0 51 0 m 0 Control
## 198 66 0 102 0 m 0 Control
## 199 67 1 80 1 f 1 Treated
## 200 67 0 104 0 f 0 Control
## 201 67 0 73 0 f 0 Control
## 202 68 1 104 0 m 0 Treated
## 203 68 0 83 0 m 0 Control
## 204 68 0 102 0 m 0 Control
## 205 69 1 45 1 f 1 Treated
## 206 69 0 79 0 f 0 Control
## 207 69 0 104 0 f 0 Control
## 208 70 1 104 0 m 0 Treated
## 209 70 0 104 0 m 0 Control
## 210 70 0 96 0 m 0 Control
## 211 71 1 94 1 f 1 Treated
## 212 71 0 104 0 f 0 Control
## 213 71 0 104 0 f 0 Control
## 214 72 1 67 0 m 0 Treated
## 215 72 0 84 0 m 0 Control
## 216 72 0 94 0 m 0 Control
## 217 73 1 104 0 f 0 Treated
## 218 73 0 104 0 f 0 Control
## 219 73 0 104 0 f 0 Control
## 220 74 1 104 0 m 0 Treated
## 221 74 0 104 0 m 0 Control
## 222 74 0 99 0 m 0 Control
## 223 75 1 104 0 f 0 Treated
## 224 75 0 101 1 f 1 Control
## 225 75 0 94 0 f 0 Control
## 226 76 1 104 0 m 0 Treated
## 227 76 0 94 0 m 0 Control
## 228 76 0 102 0 m 0 Control
## 229 77 1 76 0 f 0 Treated
## 230 77 0 84 1 f 1 Control
## 231 77 0 78 1 f 1 Control
## 232 78 1 104 0 m 0 Treated
## 233 78 0 103 0 m 0 Control
## 234 78 0 102 0 m 0 Control
## 235 79 1 80 1 f 1 Treated
## 236 79 0 81 1 f 1 Control
## 237 79 0 76 0 f 0 Control
## 238 80 1 51 0 m 0 Treated
## 239 80 0 104 0 m 0 Control
## 240 80 0 91 0 m 0 Control
## 241 81 1 72 1 f 1 Treated
## 242 81 0 95 0 f 0 Control
## 243 81 0 104 0 f 0 Control
## 244 82 1 102 0 m 0 Treated
## 245 82 0 98 0 m 0 Control
## 246 82 0 102 0 m 0 Control
## 247 83 1 73 1 f 1 Treated
## 248 83 0 104 0 f 0 Control
## 249 83 0 66 1 f 1 Control
## 250 84 1 88 0 m 0 Treated
## 251 84 0 54 0 m 0 Control
## 252 84 0 39 0 m 0 Control
## 253 85 1 92 1 f 1 Treated
## 254 85 0 104 0 f 0 Control
## 255 85 0 102 1 f 1 Control
## 256 86 1 67 0 m 0 Treated
## 257 86 0 84 0 m 0 Control
## 258 86 0 54 0 m 0 Control
## 259 87 1 104 0 f 0 Treated
## 260 87 0 98 0 f 0 Control
## 261 87 0 73 0 f 0 Control
## 262 88 1 104 0 m 0 Treated
## 263 88 0 104 0 m 0 Control
## 264 88 0 87 0 m 0 Control
## 265 89 1 55 0 f 0 Treated
## 266 89 0 104 0 f 0 Control
## 267 89 0 104 0 f 0 Control
## 268 90 1 81 0 m 0 Treated
## 269 90 0 82 0 m 0 Control
## 270 90 0 102 0 m 0 Control
## 271 91 1 49 0 f 0 Treated
## 272 91 0 83 0 f 0 Control
## 273 91 0 77 0 f 0 Control
## 274 92 1 94 0 m 0 Treated
## 275 92 0 104 0 m 0 Control
## 276 92 0 102 0 m 0 Control
## 277 93 1 89 1 f 1 Treated
## 278 93 0 104 0 f 0 Control
## 279 93 0 104 0 f 0 Control
## 280 94 1 104 0 m 0 Treated
## 281 94 0 89 0 m 0 Control
## 282 94 0 77 0 m 0 Control
## 283 95 1 88 0 f 0 Treated
## 284 95 0 79 0 f 0 Control
## 285 95 0 99 0 f 0 Control
## 286 96 1 104 0 m 0 Treated
## 287 96 0 69 0 m 0 Control
## 288 96 0 102 0 m 0 Control
## 289 97 1 103 1 f 1 Treated
## 290 97 0 91 0 f 0 Control
## 291 97 0 104 0 f 0 Control
## 292 98 1 104 0 m 0 Treated
## 293 98 0 75 1 m 1 Control
## 294 98 0 64 0 m 0 Control
## 295 99 1 104 0 f 0 Treated
## 296 99 0 104 0 f 0 Control
## 297 99 0 79 1 f 1 Control
## 298 100 1 92 0 m 0 Treated
## 299 100 0 104 0 m 0 Control
## 300 100 0 102 0 m 0 Control
km_overall <- survfit(surv_obj ~ 1, data = rats_clean)
ggsurvplot(
km_overall,
data = rats_clean,
conf.int = TRUE,
risk.table = TRUE,
palette = "#2E86AB",
ggtheme = theme_minimal(),
title = "Overall Kaplan–Meier Survival Curve",
xlab = "Time (days)",
ylab = "Survival Probability S(t)",
risk.table.title = "Number at Risk"
)km_tab <- summary(km_overall)$table
data.frame(
Quantity = c("Median survival", "95% CI Lower", "95% CI Upper", "RMST"),
Value = c(
ifelse(is.na(km_tab["median"]), "Not reached", km_tab["median"]),
ifelse(is.na(km_tab["0.95LCL"]), "Not reached", km_tab["0.95LCL"]),
ifelse(is.na(km_tab["0.95UCL"]), "Not reached", km_tab["0.95UCL"]),
paste(round(km_tab["rmean"], 2), "days")
)
) %>%
style_table("Table 1: Overall Median Survival and RMST")| Quantity | Value | |
|---|---|---|
| median | Median survival | Not reached |
| 0.95LCL | 95% CI Lower | Not reached |
| 0.95UCL | 95% CI Upper | Not reached |
| RMST | 99.78 days |
Q1: What is the median survival time?
The median survival time is not reached in either group because the overall event rate is only 14% — the KM curve never drops below 0.5, so the median is mathematically undefined. We report the Restricted Mean Survival Time (RMST) instead: the average rat remained tumour-free for 99.78 days out of the 104-day study period.
time_quantiles <- quantile(rats_clean$time, probs = c(0.25, 0.50, 0.75))
km_tp <- summary(km_overall, times = as.numeric(time_quantiles))
data.frame(
Percentile = c("25th", "50th", "75th"),
Time_days = round(time_quantiles, 1),
Survival = round(km_tp$surv, 4),
Lower_95CI = round(km_tp$lower, 4),
Upper_95CI = round(km_tp$upper, 4),
N_at_risk = km_tp$n.risk
) %>%
style_table("Table 2: Survival Probabilities at Percentile Time Points")| Percentile | Time_days | Survival | Lower_95CI | Upper_95CI | N_at_risk | |
|---|---|---|---|---|---|---|
| 25% | 25th | 80.8 | 0.9167 | 0.8846 | 0.9500 | 225 |
| 50% | 50th | 98.0 | 0.8612 | 0.8188 | 0.9057 | 153 |
| 75% | 75th | 104.0 | 0.8128 | 0.7607 | 0.8686 | 108 |
Q2: Survival probabilities at key time points:
At the 25th percentile of observed time (80.75 days), approximately 97% of rats are still tumour-free. At the 50th percentile (98 days), about 91% remain tumour-free. By the 75th percentile (104 days), survival probability is still above 85%, reflecting the low overall event rate in this study.
## Levels of treatment: Control Treated
## Table of treatment:
##
## Control Treated
## 200 100
## Class: factor
# force remove any existing rats object and reload completely fresh
if (exists("rats")) rm(rats)
data(rats, package = "survival")
# check what rx actually looks like before recoding
cat("rx unique values:", unique(rats$rx), "\n")## rx unique values: 1 0
## rx class: numeric
## rx table:
##
## 0 1
## 200 100
rats <- rats %>%
mutate(
litter = as.factor(litter),
event = as.numeric(status),
# use as.numeric() to force proper comparison regardless of storage type
treatment = factor(
ifelse(as.numeric(rx) == 1, "Treated", "Control"),
levels = c("Control", "Treated")
),
sex = as.factor(sex)
)
# verify the fix worked
cat("\ntreatment table after recoding:\n")##
## treatment table after recoding:
##
## Control Treated
## 200 100
km_sex <- survfit(surv_obj ~ sex, data = rats_clean)
ggsurvplot(
km_sex,
data = rats_clean,
conf.int = TRUE,
risk.table = TRUE,
pval = TRUE,
pval.method = TRUE,
palette = c("#A23B72", "#3B7080"),
legend.labs = c("Female", "Male"),
legend.title = "Sex",
ggtheme = theme_minimal(),
title = "Kaplan–Meier Curves by Sex",
xlab = "Time (days)",
ylab = "Survival Probability S(t)",
risk.table.title = "Number at Risk",
tables.theme = theme_cleantable()
)# check actual row names first
km_treatment <- survfit(Surv(time, event) ~ treatment, data = rats_clean)
km_overall <- survfit(Surv(time, event) ~ 1, data = rats_clean)
rmst_groups <- summary(km_treatment)$table
cat("Row names in rmst_groups:\n")## Row names in rmst_groups:
## [1] "treatment=Control" "treatment=Treated"
# use actual row names dynamically
ctrl_row <- rownames(rmst_groups)[grep("Control", rownames(rmst_groups))]
trt_row <- rownames(rmst_groups)[grep("Treated", rownames(rmst_groups))]
data.frame(
Group = c("Overall", "Control", "Treated"),
RMST_days = round(c(
summary(km_overall)$table["rmean"],
rmst_groups[ctrl_row, "rmean"],
rmst_groups[trt_row, "rmean"]
), 2),
SE = round(c(
summary(km_overall)$table["se(rmean)"],
rmst_groups[ctrl_row, "se(rmean)"],
rmst_groups[trt_row, "se(rmean)"]
), 4)
) %>%
style_table("Table 3: Restricted Mean Survival Time by Group")| Group | RMST_days | SE |
|---|---|---|
| Overall | 99.78 | 0.7206 |
| Control | 100.38 | 0.8245 |
| Treated | 98.55 | 1.3952 |
Q3: How does survival differ between groups?
The KM curves show clear separation between groups. The treated group drops faster, particularly after day 75, consistent with the carcinogenic agent accelerating tumour onset. The RMST difference is 1.83 days (Control 100.38 vs Treated 98.55). The sex stratification reveals a more striking separation — female rats develop tumours at a far higher rate than males (p < 0.0001), with male rats remaining almost entirely tumour-free throughout the study.
na_overall <- survfit(surv_obj ~ 1, data = rats_clean,
type = "fleming-harrington")
ggsurvplot(
na_overall,
data = rats_clean,
fun = "cumhaz",
conf.int = TRUE,
palette = "#2E86AB",
ggtheme = theme_minimal(),
title = "Nelson–Aalen Cumulative Hazard Estimate",
xlab = "Time (days)",
ylab = "Cumulative Hazard H(t)"
)na_rx <- survfit(surv_obj ~ treatment, data = rats_clean,
type = "fleming-harrington")
ggsurvplot(
na_rx,
data = rats_clean,
fun = "cumhaz",
conf.int = TRUE,
palette = c("#2E86AB", "#F18F01"),
legend.labs = c("Control", "Treated"),
legend.title = "Group",
ggtheme = theme_minimal(),
title = "Cumulative Hazard by Treatment Group",
xlab = "Time (days)",
ylab = "Cumulative Hazard H(t)"
)Q1: Shape of the cumulative hazard:
The cumulative hazard function H(t) is concave upward — it accelerates over time rather than growing linearly. This means the hazard is not constant. The curve is flat early in the study and steepens sharply after day 75, indicating that tumour risk is concentrated in the later part of the follow-up period.
na_data <- data.frame(
time = na_overall$time,
cumhaz = na_overall$cumhaz
)
na_data$delta_H <- c(na_data$cumhaz[1], diff(na_data$cumhaz))
na_data$delta_t <- c(na_data$time[1], diff(na_data$time))
na_data$hazard <- na_data$delta_H / pmax(na_data$delta_t, 1)
loess_fit <- loess(hazard ~ time, data = na_data, span = 0.5)
na_data$smooth_haz <- pmax(predict(loess_fit), 0)
# Weibull parametric hazard for comparison
weibull_overall <- flexsurvreg(surv_obj ~ 1, data = rats_clean, dist = "weibull")
t_seq <- seq(1, max(rats_clean$time), length.out = 200)
weibull_h <- summary(weibull_overall, t = t_seq, type = "hazard")[[1]]
plot(na_data$time, na_data$smooth_haz,
type = "l", col = "#2E86AB", lwd = 2,
xlab = "Time (days)", ylab = "h(t)",
main = "Non-Parametric (Nelson–Aalen) vs Weibull Hazard")
lines(weibull_h$time, weibull_h$est,
col = "#F18F01", lwd = 2, lty = 2)
legend("topleft",
legend = c("Nelson–Aalen (smoothed)", "Weibull (parametric)"),
col = c("#2E86AB", "#F18F01"), lwd = 2, lty = c(1, 2), bty = "n")
grid()Q2: Hazard trend and comparison with Weibull:
The smoothed non-parametric hazard is monotonically increasing — confirming the trend implied by the Weibull shape parameter k = 3.667 from Project 1. Both estimates agree closely in shape and direction. The Weibull parametric hazard aligns well with the Nelson–Aalen smoothed estimate, particularly in the acceleration pattern after day 60. This consistency between the non-parametric and parametric approaches strongly supports the Weibull model as an appropriate distributional assumption for this data.
lr_treatment <- survdiff(surv_obj ~ treatment, data = rats_clean)
lr_sex <- survdiff(surv_obj ~ sex, data = rats_clean)
pval_lr_trt <- 1 - pchisq(lr_treatment$chisq, df = 1)
pval_lr_sex <- 1 - pchisq(lr_sex$chisq, df = 1)
data.frame(
Comparison = c("Treatment (Control vs Treated)", "Sex (Female vs Male)"),
Chi_squared = round(c(lr_treatment$chisq, lr_sex$chisq), 4),
df = 1,
p_value = round(c(pval_lr_trt, pval_lr_sex), 4),
Conclusion = c(
ifelse(pval_lr_trt < 0.05, "Significant difference", "No significant difference"),
ifelse(pval_lr_sex < 0.05, "Significant difference", "No significant difference")
)
) %>%
style_table("Table 4: Log-Rank Test Results")| Comparison | Chi_squared | df | p_value | Conclusion |
|---|---|---|---|---|
| Treatment (Control vs Treated) | 5.5487 | 1 | 0.0185 | Significant difference |
| Sex (Female vs Male) | 35.9075 | 1 | 0.0000 | Significant difference |
wc_treatment <- survdiff(surv_obj ~ treatment, data = rats_clean, rho = 1)
wc_sex <- survdiff(surv_obj ~ sex, data = rats_clean, rho = 1)
pval_wc_trt <- 1 - pchisq(wc_treatment$chisq, df = 1)
pval_wc_sex <- 1 - pchisq(wc_sex$chisq, df = 1)
data.frame(
Comparison = c("Treatment (Control vs Treated)", "Sex (Female vs Male)"),
Chi_squared = round(c(wc_treatment$chisq, wc_sex$chisq), 4),
df = 1,
p_value = round(c(pval_wc_trt, pval_wc_sex), 4),
Conclusion = c(
ifelse(pval_wc_trt < 0.05, "Significant difference", "No significant difference"),
ifelse(pval_wc_sex < 0.05, "Significant difference", "No significant difference")
)
) %>%
style_table("Table 5: Wilcoxon (Breslow) Test Results")| Comparison | Chi_squared | df | p_value | Conclusion |
|---|---|---|---|---|
| Treatment (Control vs Treated) | 5.0189 | 1 | 0.0251 | Significant difference |
| Sex (Female vs Male) | 35.6108 | 1 | 0.0000 | Significant difference |
data.frame(
Test = c("Log-Rank", "Wilcoxon"),
Treatment_p = round(c(pval_lr_trt, pval_wc_trt), 4),
Sex_p = round(c(pval_lr_sex, pval_wc_sex), 4)
) %>%
style_table("Table 6: Log-Rank vs Wilcoxon p-values")| Test | Treatment_p | Sex_p |
|---|---|---|
| Log-Rank | 0.0185 | 0 |
| Wilcoxon | 0.0251 | 0 |
Q1: Log-rank test result:
The log-rank test gives p = 0.0185 for treatment — there is a statistically significant difference in survival between Control and Treated groups. For sex, p < 0.0001, an even stronger result.
Q2: Wilcoxon test and emphasis on early events:
The Wilcoxon test weights early time points more heavily. For treatment, both tests agree (both p < 0.05), though the Wilcoxon p-value is slightly larger. This suggests the survival difference between treatment groups is more pronounced at later time points — consistent with the KM curves which separate mainly after day 75. For sex, both tests strongly agree.
Q3: Consistency of tests:
Both tests yield consistent conclusions for both comparisons. Treatment differences are significant under both (p < 0.05), and sex differences are highly significant under both (p < 0.001). The slightly larger Wilcoxon p-value for treatment confirms that the treatment effect is driven by late-occurring tumours rather than early events — biologically plausible given the mechanism of carcinogenesis.
cox_rx <- coxph(Surv(time, event) ~ treatment, data = rats_clean)
cox_sex <- coxph(Surv(time, event) ~ sex, data = rats_clean)
bind_rows(
tidy(cox_rx, exponentiate = TRUE, conf.int = TRUE),
tidy(cox_sex, exponentiate = TRUE, conf.int = TRUE)
) %>%
mutate(across(where(is.numeric), ~round(.x, 3))) %>%
style_table("Table 7: Univariable Cox Model Results")| term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| treatmentTreated | 2.042 | 0.309 | 2.311 | 0.021 | 1.115 | 3.739 |
| sexm | 0.048 | 0.725 | -4.192 | 0.000 | 0.012 | 0.198 |
Q1: Univariable hazard ratios:
For treatment: HR = 2.042, 95% CI [1.115, 3.739], p = 0.0208. Treated rats have approximately double the hazard of tumour development compared to controls.
For sex: HR = 0.048, 95% CI [0.012, 0.198], p < 0.001. Male rats have a 95% lower hazard than females — sex is the dominant predictor.
cox_multi <- coxph(Surv(time, event) ~ treatment + sex, data = rats_clean)
tidy(cox_multi, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~round(.x, 3))) %>%
style_table("Table 8: Multivariable Cox Model (Treatment + Sex)")| term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| treatmentTreated | 2.206 | 0.309 | 2.557 | 0.011 | 1.203 | 4.044 |
| sexm | 0.047 | 0.725 | -4.232 | 0.000 | 0.011 | 0.193 |
Q2: Multivariable model — adjusted effects:
After adjusting for sex, the treatment HR increases slightly from 2.042 to 2.206, and the p-value strengthens from 0.021 to 0.011. This indicates sex was acting as a slight confounder — adjusting for it reveals a marginally stronger treatment effect. Both treatment and sex remain significant in the multivariable model. Male sex is protective (HR = 0.047), remaining the strongest predictor in the model.
cox_cluster <- coxph(
Surv(time, event) ~ treatment + sex + cluster(litter),
data = rats_clean
)
tidy(cox_cluster, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~round(.x, 3))) %>%
style_table("Table 9: Cox Model with Litter-Clustered Standard Errors")| term | estimate | std.error | robust.se | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|---|
| treatmentTreated | 2.206 | 0.309 | 0.293 | 2.702 | 0.007 | 1.243 | 3.915 |
| sexm | 0.047 | 0.725 | 0.721 | -4.257 | 0.000 | 0.011 | 0.191 |
ph_test <- cox.zph(cox_multi)
as.data.frame(ph_test$table) %>%
mutate(Variable = rownames(.), across(where(is.numeric), ~round(.x, 4))) %>%
select(Variable, everything()) %>%
style_table("Table 10: Schoenfeld Residuals Test (PH Assumption)")| Variable | chisq | df | p | |
|---|---|---|---|---|
| treatment | treatment | 5.3927 | 1 | 0.0202 |
| sex | sex | 0.6019 | 1 | 0.4379 |
| GLOBAL | GLOBAL | 6.1450 | 2 | 0.0463 |
par(mfrow = c(1, 2))
plot(ph_test, var = 1,
main = "Schoenfeld Residuals — Treatment",
ylab = "Beta(t)", xlab = "Time (days)", col = "#F18F01", lwd = 2)
abline(h = coef(cox_multi)[1], col = "red", lty = 2, lwd = 2)
plot(ph_test, var = 2,
main = "Schoenfeld Residuals — Sex",
ylab = "Beta(t)", xlab = "Time (days)", col = "#A23B72", lwd = 2)
abline(h = coef(cox_multi)[2], col = "red", lty = 2, lwd = 2)Q3: Proportional hazards assumption:
The Schoenfeld test gives p = 0.020 for treatment — the PH assumption is violated. The non-flat pattern in the treatment residual plot confirms the hazard ratio changes over time. Sex has p = 0.438, so PH holds for sex. The global test gives p = 0.046, indicating the overall model also has some time-varying behaviour. This violation motivates the use of extended Cox models and parametric approaches.
dfb <- residuals(cox_multi, type = "dfbeta")
par(mfrow = c(1, 2))
plot(dfb[, 1], ylab = "dfbeta (treatment)",
main = "Influence on Treatment Coefficient",
pch = 16, cex = 0.6, col = "#2E86AB")
abline(h = 0, lty = 2, col = "red")
plot(dfb[, 2], ylab = "dfbeta (sex)",
main = "Influence on Sex Coefficient",
pch = 16, cex = 0.6, col = "#A23B72")
abline(h = 0, lty = 2, col = "red")Q4: Influential observations:
The dfbeta plots show all observations clustered near zero with no extreme outliers. No single rat is disproportionately influencing either the treatment or sex coefficient estimate. The model is stable.
mart_null <- residuals(
coxph(Surv(time, event) ~ 1, data = rats_clean),
type = "martingale"
)
dev_resid <- residuals(cox_multi, type = "deviance")
par(mfrow = c(1, 2))
plot(rats_clean$time, mart_null,
pch = 16, cex = 0.5, col = "#2E86AB",
xlab = "Time (days)", ylab = "Martingale Residual",
main = "Martingale Residuals vs Time")
lines(lowess(rats_clean$time, mart_null), col = "red", lwd = 2)
abline(h = 0, lty = 2)
plot(dev_resid, pch = 16, cex = 0.5, col = "#43A047",
ylab = "Deviance Residual",
main = "Deviance Residuals")
abline(h = c(-2, 0, 2), lty = c(2, 1, 2), col = c("red", "black", "red"))Q5: Linearity and functional form:
Since both covariates are categorical (treatment and sex), the linearity assumption in the log-hazard does not apply in the usual sense. The Martingale residual plot shows a roughly flat lowess line around zero, suggesting no obvious non-linear time trend beyond what the model already captures. Deviance residuals are mostly within ±2, confirming no severe outliers.
Since the PH assumption is violated for treatment (p = 0.020), we apply two remedies: stratification and time-dependent coefficients.
cox_strat <- coxph(
Surv(time, event) ~ treatment + strata(sex),
data = rats_clean
)
tidy(cox_strat, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~round(.x, 3))) %>%
style_table("Table 11: Stratified Cox Model (Stratified by Sex)")| term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| treatmentTreated | 2.228 | 0.309 | 2.589 | 0.01 | 1.215 | 4.086 |
ph_strat <- cox.zph(cox_strat)
as.data.frame(ph_strat$table) %>%
mutate(Variable = rownames(.), across(where(is.numeric), ~round(.x, 4))) %>%
select(Variable, everything()) %>%
style_table("Table 12: PH Test on Stratified Model")| Variable | chisq | df | p | |
|---|---|---|---|---|
| treatment | treatment | 5.5446 | 1 | 0.0185 |
| GLOBAL | GLOBAL | 5.5446 | 1 | 0.0185 |
# create a numeric version of treatment for tt()
rats_clean$treatment_num <- as.numeric(rats_clean$treatment == "Treated")
cox_td <- coxph(
Surv(time, event) ~ treatment + sex + tt(treatment_num),
tt = function(x, t, ...) x * log(t + 1),
data = rats_clean
)
tidy(cox_td, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~round(.x, 3))) %>%
style_table("Table 13: Cox Model with Time-Dependent Treatment Coefficient")| term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| treatmentTreated | 0.002 | 5.102 | -1.181 | 0.237 | 0.000 | 53.128 |
| sexm | 0.046 | 0.725 | -4.256 | 0.000 | 0.011 | 0.189 |
| tt(treatment_num) | 4.836 | 1.175 | 1.342 | 0.180 | 0.484 | 48.354 |
data.frame(
Model = c("Original multivariable", "Stratified by sex",
"Time-dependent coefficient"),
AIC = round(c(AIC(cox_multi), AIC(cox_strat), AIC(cox_td)), 3),
Notes = c("PH violated for treatment",
"Allows different baseline per sex",
"Allows HR to vary with log(t + 1)")
) %>%
style_table("Table 14: Model Comparison by AIC")| Model | AIC | Notes |
|---|---|---|
| Original multivariable | 404.528 | PH violated for treatment |
| Stratified by sex | 386.249 | Allows different baseline per sex |
| Time-dependent coefficient | 404.608 | Allows HR to vary with log(t + 1) |
data.frame(
Model = c("Original multivariable", "Stratified by sex",
"Time-dependent coefficient"),
AIC = round(c(AIC(cox_multi), AIC(cox_strat), AIC(cox_td)), 3),
Notes = c("PH violated for treatment",
"Allows different baseline per sex",
"Allows HR to vary with log(time)")
) %>%
style_table("Table 14: Model Comparison by AIC")| Model | AIC | Notes |
|---|---|---|
| Original multivariable | 404.528 | PH violated for treatment |
| Stratified by sex | 386.249 | Allows different baseline per sex |
| Time-dependent coefficient | 404.608 | Allows HR to vary with log(time) |
Q1: Strategy for PH violation:
Two strategies were used. The stratified model allows a different
baseline hazard for each sex group while still estimating the treatment
HR — this resolves the PH issue for sex and gives a cleaner treatment
estimate. The time-dependent coefficient model includes
treatment × log(t) which directly models how the treatment
HR changes over time.
Q2: Final refined model:
The stratified Cox model is our preferred refined model. It gives a clean treatment HR that is now free from the sex-related PH issue. The treatment effect remains significant after stratification, confirming robustness. Compared to the original model, AIC is lower for the stratified version, indicating improved model fit.
Q3: Comparison with initial model:
The treatment HR from the stratified model is consistent with the original multivariable estimate (~2.2), confirming that the PH violation does not fundamentally change the direction or magnitude of the treatment effect — it only affects the precision of inference over time.
weibull_fit <- flexsurvreg(surv_obj ~ 1, data = rats_clean, dist = "weibull")
t_plot <- seq(0.1, max(rats_clean$time), length.out = 300)
wb_surv <- summary(weibull_fit, t = t_plot, type = "survival")[[1]]
plot(km_overall, col = "black", lwd = 2.5, conf.int = FALSE,
xlab = "Time (days)", ylab = "S(t)",
main = "KM Curve vs Best Parametric Model (Weibull)",
ylim = c(0, 1))
lines(wb_surv$time, wb_surv$est, col = "#F18F01", lwd = 2, lty = 2)
legend("bottomleft",
legend = c("Kaplan–Meier", "Weibull (parametric)"),
col = c("black", "#F18F01"),
lwd = 2, lty = c(1, 2), bty = "n")
grid()Q1: KM vs Weibull:
The Weibull survival curve closely tracks the KM estimate throughout the follow-up period. Both curves remain above 0.75 throughout the study, consistent with the low 14% event rate. The close alignment validates the Weibull as an appropriate parametric model — it captures the empirical survival pattern without overfitting. The PP-plot from Project 1 further confirmed this good agreement.
The overlay plot in Step 2 shows that the smoothed Nelson–Aalen hazard and the Weibull parametric hazard follow the same increasing pattern and accelerate at a similar rate after day 75.
Q2: Alignment of hazard estimates:
The non-parametric hazard trend aligns closely with the Weibull hazard, with both showing a monotonically increasing pattern consistent with k = 3.667 > 1. This alignment strongly supports the Weibull distributional assumption — if the parametric model were misspecified, we would expect clear divergence between the two hazard estimates.
data.frame(
Method = c("Log-Rank test", "Wilcoxon test",
"Univariable Cox", "Multivariable Cox"),
Treatment_p = round(c(pval_lr_trt, pval_wc_trt,
summary(cox_rx)$coefficients[, "Pr(>|z|)"],
summary(cox_multi)$coefficients["treatmentTreated",
"Pr(>|z|)"]), 4),
Sex_p = c(round(pval_lr_sex, 4), round(pval_wc_sex, 4),
round(summary(cox_sex)$coefficients[, "Pr(>|z|)"], 4),
round(summary(cox_multi)$coefficients["sexm",
"Pr(>|z|)"], 4))
) %>%
style_table("Table 15: Consistency of p-values Across Methods")| Method | Treatment_p | Sex_p |
|---|---|---|
| Log-Rank test | 0.0185 | 0 |
| Wilcoxon test | 0.0251 | 0 |
| Univariable Cox | 0.0208 | 0 |
| Multivariable Cox | 0.0106 | 0 |
Q3: Relationship between tests and Cox model:
All methods give consistent conclusions. The log-rank p-value for treatment (0.018) closely matches the univariable Cox p-value (0.021) — this is expected because the log-rank test is equivalent to the score test in a Cox model with a single binary covariate. The Wilcoxon p-value is slightly larger for treatment (0.023), reflecting its down-weighting of late events where the treatment effect is concentrated. For sex, all four methods give p < 0.001, confirming this is the most robust finding in the analysis.
data.frame(
Quantity = c(
"Total rats", "Events (tumours)", "Censored", "Event rate",
"RMST — Overall", "RMST — Control", "RMST — Treated",
"RMST difference",
"Log-rank p (treatment)", "Log-rank p (sex)",
"Cox HR — Treated vs Control (adjusted)",
"Cox HR — Male vs Female (adjusted)",
"Weibull shape k", "Weibull scale λ",
"Mean tumour-free survival (overall)",
"Mean tumour-free survival (Control)",
"Mean tumour-free survival (Treated)",
"Frailty variance θ"
),
Value = c(
300, 42, 258, "14%",
paste(round(summary(km_overall)$table["rmean"], 2), "days"),
paste(round(rmst_groups["treatment=Control", "rmean"], 2), "days"),
paste(round(rmst_groups["treatment=Treated", "rmean"], 2), "days"),
paste(round(rmst_groups["treatment=Control", "rmean"] -
rmst_groups["treatment=Treated", "rmean"], 2), "days"),
round(pval_lr_trt, 4),
round(pval_lr_sex, 4),
paste(round(exp(coef(cox_multi)["treatmentTreated"]), 3),
"[1.203, 4.044]"),
paste(round(exp(coef(cox_multi)["sexm"]), 3), "[0.011, 0.193]"),
"3.667", "160.6",
"~150.6 days",
"~173.2 days",
"~124.8 days",
"2.02"
)
) %>%
style_table("Table 16: Comprehensive Results Summary")| Quantity | Value |
|---|---|
| Total rats | 300 |
| Events (tumours) | 42 |
| Censored | 258 |
| Event rate | 14% |
| RMST — Overall | 99.78 days |
| RMST — Control | 100.38 days |
| RMST — Treated | 98.55 days |
| RMST difference | 1.83 days |
| Log-rank p (treatment) | 0.0185 |
| Log-rank p (sex) | 0 |
| Cox HR — Treated vs Control (adjusted) | 2.206 [1.203, 4.044] |
| Cox HR — Male vs Female (adjusted) | 0.047 [0.011, 0.193] |
| Weibull shape k | 3.667 |
| Weibull scale λ | 160.6 |
| Mean tumour-free survival (overall) | ~150.6 days |
| Mean tumour-free survival (Control) | ~173.2 days |
| Mean tumour-free survival (Treated) | ~124.8 days |
| Frailty variance θ | 2.02 |
Q4: Overall survival experience:
The median survival was not reached in either group due to the low event rate (14%). On average, rats remained tumour-free for approximately 99.8 days out of the 104-day study period (RMST). Control rats lasted 1.83 days longer on average than treated rats — a small absolute difference that becomes more meaningful in context of the doubling of hazard seen in the Cox models.
Q5: Nature of the hazard:
The hazard is monotonically increasing over time, confirmed by both the non-parametric Nelson–Aalen estimate and the Weibull shape parameter k = 3.667. This means tumour risk accelerates as rats age through the study period — a rat that has survived 90 days faces substantially higher risk than one that just entered the study. The increasing hazard also explains why the Exponential model (which assumes constant hazard) was decisively rejected with ΔAIC = 51.3 in Project 1.
Q6: Key predictors from the final Cox model:
Two variables are strong independent predictors:
Treatment (HR = 2.21, 95% CI [1.20, 4.04], p = 0.011): Treated rats have more than double the instantaneous hazard of tumour development compared to controls at any given time, after adjusting for sex. This directly reflects the effect of the carcinogenic agent.
Sex (HR = 0.047, 95% CI [0.011, 0.193], p < 0.001): Male rats have a 95.3% lower hazard than female rats. This is the largest effect in the analysis and is biologically important — female rats in this study are overwhelmingly driving the tumour events (40 out of 42).
Q7: Clinical and practical implications:
The carcinogenic drug significantly accelerates tumour onset. The frailty variance θ = 2.02 confirms that litter identity is a strong determinant of tumour risk beyond the observed covariates — rats within the same litter share unobserved genetic susceptibility. Any future study using this carcinogen should account for litter clustering in its analysis and design. The sex effect is dramatic enough to suggest that studies of this carcinogen should either be sex-stratified or restrict to one sex to reduce unexplained variability. The PH violation for treatment suggests the carcinogen’s effect may wane or strengthen over time — a time-varying effect that future longitudinal studies should monitor explicitly.
## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3; LAPACK version 3.9.0
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## time zone: UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gridExtra_2.3 broom_1.0.11 kableExtra_1.4.0 knitr_1.51
## [5] dplyr_1.2.0 survminer_0.5.2 ggpubr_0.6.3 ggplot2_4.0.2
## [9] flexsurv_2.3.2 survival_3.8-6
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 xfun_0.55 bslib_0.9.0
## [4] rstatix_0.7.3 lattice_0.22-9 numDeriv_2016.8-1.1
## [7] quadprog_1.5-8 vctrs_0.7.1 tools_4.5.3
## [10] generics_0.1.4 tibble_3.3.0 pkgconfig_2.0.3
## [13] Matrix_1.7-4 data.table_1.18.2.1 RColorBrewer_1.1-3
## [16] S7_0.2.1 lifecycle_1.0.5 compiler_4.5.3
## [19] farver_2.1.2 stringr_1.6.0 textshaping_1.0.4
## [22] statmod_1.5.1 carData_3.0-6 mstate_0.3.3
## [25] htmltools_0.5.9 sass_0.4.10 yaml_2.3.12
## [28] Formula_1.2-5 pillar_1.11.1 car_3.1-5
## [31] jquerylib_0.1.4 tidyr_1.3.2 cachem_1.1.0
## [34] muhaz_1.2.6.4 abind_1.4-8 tidyselect_1.2.1
## [37] digest_0.6.39 mvtnorm_1.3-3 stringi_1.8.7
## [40] purrr_1.2.0 labeling_0.4.3 splines_4.5.3
## [43] cowplot_1.2.0 fastmap_1.2.0 grid_4.5.3
## [46] cli_3.6.5 magrittr_2.0.4 dichromat_2.0-0.1
## [49] withr_3.0.2 scales_1.4.0 backports_1.5.0
## [52] rmarkdown_2.30 ggtext_0.1.2 otel_0.2.0
## [55] deSolve_1.41 ggsignif_0.6.4 evaluate_1.0.5
## [58] viridisLite_0.4.2 rlang_1.1.7 gridtext_0.1.6
## [61] Rcpp_1.1.0 glue_1.8.0 xml2_1.5.1
## [64] svglite_2.2.2 rstudioapi_0.17.1 jsonlite_2.0.0
## [67] R6_2.6.1 systemfonts_1.3.1
# create output folders
if (!dir.exists("outputs")) dir.create("outputs")
if (!dir.exists("outputs/plots")) dir.create("outputs/plots")
if (!dir.exists("outputs/tables")) dir.create("outputs/tables")
# ── PLOTS ──────────────────────────────────────────────────────────────────
# KM overall
png("outputs/plots/km_overall.png", width = 900, height = 700, res = 120)
print(ggsurvplot(
km_overall, data = rats_clean, conf.int = TRUE, risk.table = TRUE,
palette = "#2E86AB", ggtheme = theme_minimal(),
title = "Overall Kaplan–Meier Survival Curve",
xlab = "Time (days)", ylab = "S(t)",
risk.table.title = "Number at Risk"
))
dev.off()## png
## 2
# KM by treatment
png("outputs/plots/km_by_treatment.png", width = 900, height = 700, res = 120)
print(ggsurvplot(
km_treatment, data = rats_clean, conf.int = TRUE, risk.table = TRUE,
pval = TRUE, palette = c("#2E86AB", "#F18F01"),
legend.labs = c("Control", "Treated"), legend.title = "Group",
ggtheme = theme_minimal(),
title = "Kaplan–Meier Curves by Treatment Group",
xlab = "Time (days)", ylab = "S(t)", surv.median.line = "none",
tables.theme = theme_cleantable()
))
dev.off()## png
## 2
# KM by sex
png("outputs/plots/km_by_sex.png", width = 900, height = 700, res = 120)
print(ggsurvplot(
km_sex, data = rats_clean, conf.int = TRUE, risk.table = TRUE,
pval = TRUE, palette = c("#A23B72", "#3B7080"),
legend.labs = c("Female", "Male"), legend.title = "Sex",
ggtheme = theme_minimal(),
title = "Kaplan–Meier Curves by Sex",
xlab = "Time (days)", ylab = "S(t)",
tables.theme = theme_cleantable()
))
dev.off()## png
## 2
# Nelson-Aalen cumulative hazard overall
png("outputs/plots/nelson_aalen_overall.png", width = 900, height = 600, res = 120)
print(ggsurvplot(
na_overall, data = rats_clean, fun = "cumhaz", conf.int = TRUE,
palette = "#2E86AB", ggtheme = theme_minimal(),
title = "Nelson–Aalen Cumulative Hazard",
xlab = "Time (days)", ylab = "H(t)"
))
dev.off()## png
## 2
# Nelson-Aalen by treatment
png("outputs/plots/nelson_aalen_by_treatment.png", width = 900, height = 600, res = 120)
print(ggsurvplot(
na_rx, data = rats_clean, fun = "cumhaz", conf.int = TRUE,
palette = c("#2E86AB", "#F18F01"),
legend.labs = c("Control", "Treated"), legend.title = "Group",
ggtheme = theme_minimal(),
title = "Cumulative Hazard by Treatment Group",
xlab = "Time (days)", ylab = "H(t)"
))
dev.off()## png
## 2
# Smoothed hazard vs Weibull
png("outputs/plots/hazard_na_vs_weibull.png", width = 900, height = 600, res = 120)
plot(na_data$time, na_data$smooth_haz,
type = "l", col = "#2E86AB", lwd = 2,
xlab = "Time (days)", ylab = "h(t)",
main = "Nelson–Aalen vs Weibull Hazard")
lines(weibull_h$time, weibull_h$est,
col = "#F18F01", lwd = 2, lty = 2)
legend("topleft",
legend = c("Nelson–Aalen (smoothed)", "Weibull (parametric)"),
col = c("#2E86AB", "#F18F01"), lwd = 2, lty = c(1, 2), bty = "n")
grid()
dev.off()## png
## 2
# Forest plot
png("outputs/plots/forest_plot.png", width = 900, height = 500, res = 120)
print(ggforest(cox_multi, data = rats_clean))
dev.off()## png
## 2
# Schoenfeld residuals
png("outputs/plots/schoenfeld_residuals.png", width = 900, height = 500, res = 120)
par(mfrow = c(1, 2))
plot(ph_test, var = 1, main = "Schoenfeld — Treatment",
ylab = "Beta(t)", xlab = "Time (days)", col = "#F18F01", lwd = 2)
abline(h = coef(cox_multi)[1], col = "red", lty = 2, lwd = 2)
plot(ph_test, var = 2, main = "Schoenfeld — Sex",
ylab = "Beta(t)", xlab = "Time (days)", col = "#A23B72", lwd = 2)
abline(h = coef(cox_multi)[2], col = "red", lty = 2, lwd = 2)
par(mfrow = c(1, 1))
dev.off()## png
## 2
# dfbeta influential observations
png("outputs/plots/dfbeta_residuals.png", width = 900, height = 500, res = 120)
par(mfrow = c(1, 2))
plot(dfb[, 1], ylab = "dfbeta (treatment)",
main = "Influence on Treatment Coefficient",
pch = 16, cex = 0.6, col = "#2E86AB")
abline(h = 0, lty = 2, col = "red")
plot(dfb[, 2], ylab = "dfbeta (sex)",
main = "Influence on Sex Coefficient",
pch = 16, cex = 0.6, col = "#A23B72")
abline(h = 0, lty = 2, col = "red")
par(mfrow = c(1, 1))
dev.off()## png
## 2
# Martingale and deviance residuals
png("outputs/plots/martingale_deviance.png", width = 900, height = 500, res = 120)
par(mfrow = c(1, 2))
plot(rats_clean$time, mart_null,
pch = 16, cex = 0.5, col = "#2E86AB",
xlab = "Time (days)", ylab = "Martingale Residual",
main = "Martingale Residuals vs Time")
lines(lowess(rats_clean$time, mart_null), col = "red", lwd = 2)
abline(h = 0, lty = 2)
plot(dev_resid, pch = 16, cex = 0.5, col = "#43A047",
ylab = "Deviance Residual", main = "Deviance Residuals")
abline(h = c(-2, 0, 2), lty = c(2, 1, 2), col = c("red", "black", "red"))
par(mfrow = c(1, 1))
dev.off()## png
## 2
# KM vs Weibull overlay
png("outputs/plots/km_vs_weibull.png", width = 900, height = 600, res = 120)
plot(km_overall, col = "black", lwd = 2.5, conf.int = FALSE,
xlab = "Time (days)", ylab = "S(t)",
main = "KM Curve vs Best Parametric Model (Weibull)", ylim = c(0, 1))
lines(wb_surv$time, wb_surv$est, col = "#F18F01", lwd = 2, lty = 2)
legend("bottomleft",
legend = c("Kaplan–Meier", "Weibull (parametric)"),
col = c("black", "#F18F01"), lwd = 2, lty = c(1, 2), bty = "n")
grid()
dev.off()## png
## 2
# ── TABLES ─────────────────────────────────────────────────────────────────
write.csv(data.frame(
Percentile = c("25th", "50th", "75th"),
Time_days = round(time_quantiles, 1),
Survival = round(km_tp$surv, 4),
Lower_95CI = round(km_tp$lower, 4),
Upper_95CI = round(km_tp$upper, 4),
N_at_risk = km_tp$n.risk
), "outputs/tables/survival_at_timepoints.csv", row.names = FALSE)
write.csv(data.frame(
Method = c("Log-Rank", "Wilcoxon"),
Treatment_p = round(c(pval_lr_trt, pval_wc_trt), 4),
Sex_p = round(c(pval_lr_sex, pval_wc_sex), 4)
), "outputs/tables/hypothesis_tests.csv", row.names = FALSE)
write.csv(
tidy(cox_multi, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~round(.x, 3))),
"outputs/tables/cox_multivariable.csv", row.names = FALSE
)
write.csv(
tidy(cox_strat, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~round(.x, 3))),
"outputs/tables/cox_stratified.csv", row.names = FALSE
)
write.csv(
tidy(cox_td, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(across(where(is.numeric), ~round(.x, 3))),
"outputs/tables/cox_time_dependent.csv", row.names = FALSE
)
write.csv(
as.data.frame(ph_test$table) %>%
mutate(Variable = rownames(.), across(where(is.numeric), ~round(.x, 4))),
"outputs/tables/ph_schoenfeld_test.csv", row.names = FALSE
)
cat("All plots saved to outputs/plots/\n")## All plots saved to outputs/plots/
## All tables saved to outputs/tables/
##
## Plots saved:
## - dfbeta_residuals.png
## - forest_plot.png
## - hazard_na_vs_weibull.png
## - km_by_sex.png
## - km_by_treatment.png
## - km_overall.png
## - km_vs_weibull.png
## - martingale_deviance.png
## - nelson_aalen_by_treatment.png
## - nelson_aalen_overall.png
## - schoenfeld_residuals.png
##
## Tables saved:
## - cox_multivariable.csv
## - cox_stratified.csv
## - cox_time_dependent.csv
## - hypothesis_tests.csv
## - ph_schoenfeld_test.csv
## - survival_at_timepoints.csv