Rats Survival Analysis

Introduction

This report analyzes the rats survival dataset using standard survival analysis techniques. The analysis includes descriptive summaries, Kaplan-Meier estimation, Cox proportional hazards regression, tie-handling comparisons, proportional hazards diagnostics, accelerated failure time (AFT) modeling, and parametric model comparison.

The data contain the following variables:

litter: factor identifying the litter group
rx: numeric treatment indicator
time: observed survival time
status: event indicator
sex: factor with levels Male and Female
event: duplicate event indicator
treatment: factor with levels Control and Drug

To avoid duplication, this analysis uses:

time as the survival time
event as the event indicator
treatment as the treatment factor
sex as the sex factor
litter as a clustering variable in the Cox model

Setup

library(survival)
library(survminer)
library(flexsurv)
library(dplyr)
library(ggplot2)
library(knitr)
library(kableExtra)
library(broom)
library(gridExtra)

data("rats", package = "survival")

rats_data <- rats

str(rats_data)

## 'data.frame':    300 obs. of  5 variables:
##  $ litter: int  1 1 1 2 2 2 3 3 3 4 ...
##  $ rx    : num  1 0 0 1 0 0 1 0 0 1 ...
##  $ time  : num  101 49 104 91 104 102 104 102 104 91 ...
##  $ status: num  0 1 0 0 0 0 0 0 0 0 ...
##  $ sex   : chr  "f" "f" "f" "m" ...

head(rats_data)

##   litter rx time status sex
## 1      1  1  101      0   f
## 2      1  0   49      1   f
## 3      1  0  104      0   f
## 4      2  1   91      0   m
## 5      2  0  104      0   m
## 6      2  0  102      0   m

summary(rats_data)

##      litter             rx              time            status    
##  Min.   :  1.00   Min.   :0.0000   Min.   : 23.00   Min.   :0.00  
##  1st Qu.: 25.75   1st Qu.:0.0000   1st Qu.: 80.75   1st Qu.:0.00  
##  Median : 50.50   Median :0.0000   Median : 98.00   Median :0.00  
##  Mean   : 50.50   Mean   :0.3333   Mean   : 90.44   Mean   :0.14  
##  3rd Qu.: 75.25   3rd Qu.:1.0000   3rd Qu.:104.00   3rd Qu.:0.00  
##  Max.   :100.00   Max.   :1.0000   Max.   :104.00   Max.   :1.00  
##      sex           
##  Length:300        
##  Class :character  
##  Mode  :character  
##                    
##                    
##

rats_data <- rats_data %>%
  mutate(
    litter = as.factor(litter),

  
    treatment = factor(rx, levels = c(0, 1), labels = c("Control", "Drug")),

  
    event = as.numeric(status),

   
    time = as.numeric(time),

    sex = as.factor(sex)
  )

str(rats_data)

## 'data.frame':    300 obs. of  7 variables:
##  $ litter   : Factor w/ 100 levels "1","2","3","4",..: 1 1 1 2 2 2 3 3 3 4 ...
##  $ rx       : num  1 0 0 1 0 0 1 0 0 1 ...
##  $ time     : num  101 49 104 91 104 102 104 102 104 91 ...
##  $ status   : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ sex      : Factor w/ 2 levels "f","m": 1 1 1 2 2 2 1 1 1 2 ...
##  $ treatment: Factor w/ 2 levels "Control","Drug": 2 1 1 2 1 1 2 1 1 2 ...
##  $ event    : num  0 1 0 0 0 0 0 0 0 0 ...

missing_summary <- data.frame(
  Variable = names(rats_data),
  Missing = colSums(is.na(rats_data)),
  Percent = round(100 * colSums(is.na(rats_data)) / nrow(rats_data), 2)
)

missing_summary %>%
  kbl(caption = "Table 1: Missing Data Summary") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")

Table 1: Missing Data Summary
	Variable	Missing	Percent
litter	litter	0	0
rx	rx	0	0
time	time	0	0
status	status	0	0
sex	sex	0	0
treatment	treatment	0	0
event	event	0	0

rats_clean <- rats_data %>%
  select(litter, time, event, treatment, sex) %>%
  na.omit()

SurvObj_rats <- Surv(time = rats_clean$time, event = rats_clean$event)

cat("Sample size:", nrow(rats_clean), "\n")

## Sample size: 300

cat("Number of events:", sum(rats_clean$event), "\n")

## Number of events: 42

cat("Number censored:", sum(rats_clean$event == 0), "\n")

## Number censored: 258

cat("Event rate (%):", round(100 * mean(rats_clean$event), 2), "\n")

## Event rate (%): 14

sample_summary <- data.frame(
  Metric = c("Sample size", "Number of events", "Number censored", "Event rate (%)"),
  Value = c(
    nrow(rats_clean),
    sum(rats_clean$event),
    sum(rats_clean$event == 0),
    round(100 * mean(rats_clean$event), 2)
  )
)

sample_summary %>%
  kbl(caption = "Table 2: Sample Summary") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")

Table 2: Sample Summary
Metric	Value
Sample size	300
Number of events	42
Number censored	258
Event rate (%)	14

time_summary <- data.frame(
  Statistic = c("Minimum", "1st Quartile", "Median", "Mean", "3rd Quartile", "Maximum", "SD"),
  Value = c(
    min(rats_clean$time),
    quantile(rats_clean$time, 0.25),
    median(rats_clean$time),
    round(mean(rats_clean$time), 2),
    quantile(rats_clean$time, 0.75),
    max(rats_clean$time),
    round(sd(rats_clean$time), 2)
  )
)

time_summary %>%
  kbl(caption = "Table 3: Descriptive Statistics for Survival Time") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")

Table 3: Descriptive Statistics for Survival Time
Statistic	Value
Minimum	23.00
1st Quartile	80.75
Median	98.00
Mean	90.44
3rd Quartile	104.00
Maximum	104.00
SD	17.06

p1 <- ggplot(rats_clean, aes(x = time)) +
  geom_histogram(fill = "#2E86AB", color = "white", bins = 20) +
  labs(title = "Distribution of Survival Time", x = "Time", y = "Count") +
  theme_minimal()

p2 <- ggplot(rats_clean, aes(x = treatment, fill = treatment)) +
  geom_bar() +
  scale_fill_manual(values = c("#2E86AB", "#F18F01")) +
  labs(title = "Treatment Distribution", x = "", y = "Count") +
  theme_minimal() +
  theme(legend.position = "none")

p3 <- ggplot(rats_clean, aes(x = sex, fill = sex)) +
  geom_bar() +
  scale_fill_manual(values = c("#A23B72", "#3B7080")) +
  labs(title = "Sex Distribution", x = "", y = "Count") +
  theme_minimal() +
  theme(legend.position = "none")

p4 <- ggplot(rats_clean, aes(x = treatment, y = time, fill = treatment)) +
  geom_boxplot(alpha = 0.8) +
  scale_fill_manual(values = c("#2E86AB", "#F18F01")) +
  labs(title = "Survival Time by Treatment", x = "", y = "Time") +
  theme_minimal() +
  theme(legend.position = "none")

grid.arrange(p1, p2, p3, p4, ncol = 2)

km_all <- survfit(Surv(time, event) ~ 1, data = rats_clean)

ggsurvplot(
  km_all,
  data = rats_clean,
  conf.int = TRUE,
  risk.table = TRUE,
  palette = "#2E86AB",
  ggtheme = theme_minimal(),
  title = "Overall Kaplan-Meier Survival Curve",
  xlab = "Time",
  ylab = "Survival Probability"
)

km_treatment <- survfit(Surv(time, event) ~ treatment, data = rats_clean)

ggsurvplot(
  km_treatment,
  data = rats_clean,
  conf.int = TRUE,
  risk.table = TRUE,
  pval = TRUE,
  pval.method = TRUE,
  palette = c("#2E86AB", "#F18F01"),
  ggtheme = theme_minimal(),
  title = "Kaplan-Meier Curves by Treatment",
  xlab = "Time",
  ylab = "Survival Probability"
)

km_sex <- survfit(Surv(time, event) ~ sex, data = rats_clean)

ggsurvplot(
  km_sex,
  data = rats_clean,
  conf.int = TRUE,
  risk.table = TRUE,
  pval = TRUE,
  pval.method = TRUE,
  palette = c("#A23B72", "#3B7080"),
  ggtheme = theme_minimal(),
  title = "Kaplan-Meier Curves by Sex",
  xlab = "Time",
  ylab = "Survival Probability"
)

cox_treatment <- coxph(Surv(time, event) ~ treatment, data = rats_clean)
summary(cox_treatment)

## Call:
## coxph(formula = Surv(time, event) ~ treatment, data = rats_clean)
## 
##   n= 300, number of events= 42 
## 
##                 coef exp(coef) se(coef)     z Pr(>|z|)  
## treatmentDrug 0.7137    2.0416   0.3088 2.311   0.0208 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##               exp(coef) exp(-coef) lower .95 upper .95
## treatmentDrug     2.042     0.4898     1.115     3.739
## 
## Concordance= 0.565  (se = 0.039 )
## Likelihood ratio test= 5.23  on 1 df,   p=0.02
## Wald test            = 5.34  on 1 df,   p=0.02
## Score (logrank) test = 5.57  on 1 df,   p=0.02

cox_treatment_tbl <- tidy(cox_treatment, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

cox_treatment_tbl %>%
  kbl(caption = "Table 4: Unadjusted Cox Model for Treatment") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")

Table 4: Unadjusted Cox Model for Treatment
term	estimate	std.error	statistic	p.value	conf.low	conf.high
treatmentDrug	2.042	0.309	2.311	0.021	1.115	3.739

cox_multi <- coxph(Surv(time, event) ~ treatment + sex, data = rats_clean)
summary(cox_multi)

## Call:
## coxph(formula = Surv(time, event) ~ treatment + sex, data = rats_clean)
## 
##   n= 300, number of events= 42 
## 
##                   coef exp(coef) se(coef)      z Pr(>|z|)    
## treatmentDrug  0.79100   2.20559  0.30936  2.557   0.0106 *  
## sexm          -3.06769   0.04653  0.72480 -4.232 2.31e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##               exp(coef) exp(-coef) lower .95 upper .95
## treatmentDrug   2.20559     0.4534   1.20281    4.0444
## sexm            0.04653    21.4923   0.01124    0.1926
## 
## Concordance= 0.764  (se = 0.031 )
## Likelihood ratio test= 50.04  on 2 df,   p=1e-11
## Wald test            = 24.08  on 2 df,   p=6e-06
## Score (logrank) test = 42.68  on 2 df,   p=5e-10

cox_multi_tbl <- tidy(cox_multi, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

cox_multi_tbl %>%
  kbl(caption = "Table 5: Multivariable Cox Model") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")

Table 5: Multivariable Cox Model
term	estimate	std.error	statistic	p.value	conf.low	conf.high
treatmentDrug	2.206	0.309	2.557	0.011	1.203	4.044
sexm	0.047	0.725	-4.232	0.000	0.011	0.193

cox_cluster <- coxph(
  Surv(time, event) ~ treatment + sex + cluster(litter),
  data = rats_clean
)
summary(cox_cluster)

## Call:
## coxph(formula = Surv(time, event) ~ treatment + sex, data = rats_clean, 
##     cluster = litter)
## 
##   n= 300, number of events= 42 
## 
##                   coef exp(coef) se(coef) robust se      z Pr(>|z|)    
## treatmentDrug  0.79100   2.20559  0.30936   0.29271  2.702  0.00689 ** 
## sexm          -3.06769   0.04653  0.72480   0.72066 -4.257 2.07e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##               exp(coef) exp(-coef) lower .95 upper .95
## treatmentDrug   2.20559     0.4534   1.24271     3.915
## sexm            0.04653    21.4923   0.01133     0.191
## 
## Concordance= 0.764  (se = 0.032 )
## Likelihood ratio test= 50.04  on 2 df,   p=1e-11
## Wald test            = 21.23  on 2 df,   p=2e-05
## Score (logrank) test = 42.68  on 2 df,   p=5e-10,   Robust = 25.74  p=3e-06
## 
##   (Note: the likelihood ratio and score tests assume independence of
##      observations within a cluster, the Wald and robust score tests do not).

cox_cluster_tbl <- tidy(cox_cluster, exponentiate = TRUE, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

cox_cluster_tbl %>%
  kbl(caption = "Table 6: Cox Model with Litter-Clustered Standard Errors") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")

Table 6: Cox Model with Litter-Clustered Standard Errors
term	estimate	std.error	robust.se	statistic	p.value	conf.low	conf.high
treatmentDrug	2.206	0.309	0.293	2.702	0.007	1.243	3.915
sexm	0.047	0.725	0.721	-4.257	0.000	0.011	0.191

cox_breslow <- coxph(Surv(time, event) ~ treatment + sex, data = rats_clean, ties = "breslow")
cox_efron   <- coxph(Surv(time, event) ~ treatment + sex, data = rats_clean, ties = "efron")
cox_exact   <- coxph(Surv(time, event) ~ treatment + sex, data = rats_clean, ties = "exact")

ties_table <- data.frame(
  Method = c("Breslow", "Efron", "Exact"),
  Treatment_Beta = c(coef(cox_breslow)["treatmentDrug"],
                     coef(cox_efron)["treatmentDrug"],
                     coef(cox_exact)["treatmentDrug"]),
  Treatment_HR = exp(c(coef(cox_breslow)["treatmentDrug"],
                       coef(cox_efron)["treatmentDrug"],
                       coef(cox_exact)["treatmentDrug"])),
  Sex_Beta = c(coef(cox_breslow)["sexm"],
               coef(cox_efron)["sexm"],
               coef(cox_exact)["sexm"]),
  Sex_HR = exp(c(coef(cox_breslow)["sexm"],
                 coef(cox_efron)["sexm"],
                 coef(cox_exact)["sexm"]))
)

ties_table <- ties_table %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

ties_table %>%
  kbl(caption = "Table 7: Comparison of Tie-Handling Methods") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")

Table 7: Comparison of Tie-Handling Methods
Method	Treatment_Beta	Treatment_HR	Sex_Beta	Sex_HR
Breslow	0.785	2.193	-3.063	0.047
Efron	0.791	2.206	-3.068	0.047
Exact	0.790	2.203	-3.070	0.046

cox_test_summary <- summary(cox_treatment)

tests_table <- data.frame(
  Test = c("Likelihood Ratio", "Wald", "Score (Log-rank)"),
  Statistic = round(c(
    cox_test_summary$logtest["test"],
    cox_test_summary$waldtest["test"],
    cox_test_summary$sctest["test"]
  ), 3),
  DF = c(
    cox_test_summary$logtest["df"],
    cox_test_summary$waldtest["df"],
    cox_test_summary$sctest["df"]
  ),
  P_value = signif(c(
    cox_test_summary$logtest["pvalue"],
    cox_test_summary$waldtest["pvalue"],
    cox_test_summary$sctest["pvalue"]
  ), 4)
)

tests_table %>%
  kbl(caption = "Table 8: Hypothesis Tests for the Treatment Cox Model") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")

Table 8: Hypothesis Tests for the Treatment Cox Model
Test	Statistic	DF	P_value
Likelihood Ratio	5.234	1	0.02215
Wald	5.340	1	0.02081
Score (Log-rank)	5.573	1	0.01824

cox_plot <- coxph(Surv(time, event) ~ treatment + sex, data = rats_clean)

newdata_cox <- data.frame(
  treatment = factor(
    c(levels(rats_clean$treatment)[1], levels(rats_clean$treatment)[2]),
    levels = levels(rats_clean$treatment)
  ),
  sex = factor(
    rep(levels(rats_clean$sex)[1], 2),
    levels = levels(rats_clean$sex)
  )
)

cox_surv <- survfit(cox_plot, newdata = newdata_cox)

ggsurvplot(
  cox_surv,
  data = newdata_cox,
  conf.int = TRUE,
  legend.title = "Treatment",
  legend.labs = as.character(newdata_cox$treatment),
  palette = c("#2E86AB", "#F18F01"),
  ggtheme = theme_minimal(),
  title = "Adjusted Survival Curves from Cox Model",
  xlab = "Time",
  ylab = "Survival Probability"
)

ph_test <- cox.zph(cox_multi)
ph_test

##           chisq df     p
## treatment 5.393  1 0.020
## sex       0.602  1 0.438
## GLOBAL    6.145  2 0.046

ph_table <- data.frame(ph_test$table)
ph_table$Variable <- rownames(ph_table)
rownames(ph_table) <- NULL

ph_table %>%
  mutate(across(where(is.numeric), ~ round(.x, 4))) %>%
  kbl(caption = "Table 9: Proportional Hazards Assumption Check") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")

Table 9: Proportional Hazards Assumption Check
chisq	df	p	Variable
5.3927	1	0.0202	treatment
0.6019	1	0.4379	sex
6.1450	2	0.0463	GLOBAL

aft_weibull <- survreg(Surv(time, event) ~ treatment + sex, data = rats_clean, dist = "weibull")
summary(aft_weibull)

## 
## Call:
## survreg(formula = Surv(time, event) ~ treatment + sex, data = rats_clean, 
##     dist = "weibull")
##                 Value Std. Error     z       p
## (Intercept)    4.9714     0.0800 62.12 < 2e-16
## treatmentDrug -0.2117     0.0862 -2.46 0.01408
## sexm           0.8197     0.2233  3.67 0.00024
## Log(scale)    -1.3256     0.1407 -9.42 < 2e-16
## 
## Scale= 0.266 
## 
## Weibull distribution
## Loglik(model)= -261.6   Loglik(intercept only)= -287.1
##  Chisq= 51.08 on 2 degrees of freedom, p= 8.1e-12 
## Number of Newton-Raphson Iterations: 9 
## n= 300

aft_tbl <- tidy(aft_weibull, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

aft_tbl %>%
  kbl(caption = "Table 10: Weibull AFT Model Coefficients") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")

Table 10: Weibull AFT Model Coefficients
term	estimate	std.error	statistic	p.value	conf.low	conf.high
(Intercept)	4.971	0.080	62.124	0.000	4.815	5.128
treatmentDrug	-0.212	0.086	-2.455	0.014	-0.381	-0.043
sexm	0.820	0.223	3.671	0.000	0.382	1.257
Log(scale)	-1.326	0.141	-9.424	0.000	NA	NA

aft_coef <- coef(aft_weibull)
aft_coef <- aft_coef[names(aft_coef) != "(Intercept)"]

aft_time_ratio <- exp(aft_coef)

aft_time_ratio_table <- data.frame(
  Variable = names(aft_time_ratio),
  Time_Ratio = round(aft_time_ratio, 3)
)

aft_time_ratio_table %>%
  kbl(caption = "Table 11: Time Ratios from the Weibull AFT Model") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")

Table 11: Time Ratios from the Weibull AFT Model
	Variable	Time_Ratio
treatmentDrug	treatmentDrug	0.809
sexm	sexm	2.270

safe_fit <- function(dist_name, surv_obj) {
  tryCatch({
    if (dist_name %in% c("gompertz", "gengamma")) {
      flexsurvreg(
        surv_obj ~ 1,
        dist = dist_name,
        method = "BFGS",
        control = list(maxit = 1000)
      )
    } else {
      flexsurvreg(surv_obj ~ 1, dist = dist_name)
    }
  }, error = function(e) {
    message("Model ", dist_name, " failed: ", e$message)
    return(NULL)
  })
}

distributions <- c(
  exp = "Exponential",
  weibull = "Weibull",
  lnorm = "LogNormal",
  llogis = "LogLogistic",
  gamma = "Gamma",
  gompertz = "Gompertz",
  gengamma = "GeneralizedGamma"
)

fit_models <- list()

for (dist_name in names(distributions)) {
  fit <- safe_fit(dist_name, SurvObj_rats)
  if (!is.null(fit)) {
    fit_models[[distributions[[dist_name]]]] <- fit
  }
}

fit_models <- Filter(Negate(is.null), fit_models)

if (length(fit_models) == 0) {
  stop("No parametric model was successfully fitted.")
}

model_summary <- data.frame(
  Model = names(fit_models),
  LogLik = sapply(fit_models, function(x) as.numeric(logLik(x))),
  AIC = sapply(fit_models, function(x) as.numeric(AIC(x))),
  Parameters = sapply(fit_models, function(x) nrow(x$res)),
  stringsAsFactors = FALSE
)

model_summary$LogLik <- round(model_summary$LogLik, 2)
model_summary$AIC <- round(model_summary$AIC, 2)

param_estimates <- lapply(fit_models, function(x) {
  params <- round(x$res[, "est"], 3)
  paste(names(params), params, sep = " = ", collapse = "<br>")
})

model_summary$Estimates <- unlist(param_estimates)
best_aic_row <- which.min(model_summary$AIC)

model_summary %>%
  select(Model, Estimates, LogLik, AIC) %>%
  kbl(caption = "Table 12: Parametric Model Fits", escape = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = TRUE) %>%
  column_spec(1, bold = TRUE, background = "#E8F0FE") %>%
  column_spec(2, width = "20em") %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white") %>%
  row_spec(best_aic_row, background = "#F0F8E8", bold = TRUE)

Table 12: Parametric Model Fits
	Model	Estimates	LogLik	AIC
Exponential	Exponential	= 0.002	-313.77	629.55
Weibull	Weibull	shape = 3.667 scale = 160.625	-287.10	578.21
LogNormal	LogNormal	meanlog = 5.159 sdlog = 0.557	-287.43	578.87
LogLogistic	LogLogistic	shape = 3.823 scale = 153.951	-287.16	578.33
Gamma	Gamma	shape = 5.483 rate = 0.032	-287.19	578.37
Gompertz	Gompertz	shape = 0.044 rate = 0	-288.33	580.65
GeneralizedGamma	GeneralizedGamma	mu = 5.058 sigma = 0.125 Q = 2.278	-287.07	580.13

best_model_name <- model_summary$Model[which.min(model_summary$AIC)]
best_model <- fit_models[[best_model_name]]

best_model_table <- data.frame(
  Quantity = c("Best Model", "Minimum AIC"),
  Value = c(best_model_name, round(min(model_summary$AIC), 2))
)

best_model_table %>%
  kbl(caption = "Table 13: Best Parametric Model by AIC") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")

Table 13: Best Parametric Model by AIC
Quantity	Value
Best Model	Weibull
Minimum AIC	578.21

# 🔥 ADD THIS SECTION HERE
time_points <- c(30, 60, 90)

surv_probs <- summary(
  best_model,
  t = time_points,
  type = "survival",
  tidy = TRUE
)

surv_table <- data.frame(
  Time = time_points,
  Survival_Probability = round(surv_probs$est, 3)
)

surv_table %>%
  kbl(caption = "**Table: Survival Probabilities at Key Time Points**") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  ) %>%
  row_spec(0, bold = TRUE, background = "#2E86AB", color = "white")

**Table: Survival Probabilities at Key Time Points**
Time	Survival_Probability
30	0.998
60	0.973
90	0.887

ggplot(model_summary, aes(x = reorder(Model, AIC), y = AIC)) +
  geom_col(fill = "#2E86AB") +
  coord_flip() +
  labs(title = "AIC Comparison of Parametric Models",
       x = "Model", y = "AIC") +
  theme_minimal()

time_seq <- seq(0.1, max(rats_clean$time), length.out = 200)

surv_out <- tryCatch(
  summary(best_model, t = time_seq, type = "survival", tidy = TRUE),
  error = function(e) NULL
)

haz_out <- tryCatch(
  summary(best_model, t = time_seq, type = "hazard", tidy = TRUE),
  error = function(e) NULL
)

surv_probs <- if (!is.null(surv_out)) surv_out$est else rep(NA, length(time_seq))
haz_probs <- if (!is.null(haz_out)) haz_out$est else rep(NA, length(time_seq))

life_df <- data.frame(
  Time = time_seq,
  Survival = surv_probs,
  Hazard = haz_probs,
  CDF = 1 - surv_probs
)
life_df$Density <- life_df$Hazard * life_df$Survival
life_df$CumHazard <- -log(life_df$Survival)

p_surv <- ggplot(life_df, aes(x = Time, y = Survival)) +
  geom_line(color = "#2E86AB", linewidth = 1) +
  labs(title = "Survival Function", y = "S(t)", x = "Time") +
  theme_minimal()

p_haz <- ggplot(life_df, aes(x = Time, y = Hazard)) +
  geom_line(color = "#F18F01", linewidth = 1) +
  labs(title = "Hazard Function", y = "h(t)", x = "Time") +
  theme_minimal()

p_cdf <- ggplot(life_df, aes(x = Time, y = CDF)) +
  geom_line(color = "#A23B72", linewidth = 1) +
  labs(title = "Cumulative Distribution Function", y = "F(t)", x = "Time") +
  theme_minimal()

p_density <- ggplot(life_df, aes(Time, Density)) +
  geom_line(color = "#E15759", linewidth = 1) +
  labs(title = "Density Function f(t)", y = "f(t)", x = "Time") +
  theme_minimal()

p_cumhaz <- ggplot(life_df, aes(Time, CumHazard)) +
  geom_line(color = "#76B7B2", linewidth = 1) +
  labs(title = "Cumulative Hazard H(t)", y = "H(t)", x = "Time") +
  theme_minimal()

grid.arrange(p_surv, p_haz, p_density, p_cumhaz, ncol = 2)

Rats Survival Analysis

Walter

2026-03-22

Introduction

Setup