# PARAMETRIC SURVIVAL ANALYSIS — GROUP 2
# Dataset : Colon Cancer (from survival package)
# Event   : Death (etype == 2)
# ============================================================

rm(list = ls())

# ============================================================
library(survival)
library(flexsurv)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# ============================================================
# ── Saving outputs ─────────────

base_dir  <- getwd()
fig_dir   <- file.path(base_dir, "outputs", "figures")
tbl_dir   <- file.path(base_dir, "outputs", "tables")

dir.create(fig_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(tbl_dir, recursive = TRUE, showWarnings = FALSE)

cat("Figures will be saved to :", fig_dir, "\n")
## Figures will be saved to : /cloud/project/outputs/figures
cat("Tables  will be saved to :", tbl_dir, "\n\n")
## Tables  will be saved to : /cloud/project/outputs/tables
# Data Exploration
# ============================================================

colon <- survival::colon

colon_raw <- colon %>%
  filter(etype == 2) %>%
  distinct(id, .keep_all = TRUE)

cat("=== Dimensions (death records, one row per patient) ===\n")
## === Dimensions (death records, one row per patient) ===
dim(colon_raw)
## [1] 929  16
cat("\n=== Missing Values per Variable (before imputation) ===\n")
## 
## === Missing Values per Variable (before imputation) ===
mv_before <- colSums(is.na(colon_raw))
print(mv_before[mv_before > 0])
##  nodes differ 
##     18     23
# ── Table 1: Summary Statistics of Key Variables ────────────
tbl1_data <- data.frame(
  Variable  = c("Time (days)", "Status", "Age (years)", "Sex", "Nodes"),
  Min       = c(min(colon_raw$time,   na.rm = TRUE),
                min(colon_raw$status, na.rm = TRUE),
                min(colon_raw$age,    na.rm = TRUE),
                min(colon_raw$sex,    na.rm = TRUE),
                min(colon_raw$nodes,  na.rm = TRUE)),
  Max       = c(max(colon_raw$time,   na.rm = TRUE),
                max(colon_raw$status, na.rm = TRUE),
                max(colon_raw$age,    na.rm = TRUE),
                max(colon_raw$sex,    na.rm = TRUE),
                max(colon_raw$nodes,  na.rm = TRUE)),
  Mean      = round(c(mean(colon_raw$time,   na.rm = TRUE),
                      mean(colon_raw$status, na.rm = TRUE),
                      mean(colon_raw$age,    na.rm = TRUE),
                      mean(colon_raw$sex,    na.rm = TRUE),
                      mean(colon_raw$nodes,  na.rm = TRUE)), 2),
  Median    = round(c(median(colon_raw$time,   na.rm = TRUE),
                      median(colon_raw$status, na.rm = TRUE),
                      median(colon_raw$age,    na.rm = TRUE),
                      median(colon_raw$sex,    na.rm = TRUE),
                      median(colon_raw$nodes,  na.rm = TRUE)), 2),
  Missing   = c(sum(is.na(colon_raw$time)),
                sum(is.na(colon_raw$status)),
                sum(is.na(colon_raw$age)),
                sum(is.na(colon_raw$sex)),
                sum(is.na(colon_raw$nodes)))
)

kable(tbl1_data,
      caption = "Table 1: Summary Statistics of Key Variables (Pre-Imputation)",
      align   = "lrrrrr") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, position = "center") %>%
  column_spec(1, bold = TRUE)
Table 1: Summary Statistics of Key Variables (Pre-Imputation)
Variable Min Max Mean Median Missing
Time (days) 23 3329 1669.96 1976 0
Status 0 1 0.49 0 0
Age (years) 18 85 59.75 61 0
Sex 0 1 0.52 1 0
Nodes 0 33 3.66 2 18
# Data Preparation — LOCF and Survival Object Definition
# ============================================================

# ── Handling missings ──────────────────
locf <- function(x) {
  for (i in seq_along(x)) {
    if (is.na(x[i]) && i > 1) x[i] <- x[i - 1]
  }

  for (i in rev(seq_along(x))) {
    if (is.na(x[i]) && i < length(x)) x[i] <- x[i + 1]
  }
  return(x)
}

colon_clean <- colon_raw %>%
  arrange(id) %>%
  mutate(nodes = locf(nodes))

cat("=== Missing Values After LOCF Imputation ===\n")
## === Missing Values After LOCF Imputation ===
print(colSums(is.na(colon_clean[, c("time","status","age","sex","nodes")])))
##   time status    age    sex  nodes 
##      0      0      0      0      0
# ── Table 2: Sample Summary ───────────────────────────────────
n_total    <- nrow(colon_clean)
n_events   <- sum(colon_clean$status == 1, na.rm = TRUE)
n_censored <- sum(colon_clean$status == 0, na.rm = TRUE)
pct_event  <- round(100 * n_events  / n_total, 1)
pct_cens   <- round(100 * n_censored / n_total, 1)

tbl2_data <- data.frame(
  Statistic = c("Total Patients",
                "Events (Deaths)",
                "Censored Observations",
                "Percentage Events (%)",
                "Percentage Censored (%)",
                "Minimum Follow-up (days)",
                "Maximum Follow-up (days)",
                "Mean Follow-up (days)",
                "Median Follow-up (days)"),
  Value     = c(n_total,
                n_events,
                n_censored,
                paste0(pct_event,  "%"),
                paste0(pct_cens,   "%"),
                min(colon_clean$time,    na.rm = TRUE),
                max(colon_clean$time,    na.rm = TRUE),
                round(mean(colon_clean$time,   na.rm = TRUE), 1),
                round(median(colon_clean$time, na.rm = TRUE), 1))
)

kable(tbl2_data,
      caption   = "Table 2: Sample Summary — Colon Cancer Dataset (Post-Cleaning)",
      col.names = c("Statistic", "Value"),
      align     = "lr") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, position = "center") %>%
  column_spec(1, bold = TRUE)
Table 2: Sample Summary — Colon Cancer Dataset (Post-Cleaning)
Statistic Value
Total Patients 929
Events (Deaths) 452
Censored Observations 477
Percentage Events (%) 48.7%
Percentage Censored (%) 51.3%
Minimum Follow-up (days) 23
Maximum Follow-up (days) 3329
Mean Follow-up (days) 1670
Median Follow-up (days) 1976
# ── Create Survival Object ────────────────────────────────────
SurvObj_colon <- Surv(time  = colon_clean$time,
                      event = colon_clean$status)


# Fit Multiple Parametric Models
# ============================================================

fits <- list(
  Exponential  = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "exp"),
  Weibull      = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "weibull"),
  LogNormal    = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "lnorm"),
  LogLogistic  = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "llogis"),
  Gamma        = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "gamma"),
  GenGamma     = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "gengamma"),
  Gompertz     = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "gompertz")
)

# ── Table 3: Model Comparison ─────────────────────────────────
extract_params <- function(model_name, fit) {
  params     <- fit$res[, "est"]
  param_str  <- paste(
    paste(rownames(fit$res), round(params, 4), sep = " = "),
    collapse = "; "
  )
  data.frame(
    Model      = model_name,
    Parameters = param_str,
    LogLik     = round(as.numeric(logLik(fit)), 3),
    AIC        = round(AIC(fit), 3),
    BIC        = round(BIC(fit), 3),
    stringsAsFactors = FALSE
  )
}

tbl3_data <- do.call(rbind, mapply(extract_params,
                                   names(fits), fits,
                                   SIMPLIFY = FALSE))
tbl3_data <- tbl3_data[order(tbl3_data$AIC), ]
rownames(tbl3_data) <- NULL

kable(tbl3_data,
      caption = "Table 3: Parametric Model Comparison — MLE Parameters, Log-Likelihood, AIC & BIC",
      align   = "llrrr") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, position = "center") #%>%
Table 3: Parametric Model Comparison — MLE Parameters, Log-Likelihood, AIC & BIC
Model Parameters LogLik AIC BIC
GenGamma mu = 7.5454; sigma = 1.5753; Q = -0.4805 -4103.464 8212.927 8227.429
LogNormal meanlog = 7.7773; sdlog = 1.4318 -4106.571 8217.142 8226.810
LogLogistic shape = 1.2017; scale = 2309.7226 -4116.389 8236.779 8246.447
Gompertz shape = -3e-04; rate = 4e-04 -4123.578 8251.156 8260.825
Exponential rate = 3e-04 -4131.723 8265.445 8270.279
Gamma shape = 1.0504; rate = 3e-04 -4131.348 8266.696 8276.364
Weibull shape = 0.9991; scale = 3433.9201 -4131.722 8267.445 8277.113
  # column_spec(1, bold = TRUE) %>%
  # row_spec(1, bold = TRUE, color = "white", background = "#2C6E49")


# Model Selection
# ============================================================

best_model_name <- tbl3_data$Model[1]
best_model      <- fits[[best_model_name]]

cat("\n=== Best Model by AIC:", best_model_name, "===\n")
## 
## === Best Model by AIC: GenGamma ===
print(best_model)
## Call:
## flexsurvreg(formula = SurvObj_colon ~ 1, data = colon_clean, 
##     dist = "gengamma")
## 
## Estimates: 
##        est      L95%     U95%     se     
## mu      7.5454   7.3169   7.7739   0.1166
## sigma   1.5753   1.4382   1.7253   0.0731
## Q      -0.4805  -0.8550  -0.1061   0.1911
## 
## N = 929,  Events: 452,  Censored: 477
## Total time at risk: 1551389
## Log-likelihood = -4103.464, df = 3
## AIC = 8212.927
# ── Figure 1: AIC Bar Chart ───────────────────────────────────
aic_df        <- data.frame(Model = tbl3_data$Model, AIC = tbl3_data$AIC)
aic_df$Model  <- factor(aic_df$Model, levels = aic_df$Model[order(aic_df$AIC)])

p_aic <- ggplot(aic_df, aes(x = Model, y = AIC,
                             fill = Model == best_model_name)) +
  geom_bar(stat = "identity", width = 0.6, color = "black", linewidth = 0.3) +
  geom_text(aes(label = round(AIC, 1)), vjust = -0.4, size = 3.5, fontface = "bold") +
  scale_fill_manual(values = c("TRUE" = "#2C6E49", "FALSE" = "#A8DADC"),
                    guide = "none") +
  labs(#title    = "Figure 1: AIC Comparison Across Parametric Models",
       subtitle = paste("Best model:", best_model_name, "(lowest AIC, highlighted)"),
       x = "Model", y = "AIC") +
  theme_bw(base_size = 13) +
  theme(plot.title    = element_text(face = "bold"),
        axis.text.x   = element_text(angle = 20, hjust = 1))

print(p_aic)

# ── Figure 2: Log-Likelihood Dot Plot ────────────────────────
loglik_df        <- data.frame(Model = tbl3_data$Model, LogLik = tbl3_data$LogLik)
loglik_df$Model  <- factor(loglik_df$Model,
                            levels = loglik_df$Model[order(loglik_df$LogLik,
                                                           decreasing = TRUE)])

p_loglik <- ggplot(loglik_df, aes(x = LogLik, y = Model,
                                   color = Model == best_model_name)) +
  geom_point(size = 5) +
  geom_vline(xintercept = max(loglik_df$LogLik),
             linetype = "dashed", color = "#2C6E49") +
  geom_text(aes(label = round(LogLik, 1)), vjust = -1.2, size = 3.5) +
  scale_color_manual(values = c("TRUE" = "#2C6E49", "FALSE" = "black"),
                     guide = "none") +
  labs(#title    = "Figure 2: Log-Likelihood Comparison Across Parametric Models",
       subtitle = "Higher (less negative) = better fit",
       x = "Log-Likelihood", y = "Model") +
  theme_bw(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))

print(p_loglik)

# ── Figure 3: Survival Curves — All Models ───────────────────
time_seq_all <- seq(0, max(colon_clean$time, na.rm = TRUE), by = 10)

colors_models <- c("#E63946","#457B9D","#2C6E49","#F4A261",
                   "#9B2226","#6A0572","#3D405B")
names(colors_models) <- names(fits)

surv_curves <- do.call(rbind, lapply(names(fits), function(m) {
  s <- summary(fits[[m]], t = time_seq_all, type = "survival", ci = FALSE)[[1]]
  data.frame(Time = s$time, Survival = s$est, Model = m)
}))

p_surv_compare <- ggplot(surv_curves, aes(x = Time, y = Survival, color = Model)) +
  geom_line(linewidth = 0.9) +
  scale_color_manual(values = colors_models) +
  labs(#title  = "Figure 3: Fitted Survival Curves — All Parametric Models",
       x      = "Time (days)",
       y      = "Survival Probability S(t)",
       color  = "Model") +
  theme_bw(base_size = 13) +
  theme(plot.title     = element_text(face = "bold"),
        legend.position = "right")

print(p_surv_compare)

# Life Functions of the Best Model
# ============================================================

time_seq <- seq(1, max(colon_clean$time, na.rm = TRUE), by = 10)

surv_best   <- summary(best_model, t = time_seq, type = "survival", ci = FALSE)[[1]]
haz_best    <- summary(best_model, t = time_seq, type = "hazard",   ci = FALSE)[[1]]

S_t <- surv_best$est
h_t <- haz_best$est
F_t <- 1 - S_t
f_t <- h_t * S_t


# ── Mean & Variance ─────────────────
dt     <- diff(time_seq)[1]
mean_T <- sum(S_t) * dt
ET2    <- 2 * sum(time_seq * S_t) * dt
var_T  <- ET2 - mean_T^2

cat(sprintf("\nMean survival time  : %.2f days (%.1f years)\n",
            mean_T, mean_T / 365))
## 
## Mean survival time  : 2130.40 days (5.8 years)
cat(sprintf("Variance            : %.2f days^2\n", var_T))
## Variance            : 1491721.66 days^2
cat(sprintf("Standard deviation  : %.2f days\n",  sqrt(var_T)))
## Standard deviation  : 1221.36 days
# ── Table 4: Life Functions at Selected Time Points ───────────
selected_times <- c(100, 200, 365, 500, 730, 1000, 1500, 2000, 2500, 3000)
lf_idx         <- sapply(selected_times,
                         function(tp) which.min(abs(time_seq - tp)))

tbl4_data <- data.frame(
  `Time (days)` = time_seq[lf_idx],
  `S(t)`        = round(S_t[lf_idx], 4),
  `h(t)`        = round(h_t[lf_idx], 6),
  `f(t)`        = round(f_t[lf_idx], 6),
  `F(t)`        = round(F_t[lf_idx], 4),
  check.names   = FALSE
)

kable(tbl4_data,
      caption = paste("Table 4: Life Functions at Selected Time Points —",
                      best_model_name, "Model"),
      align   = "rrrrr") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, position = "center") %>%
  add_header_above(c(" " = 1, "Survival" = 1, "Hazard" = 1,
                     "Density" = 1, "CDF" = 1)) %>%
  column_spec(1, bold = TRUE)
Table 4: Life Functions at Selected Time Points — GenGamma Model
Survival
Hazard
Density
CDF
Time (days) S(t) h(t) f(t) F(t)
101 0.9901 0.000229 0.000227 0.0099
201 0.9605 0.000354 0.000340 0.0395
361 0.9035 0.000394 0.000356 0.0965
501 0.8554 0.000385 0.000329 0.1446
731 0.7856 0.000354 0.000278 0.2144
1001 0.7177 0.000317 0.000227 0.2823
1501 0.6212 0.000263 0.000164 0.3788
2001 0.5500 0.000226 0.000124 0.4500
2501 0.4950 0.000198 0.000098 0.5050
3001 0.4509 0.000176 0.000079 0.5491
# ── Table 5: Mean & Variance ──────────────────────────────────
tbl5_data <- data.frame(
  Statistic = c("Mean Survival Time (days)",
                "Mean Survival Time (years)",
                "Variance (days\u00b2)",
                "Standard Deviation (days)"),
  Value     = c(round(mean_T,        2),
                round(mean_T / 365,  2),
                round(var_T,         2),
                round(sqrt(var_T),   2))
)

kable(tbl5_data,
      caption = paste("Table 5: Mean and Variance of Survival Time —",
                      best_model_name, "Model"),
      align   = "lr") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, position = "center") %>%
  column_spec(1, bold = TRUE)
Table 5: Mean and Variance of Survival Time — GenGamma Model
Statistic Value
Mean Survival Time (days) 2130.40
Mean Survival Time (years) 5.84
Variance (days²) 1491721.66
Standard Deviation (days) 1221.36
# ── Figure 4: 2×2 Life Function Plots ────────────────────────

df_life <- data.frame(Time = time_seq, S = S_t, h = h_t, f = f_t, F = F_t)

p_St <- ggplot(df_life, aes(x = Time, y = S)) +
  geom_line(color = "#457B9D", linewidth = 1.2) +
  labs(title = paste("Survival Function —", best_model_name),
       x = "Time (days)", y = "S(t)") +
  theme_bw(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 11))

p_ht <- ggplot(df_life, aes(x = Time, y = h)) +
  geom_line(color = "#E63946", linewidth = 1.2) +
  labs(title = paste("Hazard Function —", best_model_name),
       x = "Time (days)", y = "h(t)") +
  theme_bw(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 11))

p_ft <- ggplot(df_life, aes(x = Time, y = f)) +
  geom_line(color = "#2C6E49", linewidth = 1.2) +
  labs(title = paste("Density Function —", best_model_name),
       x = "Time (days)", y = "f(t)") +
  theme_bw(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 11))

p_Ft <- ggplot(df_life, aes(x = Time, y = F)) +
  geom_line(color = "#9B2226", linewidth = 1.2) +
  labs(title = paste("CDF —", best_model_name),
       x = "Time (days)", y = "F(t)") +
  theme_bw(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 11))

p_life_grid <- gridExtra::grid.arrange(p_St, p_ht, p_ft, p_Ft, nrow = 2)

# MLE Parameter Table & Interpretation
# ============================================================

# ── Table 6: MLE Parameter Estimates ─────────────────────────

param_raw <- as.data.frame(best_model$res)

tbl6_data <- data.frame(
  Parameter       = rownames(param_raw),
  Estimate        = round(param_raw$est,    4),
  `Std. Error`    = round(param_raw$se,     4),
  `95% CI Lower`  = round(param_raw$`L95%`, 4),
  `95% CI Upper`  = round(param_raw$`U95%`, 4),
  check.names     = FALSE
)

kable(tbl6_data,
      caption = paste("Table 6: MLE Parameter Estimates —",
                      best_model_name, "Model"),
      align   = "lrrrr") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, position = "center") %>%
  column_spec(1, bold = TRUE)
Table 6: MLE Parameter Estimates — GenGamma Model
Parameter Estimate Std. Error 95% CI Lower 95% CI Upper
mu 7.5454 0.1166 7.3169 7.7739
sigma 1.5753 0.0731 1.4382 1.7253
Q -0.4805 0.1911 -0.8550 -0.1061
cat("\n=== Full Model Summary ===\n")
## 
## === Full Model Summary ===
print(best_model)
## Call:
## flexsurvreg(formula = SurvObj_colon ~ 1, data = colon_clean, 
##     dist = "gengamma")
## 
## Estimates: 
##        est      L95%     U95%     se     
## mu      7.5454   7.3169   7.7739   0.1166
## sigma   1.5753   1.4382   1.7253   0.0731
## Q      -0.4805  -0.8550  -0.1061   0.1911
## 
## N = 929,  Events: 452,  Censored: 477
## Total time at risk: 1551389
## Log-likelihood = -4103.464, df = 3
## AIC = 8212.927
# ── Printed Interpretation ───────────────────────────────────
cat("\n========================================================\n")
## 
## ========================================================
cat("INTERPRETATION SUMMARY\n")
## INTERPRETATION SUMMARY
cat("========================================================\n")
## ========================================================
cat("Model     :", best_model_name, "\n")
## Model     : GenGamma
cat("Dataset   : Colon Cancer\n")
## Dataset   : Colon Cancer
cat("Patients  :", n_total, " | Events:", n_events,
    " | Censored:", n_censored, "\n\n")
## Patients  : 929  | Events: 452  | Censored: 477
cat("Shape parameter:\n")
## Shape parameter:
cat("  > 1 → Hazard INCREASES over time (aging / deterioration)\n")
##   > 1 → Hazard INCREASES over time (aging / deterioration)
cat("  < 1 → Hazard DECREASES over time (early risk dominates)\n")
##   < 1 → Hazard DECREASES over time (early risk dominates)
cat("  = 1 → Constant hazard (Exponential)\n\n")
##   = 1 → Constant hazard (Exponential)
cat("Scale parameter: sets the characteristic time scale of survival.\n\n")
## Scale parameter: sets the characteristic time scale of survival.
cat("Life Functions:\n")
## Life Functions:
cat("  S(t) : Probability of surviving beyond time t\n")
##   S(t) : Probability of surviving beyond time t
cat("  h(t) : Instantaneous risk of the event at time t\n")
##   h(t) : Instantaneous risk of the event at time t
cat("  f(t) : Probability density of the event near time t\n")
##   f(t) : Probability density of the event near time t
cat("  F(t) : Probability of having experienced the event by time t\n\n")
##   F(t) : Probability of having experienced the event by time t
cat(sprintf("Mean survival : %.2f days (%.1f years)\n", mean_T, mean_T / 365))
## Mean survival : 2130.40 days (5.8 years)
cat(sprintf("Std deviation : %.2f days\n", sqrt(var_T)))
## Std deviation : 1221.36 days
cat("========================================================\n")
## ========================================================
# Saving all figs & tables
# ============================================================

cat("\n\n=== SAVING OUTPUTS ===\n")
## 
## 
## === SAVING OUTPUTS ===
# ── Figures ──────────────────────────────────────────────────
ggsave(file.path(fig_dir, "Figure1_AIC_Comparison.png"),
       plot = p_aic, width = 8, height = 5, dpi = 300, bg = "white")
cat("Saved: Figure1_AIC_Comparison.png\n")
## Saved: Figure1_AIC_Comparison.png
ggsave(file.path(fig_dir, "Figure2_LogLikelihood_Comparison.png"),
       plot = p_loglik, width = 8, height = 5, dpi = 300, bg = "white")
cat("Saved: Figure2_LogLikelihood_Comparison.png\n")
## Saved: Figure2_LogLikelihood_Comparison.png
ggsave(file.path(fig_dir, "Figure3_Survival_Curves_AllModels.png"),
       plot = p_surv_compare, width = 9, height = 5, dpi = 300, bg = "white")
cat("Saved: Figure3_Survival_Curves_AllModels.png\n")
## Saved: Figure3_Survival_Curves_AllModels.png
ggsave(file.path(fig_dir, "Figure4_LifeFunctions_BestModel.png"),
       plot = p_life_grid, width = 10, height = 8, dpi = 300, bg = "white")
cat("Saved: Figure4_LifeFunctions_BestModel.png\n")
## Saved: Figure4_LifeFunctions_BestModel.png
# ── Tables as CSV ─────────────────────────────────────────────
write.csv(tbl1_data, file.path(tbl_dir, "Table1_Variable_Summary.csv"),  row.names = FALSE)
write.csv(tbl2_data, file.path(tbl_dir, "Table2_Sample_Summary.csv"),    row.names = FALSE)
write.csv(tbl3_data, file.path(tbl_dir, "Table3_Model_Comparison.csv"),  row.names = FALSE)
write.csv(tbl4_data, file.path(tbl_dir, "Table4_Life_Functions.csv"),    row.names = FALSE)
write.csv(tbl5_data, file.path(tbl_dir, "Table5_Mean_Variance.csv"),     row.names = FALSE)
write.csv(tbl6_data, file.path(tbl_dir, "Table6_MLE_Parameters.csv"),    row.names = FALSE)
cat("All CSV tables saved.\n")
## All CSV tables saved.
# ── Tables as HTML ───────────────────
save_kable(
  kable(tbl1_data,
        caption = "Table 1: Summary Statistics of Key Variables (Pre-Imputation)",
        align   = "lrrrrr") %>%
    kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                  full_width = FALSE, position = "center") %>%
    column_spec(1, bold = TRUE),
  file = file.path(tbl_dir, "Table1_Variable_Summary.html")
)

save_kable(
  kable(tbl2_data,
        caption   = "Table 2: Sample Summary — Colon Cancer Dataset",
        col.names = c("Statistic", "Value"), align = "lr") %>%
    kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                  full_width = FALSE, position = "center") %>%
    column_spec(1, bold = TRUE),
  file = file.path(tbl_dir, "Table2_Sample_Summary.html")
)

save_kable(
  kable(tbl3_data,
        caption = "Table 3: Parametric Model Comparison — MLE, Log-Likelihood, AIC & BIC",
        align   = "llrrr") %>%
    kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                  full_width = FALSE, position = "center") %>%
    column_spec(1, bold = TRUE) %>%
    row_spec(1, bold = TRUE, color = "white", background = "#2C6E49"),
  file = file.path(tbl_dir, "Table3_Model_Comparison.html")
)

save_kable(
  kable(tbl4_data,
        caption = paste("Table 4: Life Functions at Selected Time Points —",
                        best_model_name, "Model"),
        align   = "rrrrr") %>%
    kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                  full_width = FALSE, position = "center") %>%
    add_header_above(c(" " = 1, "Survival" = 1, "Hazard" = 1,
                       "Density" = 1, "CDF" = 1)) %>%
    column_spec(1, bold = TRUE),
  file = file.path(tbl_dir, "Table4_Life_Functions.html")
)

save_kable(
  kable(tbl5_data,
        caption = paste("Table 5: Mean and Variance of Survival Time —",
                        best_model_name, "Model"),
        align   = "lr") %>%
    kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                  full_width = FALSE, position = "center") %>%
    column_spec(1, bold = TRUE),
  file = file.path(tbl_dir, "Table5_Mean_Variance.html")
)

save_kable(
  kable(tbl6_data,
        caption = paste("Table 6: MLE Parameter Estimates —",
                        best_model_name, "Model"),
        align   = "lrrrr") %>%
    kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                  full_width = FALSE, position = "center") %>%
    column_spec(1, bold = TRUE),
  file = file.path(tbl_dir, "Table6_MLE_Parameters.html")
)

cat("All HTML tables saved.\n")
## All HTML tables saved.
# Final confrmation
# ============================================================
cat("\n========================================================\n")
## 
## ========================================================
cat("ALL OUTPUTS SAVED SUCCESSFULLY\n")
## ALL OUTPUTS SAVED SUCCESSFULLY
cat("========================================================\n")
## ========================================================
cat("Root folder :", file.path(base_dir, "outputs"), "\n\n")
## Root folder : /cloud/project/outputs
cat("FIGURES (", fig_dir, ")\n", sep = "")
## FIGURES (/cloud/project/outputs/figures)
cat("  Figure1_AIC_Comparison.png\n")
##   Figure1_AIC_Comparison.png
cat("  Figure2_LogLikelihood_Comparison.png\n")
##   Figure2_LogLikelihood_Comparison.png
cat("  Figure3_Survival_Curves_AllModels.png\n")
##   Figure3_Survival_Curves_AllModels.png
cat("  Figure4_LifeFunctions_BestModel.png\n\n")
##   Figure4_LifeFunctions_BestModel.png
cat("TABLES  (", tbl_dir, ")\n", sep = "")
## TABLES  (/cloud/project/outputs/tables)
cat("  Table1_Variable_Summary     .csv  .html\n")
##   Table1_Variable_Summary     .csv  .html
cat("  Table2_Sample_Summary       .csv  .html\n")
##   Table2_Sample_Summary       .csv  .html
cat("  Table3_Model_Comparison     .csv  .html\n")
##   Table3_Model_Comparison     .csv  .html
cat("  Table4_Life_Functions       .csv  .html\n")
##   Table4_Life_Functions       .csv  .html
cat("  Table5_Mean_Variance        .csv  .html\n")
##   Table5_Mean_Variance        .csv  .html
cat("  Table6_MLE_Parameters       .csv  .html\n")
##   Table6_MLE_Parameters       .csv  .html
cat("========================================================\n")
## ========================================================

```