SA.knit

# PARAMETRIC SURVIVAL ANALYSIS — GROUP 2
# Dataset : Colon Cancer (from survival package)
# Event   : Death (etype == 2)
# ============================================================

rm(list = ls())

# ============================================================
library(survival)
library(flexsurv)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

library(knitr)
library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

# ============================================================
# ── Saving outputs ─────────────

base_dir  <- getwd()
fig_dir   <- file.path(base_dir, "outputs", "figures")
tbl_dir   <- file.path(base_dir, "outputs", "tables")

dir.create(fig_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(tbl_dir, recursive = TRUE, showWarnings = FALSE)

cat("Figures will be saved to :", fig_dir, "\n")

## Figures will be saved to : /cloud/project/outputs/figures

cat("Tables  will be saved to :", tbl_dir, "\n\n")

## Tables  will be saved to : /cloud/project/outputs/tables

# Data Exploration
# ============================================================

colon <- survival::colon

colon_raw <- colon %>%
  filter(etype == 2) %>%
  distinct(id, .keep_all = TRUE)

cat("=== Dimensions (death records, one row per patient) ===\n")

## === Dimensions (death records, one row per patient) ===

dim(colon_raw)

## [1] 929  16

cat("\n=== Missing Values per Variable (before imputation) ===\n")

## 
## === Missing Values per Variable (before imputation) ===

mv_before <- colSums(is.na(colon_raw))
print(mv_before[mv_before > 0])

##  nodes differ 
##     18     23

# ── Table 1: Summary Statistics of Key Variables ────────────
tbl1_data <- data.frame(
  Variable  = c("Time (days)", "Status", "Age (years)", "Sex", "Nodes"),
  Min       = c(min(colon_raw$time,   na.rm = TRUE),
                min(colon_raw$status, na.rm = TRUE),
                min(colon_raw$age,    na.rm = TRUE),
                min(colon_raw$sex,    na.rm = TRUE),
                min(colon_raw$nodes,  na.rm = TRUE)),
  Max       = c(max(colon_raw$time,   na.rm = TRUE),
                max(colon_raw$status, na.rm = TRUE),
                max(colon_raw$age,    na.rm = TRUE),
                max(colon_raw$sex,    na.rm = TRUE),
                max(colon_raw$nodes,  na.rm = TRUE)),
  Mean      = round(c(mean(colon_raw$time,   na.rm = TRUE),
                      mean(colon_raw$status, na.rm = TRUE),
                      mean(colon_raw$age,    na.rm = TRUE),
                      mean(colon_raw$sex,    na.rm = TRUE),
                      mean(colon_raw$nodes,  na.rm = TRUE)), 2),
  Median    = round(c(median(colon_raw$time,   na.rm = TRUE),
                      median(colon_raw$status, na.rm = TRUE),
                      median(colon_raw$age,    na.rm = TRUE),
                      median(colon_raw$sex,    na.rm = TRUE),
                      median(colon_raw$nodes,  na.rm = TRUE)), 2),
  Missing   = c(sum(is.na(colon_raw$time)),
                sum(is.na(colon_raw$status)),
                sum(is.na(colon_raw$age)),
                sum(is.na(colon_raw$sex)),
                sum(is.na(colon_raw$nodes)))
)

kable(tbl1_data,
      caption = "Table 1: Summary Statistics of Key Variables (Pre-Imputation)",
      align   = "lrrrrr") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, position = "center") %>%
  column_spec(1, bold = TRUE)

Table 1: Summary Statistics of Key Variables (Pre-Imputation)
Variable	Min	Max	Mean	Median	Missing
Time (days)	23	3329	1669.96	1976	0
Status	0	1	0.49	0	0
Age (years)	18	85	59.75	61	0
Sex	0	1	0.52	1	0
Nodes	0	33	3.66	2	18

# Data Preparation — LOCF and Survival Object Definition
# ============================================================

# ── Handling missings ──────────────────
locf <- function(x) {
  for (i in seq_along(x)) {
    if (is.na(x[i]) && i > 1) x[i] <- x[i - 1]
  }

  for (i in rev(seq_along(x))) {
    if (is.na(x[i]) && i < length(x)) x[i] <- x[i + 1]
  }
  return(x)
}

colon_clean <- colon_raw %>%
  arrange(id) %>%
  mutate(nodes = locf(nodes))

cat("=== Missing Values After LOCF Imputation ===\n")

## === Missing Values After LOCF Imputation ===

print(colSums(is.na(colon_clean[, c("time","status","age","sex","nodes")])))

##   time status    age    sex  nodes 
##      0      0      0      0      0

# ── Table 2: Sample Summary ───────────────────────────────────
n_total    <- nrow(colon_clean)
n_events   <- sum(colon_clean$status == 1, na.rm = TRUE)
n_censored <- sum(colon_clean$status == 0, na.rm = TRUE)
pct_event  <- round(100 * n_events  / n_total, 1)
pct_cens   <- round(100 * n_censored / n_total, 1)

tbl2_data <- data.frame(
  Statistic = c("Total Patients",
                "Events (Deaths)",
                "Censored Observations",
                "Percentage Events (%)",
                "Percentage Censored (%)",
                "Minimum Follow-up (days)",
                "Maximum Follow-up (days)",
                "Mean Follow-up (days)",
                "Median Follow-up (days)"),
  Value     = c(n_total,
                n_events,
                n_censored,
                paste0(pct_event,  "%"),
                paste0(pct_cens,   "%"),
                min(colon_clean$time,    na.rm = TRUE),
                max(colon_clean$time,    na.rm = TRUE),
                round(mean(colon_clean$time,   na.rm = TRUE), 1),
                round(median(colon_clean$time, na.rm = TRUE), 1))
)

kable(tbl2_data,
      caption   = "Table 2: Sample Summary — Colon Cancer Dataset (Post-Cleaning)",
      col.names = c("Statistic", "Value"),
      align     = "lr") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, position = "center") %>%
  column_spec(1, bold = TRUE)

Table 2: Sample Summary — Colon Cancer Dataset (Post-Cleaning)
Statistic	Value
Total Patients	929
Events (Deaths)	452
Censored Observations	477
Percentage Events (%)	48.7%
Percentage Censored (%)	51.3%
Minimum Follow-up (days)	23
Maximum Follow-up (days)	3329
Mean Follow-up (days)	1670
Median Follow-up (days)	1976

# ── Create Survival Object ────────────────────────────────────
SurvObj_colon <- Surv(time  = colon_clean$time,
                      event = colon_clean$status)


# Fit Multiple Parametric Models
# ============================================================

fits <- list(
  Exponential  = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "exp"),
  Weibull      = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "weibull"),
  LogNormal    = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "lnorm"),
  LogLogistic  = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "llogis"),
  Gamma        = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "gamma"),
  GenGamma     = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "gengamma"),
  Gompertz     = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "gompertz")
)

# ── Table 3: Model Comparison ─────────────────────────────────
extract_params <- function(model_name, fit) {
  params     <- fit$res[, "est"]
  param_str  <- paste(
    paste(rownames(fit$res), round(params, 4), sep = " = "),
    collapse = "; "
  )
  data.frame(
    Model      = model_name,
    Parameters = param_str,
    LogLik     = round(as.numeric(logLik(fit)), 3),
    AIC        = round(AIC(fit), 3),
    BIC        = round(BIC(fit), 3),
    stringsAsFactors = FALSE
  )
}

tbl3_data <- do.call(rbind, mapply(extract_params,
                                   names(fits), fits,
                                   SIMPLIFY = FALSE))
tbl3_data <- tbl3_data[order(tbl3_data$AIC), ]
rownames(tbl3_data) <- NULL

kable(tbl3_data,
      caption = "Table 3: Parametric Model Comparison — MLE Parameters, Log-Likelihood, AIC & BIC",
      align   = "llrrr") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, position = "center") #%>%

Table 3: Parametric Model Comparison — MLE Parameters, Log-Likelihood, AIC & BIC
Model	Parameters	LogLik	AIC	BIC
GenGamma	mu = 7.5454; sigma = 1.5753; Q = -0.4805	-4103.464	8212.927	8227.429
LogNormal	meanlog = 7.7773; sdlog = 1.4318	-4106.571	8217.142	8226.810
LogLogistic	shape = 1.2017; scale = 2309.7226	-4116.389	8236.779	8246.447
Gompertz	shape = -3e-04; rate = 4e-04	-4123.578	8251.156	8260.825
Exponential	rate = 3e-04	-4131.723	8265.445	8270.279
Gamma	shape = 1.0504; rate = 3e-04	-4131.348	8266.696	8276.364
Weibull	shape = 0.9991; scale = 3433.9201	-4131.722	8267.445	8277.113

  # column_spec(1, bold = TRUE) %>%
  # row_spec(1, bold = TRUE, color = "white", background = "#2C6E49")


# Model Selection
# ============================================================

best_model_name <- tbl3_data$Model[1]
best_model      <- fits[[best_model_name]]

cat("\n=== Best Model by AIC:", best_model_name, "===\n")

## 
## === Best Model by AIC: GenGamma ===

print(best_model)

## Call:
## flexsurvreg(formula = SurvObj_colon ~ 1, data = colon_clean, 
##     dist = "gengamma")
## 
## Estimates: 
##        est      L95%     U95%     se     
## mu      7.5454   7.3169   7.7739   0.1166
## sigma   1.5753   1.4382   1.7253   0.0731
## Q      -0.4805  -0.8550  -0.1061   0.1911
## 
## N = 929,  Events: 452,  Censored: 477
## Total time at risk: 1551389
## Log-likelihood = -4103.464, df = 3
## AIC = 8212.927

# ── Figure 1: AIC Bar Chart ───────────────────────────────────
aic_df        <- data.frame(Model = tbl3_data$Model, AIC = tbl3_data$AIC)
aic_df$Model  <- factor(aic_df$Model, levels = aic_df$Model[order(aic_df$AIC)])

p_aic <- ggplot(aic_df, aes(x = Model, y = AIC,
                             fill = Model == best_model_name)) +
  geom_bar(stat = "identity", width = 0.6, color = "black", linewidth = 0.3) +
  geom_text(aes(label = round(AIC, 1)), vjust = -0.4, size = 3.5, fontface = "bold") +
  scale_fill_manual(values = c("TRUE" = "#2C6E49", "FALSE" = "#A8DADC"),
                    guide = "none") +
  labs(#title    = "Figure 1: AIC Comparison Across Parametric Models",
       subtitle = paste("Best model:", best_model_name, "(lowest AIC, highlighted)"),
       x = "Model", y = "AIC") +
  theme_bw(base_size = 13) +
  theme(plot.title    = element_text(face = "bold"),
        axis.text.x   = element_text(angle = 20, hjust = 1))

print(p_aic)

# ── Figure 2: Log-Likelihood Dot Plot ────────────────────────
loglik_df        <- data.frame(Model = tbl3_data$Model, LogLik = tbl3_data$LogLik)
loglik_df$Model  <- factor(loglik_df$Model,
                            levels = loglik_df$Model[order(loglik_df$LogLik,
                                                           decreasing = TRUE)])

p_loglik <- ggplot(loglik_df, aes(x = LogLik, y = Model,
                                   color = Model == best_model_name)) +
  geom_point(size = 5) +
  geom_vline(xintercept = max(loglik_df$LogLik),
             linetype = "dashed", color = "#2C6E49") +
  geom_text(aes(label = round(LogLik, 1)), vjust = -1.2, size = 3.5) +
  scale_color_manual(values = c("TRUE" = "#2C6E49", "FALSE" = "black"),
                     guide = "none") +
  labs(#title    = "Figure 2: Log-Likelihood Comparison Across Parametric Models",
       subtitle = "Higher (less negative) = better fit",
       x = "Log-Likelihood", y = "Model") +
  theme_bw(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))

print(p_loglik)

# ── Figure 3: Survival Curves — All Models ───────────────────
time_seq_all <- seq(0, max(colon_clean$time, na.rm = TRUE), by = 10)

colors_models <- c("#E63946","#457B9D","#2C6E49","#F4A261",
                   "#9B2226","#6A0572","#3D405B")
names(colors_models) <- names(fits)

surv_curves <- do.call(rbind, lapply(names(fits), function(m) {
  s <- summary(fits[[m]], t = time_seq_all, type = "survival", ci = FALSE)[[1]]
  data.frame(Time = s$time, Survival = s$est, Model = m)
}))

p_surv_compare <- ggplot(surv_curves, aes(x = Time, y = Survival, color = Model)) +
  geom_line(linewidth = 0.9) +
  scale_color_manual(values = colors_models) +
  labs(#title  = "Figure 3: Fitted Survival Curves — All Parametric Models",
       x      = "Time (days)",
       y      = "Survival Probability S(t)",
       color  = "Model") +
  theme_bw(base_size = 13) +
  theme(plot.title     = element_text(face = "bold"),
        legend.position = "right")

print(p_surv_compare)

# Life Functions of the Best Model
# ============================================================

time_seq <- seq(1, max(colon_clean$time, na.rm = TRUE), by = 10)

surv_best   <- summary(best_model, t = time_seq, type = "survival", ci = FALSE)[[1]]
haz_best    <- summary(best_model, t = time_seq, type = "hazard",   ci = FALSE)[[1]]

S_t <- surv_best$est
h_t <- haz_best$est
F_t <- 1 - S_t
f_t <- h_t * S_t


# ── Mean & Variance ─────────────────
dt     <- diff(time_seq)[1]
mean_T <- sum(S_t) * dt
ET2    <- 2 * sum(time_seq * S_t) * dt
var_T  <- ET2 - mean_T^2

cat(sprintf("\nMean survival time  : %.2f days (%.1f years)\n",
            mean_T, mean_T / 365))

## 
## Mean survival time  : 2130.40 days (5.8 years)

cat(sprintf("Variance            : %.2f days^2\n", var_T))

## Variance            : 1491721.66 days^2

cat(sprintf("Standard deviation  : %.2f days\n",  sqrt(var_T)))

## Standard deviation  : 1221.36 days

# ── Table 4: Life Functions at Selected Time Points ───────────
selected_times <- c(100, 200, 365, 500, 730, 1000, 1500, 2000, 2500, 3000)
lf_idx         <- sapply(selected_times,
                         function(tp) which.min(abs(time_seq - tp)))

tbl4_data <- data.frame(
  `Time (days)` = time_seq[lf_idx],
  `S(t)`        = round(S_t[lf_idx], 4),
  `h(t)`        = round(h_t[lf_idx], 6),
  `f(t)`        = round(f_t[lf_idx], 6),
  `F(t)`        = round(F_t[lf_idx], 4),
  check.names   = FALSE
)

kable(tbl4_data,
      caption = paste("Table 4: Life Functions at Selected Time Points —",
                      best_model_name, "Model"),
      align   = "rrrrr") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, position = "center") %>%
  add_header_above(c(" " = 1, "Survival" = 1, "Hazard" = 1,
                     "Density" = 1, "CDF" = 1)) %>%
  column_spec(1, bold = TRUE)

Table 4: Life Functions at Selected Time Points — GenGamma Model
	Survival	Hazard	Density	CDF
Time (days)	S(t)	h(t)	f(t)	F(t)
101	0.9901	0.000229	0.000227	0.0099
201	0.9605	0.000354	0.000340	0.0395
361	0.9035	0.000394	0.000356	0.0965
501	0.8554	0.000385	0.000329	0.1446
731	0.7856	0.000354	0.000278	0.2144
1001	0.7177	0.000317	0.000227	0.2823
1501	0.6212	0.000263	0.000164	0.3788
2001	0.5500	0.000226	0.000124	0.4500
2501	0.4950	0.000198	0.000098	0.5050
3001	0.4509	0.000176	0.000079	0.5491

# ── Table 5: Mean & Variance ──────────────────────────────────
tbl5_data <- data.frame(
  Statistic = c("Mean Survival Time (days)",
                "Mean Survival Time (years)",
                "Variance (days\u00b2)",
                "Standard Deviation (days)"),
  Value     = c(round(mean_T,        2),
                round(mean_T / 365,  2),
                round(var_T,         2),
                round(sqrt(var_T),   2))
)

kable(tbl5_data,
      caption = paste("Table 5: Mean and Variance of Survival Time —",
                      best_model_name, "Model"),
      align   = "lr") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, position = "center") %>%
  column_spec(1, bold = TRUE)

Table 5: Mean and Variance of Survival Time — GenGamma Model
Statistic	Value
Mean Survival Time (days)	2130.40
Mean Survival Time (years)	5.84
Variance (days²)	1491721.66
Standard Deviation (days)	1221.36

# ── Figure 4: 2×2 Life Function Plots ────────────────────────

df_life <- data.frame(Time = time_seq, S = S_t, h = h_t, f = f_t, F = F_t)

p_St <- ggplot(df_life, aes(x = Time, y = S)) +
  geom_line(color = "#457B9D", linewidth = 1.2) +
  labs(title = paste("Survival Function —", best_model_name),
       x = "Time (days)", y = "S(t)") +
  theme_bw(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 11))

p_ht <- ggplot(df_life, aes(x = Time, y = h)) +
  geom_line(color = "#E63946", linewidth = 1.2) +
  labs(title = paste("Hazard Function —", best_model_name),
       x = "Time (days)", y = "h(t)") +
  theme_bw(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 11))

p_ft <- ggplot(df_life, aes(x = Time, y = f)) +
  geom_line(color = "#2C6E49", linewidth = 1.2) +
  labs(title = paste("Density Function —", best_model_name),
       x = "Time (days)", y = "f(t)") +
  theme_bw(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 11))

p_Ft <- ggplot(df_life, aes(x = Time, y = F)) +
  geom_line(color = "#9B2226", linewidth = 1.2) +
  labs(title = paste("CDF —", best_model_name),
       x = "Time (days)", y = "F(t)") +
  theme_bw(base_size = 12) +
  theme(plot.title = element_text(face = "bold", size = 11))

p_life_grid <- gridExtra::grid.arrange(p_St, p_ht, p_ft, p_Ft, nrow = 2)

# MLE Parameter Table & Interpretation
# ============================================================

# ── Table 6: MLE Parameter Estimates ─────────────────────────

param_raw <- as.data.frame(best_model$res)

tbl6_data <- data.frame(
  Parameter       = rownames(param_raw),
  Estimate        = round(param_raw$est,    4),
  `Std. Error`    = round(param_raw$se,     4),
  `95% CI Lower`  = round(param_raw$`L95%`, 4),
  `95% CI Upper`  = round(param_raw$`U95%`, 4),
  check.names     = FALSE
)

kable(tbl6_data,
      caption = paste("Table 6: MLE Parameter Estimates —",
                      best_model_name, "Model"),
      align   = "lrrrr") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, position = "center") %>%
  column_spec(1, bold = TRUE)

Table 6: MLE Parameter Estimates — GenGamma Model
Parameter	Estimate	Std. Error	95% CI Lower	95% CI Upper
mu	7.5454	0.1166	7.3169	7.7739
sigma	1.5753	0.0731	1.4382	1.7253
Q	-0.4805	0.1911	-0.8550	-0.1061

cat("\n=== Full Model Summary ===\n")

## 
## === Full Model Summary ===

print(best_model)

## Call:
## flexsurvreg(formula = SurvObj_colon ~ 1, data = colon_clean, 
##     dist = "gengamma")
## 
## Estimates: 
##        est      L95%     U95%     se     
## mu      7.5454   7.3169   7.7739   0.1166
## sigma   1.5753   1.4382   1.7253   0.0731
## Q      -0.4805  -0.8550  -0.1061   0.1911
## 
## N = 929,  Events: 452,  Censored: 477
## Total time at risk: 1551389
## Log-likelihood = -4103.464, df = 3
## AIC = 8212.927

# ── Printed Interpretation ───────────────────────────────────
cat("\n========================================================\n")

## 
## ========================================================

cat("INTERPRETATION SUMMARY\n")

## INTERPRETATION SUMMARY

cat("========================================================\n")

## ========================================================

cat("Model     :", best_model_name, "\n")

## Model     : GenGamma

cat("Dataset   : Colon Cancer\n")

## Dataset   : Colon Cancer

cat("Patients  :", n_total, " | Events:", n_events,
    " | Censored:", n_censored, "\n\n")

## Patients  : 929  | Events: 452  | Censored: 477

cat("Shape parameter:\n")

## Shape parameter:

cat("  > 1 → Hazard INCREASES over time (aging / deterioration)\n")

##   > 1 → Hazard INCREASES over time (aging / deterioration)

cat("  < 1 → Hazard DECREASES over time (early risk dominates)\n")

##   < 1 → Hazard DECREASES over time (early risk dominates)

cat("  = 1 → Constant hazard (Exponential)\n\n")

##   = 1 → Constant hazard (Exponential)

cat("Scale parameter: sets the characteristic time scale of survival.\n\n")

## Scale parameter: sets the characteristic time scale of survival.

cat("Life Functions:\n")

## Life Functions:

cat("  S(t) : Probability of surviving beyond time t\n")

##   S(t) : Probability of surviving beyond time t

cat("  h(t) : Instantaneous risk of the event at time t\n")

##   h(t) : Instantaneous risk of the event at time t

cat("  f(t) : Probability density of the event near time t\n")

##   f(t) : Probability density of the event near time t

cat("  F(t) : Probability of having experienced the event by time t\n\n")

##   F(t) : Probability of having experienced the event by time t

cat(sprintf("Mean survival : %.2f days (%.1f years)\n", mean_T, mean_T / 365))

## Mean survival : 2130.40 days (5.8 years)

cat(sprintf("Std deviation : %.2f days\n", sqrt(var_T)))

## Std deviation : 1221.36 days

cat("========================================================\n")

## ========================================================

# Saving all figs & tables
# ============================================================

cat("\n\n=== SAVING OUTPUTS ===\n")

## 
## 
## === SAVING OUTPUTS ===

# ── Figures ──────────────────────────────────────────────────
ggsave(file.path(fig_dir, "Figure1_AIC_Comparison.png"),
       plot = p_aic, width = 8, height = 5, dpi = 300, bg = "white")
cat("Saved: Figure1_AIC_Comparison.png\n")

## Saved: Figure1_AIC_Comparison.png

ggsave(file.path(fig_dir, "Figure2_LogLikelihood_Comparison.png"),
       plot = p_loglik, width = 8, height = 5, dpi = 300, bg = "white")
cat("Saved: Figure2_LogLikelihood_Comparison.png\n")

## Saved: Figure2_LogLikelihood_Comparison.png

ggsave(file.path(fig_dir, "Figure3_Survival_Curves_AllModels.png"),
       plot = p_surv_compare, width = 9, height = 5, dpi = 300, bg = "white")
cat("Saved: Figure3_Survival_Curves_AllModels.png\n")

## Saved: Figure3_Survival_Curves_AllModels.png

ggsave(file.path(fig_dir, "Figure4_LifeFunctions_BestModel.png"),
       plot = p_life_grid, width = 10, height = 8, dpi = 300, bg = "white")
cat("Saved: Figure4_LifeFunctions_BestModel.png\n")

## Saved: Figure4_LifeFunctions_BestModel.png

# ── Tables as CSV ─────────────────────────────────────────────
write.csv(tbl1_data, file.path(tbl_dir, "Table1_Variable_Summary.csv"),  row.names = FALSE)
write.csv(tbl2_data, file.path(tbl_dir, "Table2_Sample_Summary.csv"),    row.names = FALSE)
write.csv(tbl3_data, file.path(tbl_dir, "Table3_Model_Comparison.csv"),  row.names = FALSE)
write.csv(tbl4_data, file.path(tbl_dir, "Table4_Life_Functions.csv"),    row.names = FALSE)
write.csv(tbl5_data, file.path(tbl_dir, "Table5_Mean_Variance.csv"),     row.names = FALSE)
write.csv(tbl6_data, file.path(tbl_dir, "Table6_MLE_Parameters.csv"),    row.names = FALSE)
cat("All CSV tables saved.\n")

## All CSV tables saved.

# ── Tables as HTML ───────────────────
save_kable(
  kable(tbl1_data,
        caption = "Table 1: Summary Statistics of Key Variables (Pre-Imputation)",
        align   = "lrrrrr") %>%
    kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                  full_width = FALSE, position = "center") %>%
    column_spec(1, bold = TRUE),
  file = file.path(tbl_dir, "Table1_Variable_Summary.html")
)

save_kable(
  kable(tbl2_data,
        caption   = "Table 2: Sample Summary — Colon Cancer Dataset",
        col.names = c("Statistic", "Value"), align = "lr") %>%
    kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                  full_width = FALSE, position = "center") %>%
    column_spec(1, bold = TRUE),
  file = file.path(tbl_dir, "Table2_Sample_Summary.html")
)

save_kable(
  kable(tbl3_data,
        caption = "Table 3: Parametric Model Comparison — MLE, Log-Likelihood, AIC & BIC",
        align   = "llrrr") %>%
    kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                  full_width = FALSE, position = "center") %>%
    column_spec(1, bold = TRUE) %>%
    row_spec(1, bold = TRUE, color = "white", background = "#2C6E49"),
  file = file.path(tbl_dir, "Table3_Model_Comparison.html")
)

save_kable(
  kable(tbl4_data,
        caption = paste("Table 4: Life Functions at Selected Time Points —",
                        best_model_name, "Model"),
        align   = "rrrrr") %>%
    kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                  full_width = FALSE, position = "center") %>%
    add_header_above(c(" " = 1, "Survival" = 1, "Hazard" = 1,
                       "Density" = 1, "CDF" = 1)) %>%
    column_spec(1, bold = TRUE),
  file = file.path(tbl_dir, "Table4_Life_Functions.html")
)

save_kable(
  kable(tbl5_data,
        caption = paste("Table 5: Mean and Variance of Survival Time —",
                        best_model_name, "Model"),
        align   = "lr") %>%
    kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                  full_width = FALSE, position = "center") %>%
    column_spec(1, bold = TRUE),
  file = file.path(tbl_dir, "Table5_Mean_Variance.html")
)

save_kable(
  kable(tbl6_data,
        caption = paste("Table 6: MLE Parameter Estimates —",
                        best_model_name, "Model"),
        align   = "lrrrr") %>%
    kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                  full_width = FALSE, position = "center") %>%
    column_spec(1, bold = TRUE),
  file = file.path(tbl_dir, "Table6_MLE_Parameters.html")
)

cat("All HTML tables saved.\n")

## All HTML tables saved.

# Final confrmation
# ============================================================
cat("\n========================================================\n")

## 
## ========================================================

cat("ALL OUTPUTS SAVED SUCCESSFULLY\n")

## ALL OUTPUTS SAVED SUCCESSFULLY

cat("========================================================\n")

## ========================================================

cat("Root folder :", file.path(base_dir, "outputs"), "\n\n")

## Root folder : /cloud/project/outputs

cat("FIGURES (", fig_dir, ")\n", sep = "")

## FIGURES (/cloud/project/outputs/figures)

cat("  Figure1_AIC_Comparison.png\n")

##   Figure1_AIC_Comparison.png

cat("  Figure2_LogLikelihood_Comparison.png\n")

##   Figure2_LogLikelihood_Comparison.png

cat("  Figure3_Survival_Curves_AllModels.png\n")

##   Figure3_Survival_Curves_AllModels.png

cat("  Figure4_LifeFunctions_BestModel.png\n\n")

##   Figure4_LifeFunctions_BestModel.png

cat("TABLES  (", tbl_dir, ")\n", sep = "")

## TABLES  (/cloud/project/outputs/tables)

cat("  Table1_Variable_Summary     .csv  .html\n")

##   Table1_Variable_Summary     .csv  .html

cat("  Table2_Sample_Summary       .csv  .html\n")

##   Table2_Sample_Summary       .csv  .html

cat("  Table3_Model_Comparison     .csv  .html\n")

##   Table3_Model_Comparison     .csv  .html

cat("  Table4_Life_Functions       .csv  .html\n")

##   Table4_Life_Functions       .csv  .html

cat("  Table5_Mean_Variance        .csv  .html\n")

##   Table5_Mean_Variance        .csv  .html

cat("  Table6_MLE_Parameters       .csv  .html\n")

##   Table6_MLE_Parameters       .csv  .html

cat("========================================================\n")

## ========================================================

```