# PARAMETRIC SURVIVAL ANALYSIS — GROUP 2
# Dataset : Colon Cancer (from survival package)
# Event : Death (etype == 2)
# ============================================================
rm(list = ls())
# ============================================================
library(survival)
library(flexsurv)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# ============================================================
# ── Saving outputs ─────────────
base_dir <- getwd()
fig_dir <- file.path(base_dir, "outputs", "figures")
tbl_dir <- file.path(base_dir, "outputs", "tables")
dir.create(fig_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(tbl_dir, recursive = TRUE, showWarnings = FALSE)
cat("Figures will be saved to :", fig_dir, "\n")
## Figures will be saved to : /cloud/project/outputs/figures
cat("Tables will be saved to :", tbl_dir, "\n\n")
## Tables will be saved to : /cloud/project/outputs/tables
# Data Exploration
# ============================================================
colon <- survival::colon
colon_raw <- colon %>%
filter(etype == 2) %>%
distinct(id, .keep_all = TRUE)
cat("=== Dimensions (death records, one row per patient) ===\n")
## === Dimensions (death records, one row per patient) ===
dim(colon_raw)
## [1] 929 16
cat("\n=== Missing Values per Variable (before imputation) ===\n")
##
## === Missing Values per Variable (before imputation) ===
mv_before <- colSums(is.na(colon_raw))
print(mv_before[mv_before > 0])
## nodes differ
## 18 23
# ── Table 1: Summary Statistics of Key Variables ────────────
tbl1_data <- data.frame(
Variable = c("Time (days)", "Status", "Age (years)", "Sex", "Nodes"),
Min = c(min(colon_raw$time, na.rm = TRUE),
min(colon_raw$status, na.rm = TRUE),
min(colon_raw$age, na.rm = TRUE),
min(colon_raw$sex, na.rm = TRUE),
min(colon_raw$nodes, na.rm = TRUE)),
Max = c(max(colon_raw$time, na.rm = TRUE),
max(colon_raw$status, na.rm = TRUE),
max(colon_raw$age, na.rm = TRUE),
max(colon_raw$sex, na.rm = TRUE),
max(colon_raw$nodes, na.rm = TRUE)),
Mean = round(c(mean(colon_raw$time, na.rm = TRUE),
mean(colon_raw$status, na.rm = TRUE),
mean(colon_raw$age, na.rm = TRUE),
mean(colon_raw$sex, na.rm = TRUE),
mean(colon_raw$nodes, na.rm = TRUE)), 2),
Median = round(c(median(colon_raw$time, na.rm = TRUE),
median(colon_raw$status, na.rm = TRUE),
median(colon_raw$age, na.rm = TRUE),
median(colon_raw$sex, na.rm = TRUE),
median(colon_raw$nodes, na.rm = TRUE)), 2),
Missing = c(sum(is.na(colon_raw$time)),
sum(is.na(colon_raw$status)),
sum(is.na(colon_raw$age)),
sum(is.na(colon_raw$sex)),
sum(is.na(colon_raw$nodes)))
)
kable(tbl1_data,
caption = "Table 1: Summary Statistics of Key Variables (Pre-Imputation)",
align = "lrrrrr") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, position = "center") %>%
column_spec(1, bold = TRUE)
Table 1: Summary Statistics of Key Variables (Pre-Imputation)
|
Variable
|
Min
|
Max
|
Mean
|
Median
|
Missing
|
|
Time (days)
|
23
|
3329
|
1669.96
|
1976
|
0
|
|
Status
|
0
|
1
|
0.49
|
0
|
0
|
|
Age (years)
|
18
|
85
|
59.75
|
61
|
0
|
|
Sex
|
0
|
1
|
0.52
|
1
|
0
|
|
Nodes
|
0
|
33
|
3.66
|
2
|
18
|
# Data Preparation — LOCF and Survival Object Definition
# ============================================================
# ── Handling missings ──────────────────
locf <- function(x) {
for (i in seq_along(x)) {
if (is.na(x[i]) && i > 1) x[i] <- x[i - 1]
}
for (i in rev(seq_along(x))) {
if (is.na(x[i]) && i < length(x)) x[i] <- x[i + 1]
}
return(x)
}
colon_clean <- colon_raw %>%
arrange(id) %>%
mutate(nodes = locf(nodes))
cat("=== Missing Values After LOCF Imputation ===\n")
## === Missing Values After LOCF Imputation ===
print(colSums(is.na(colon_clean[, c("time","status","age","sex","nodes")])))
## time status age sex nodes
## 0 0 0 0 0
# ── Table 2: Sample Summary ───────────────────────────────────
n_total <- nrow(colon_clean)
n_events <- sum(colon_clean$status == 1, na.rm = TRUE)
n_censored <- sum(colon_clean$status == 0, na.rm = TRUE)
pct_event <- round(100 * n_events / n_total, 1)
pct_cens <- round(100 * n_censored / n_total, 1)
tbl2_data <- data.frame(
Statistic = c("Total Patients",
"Events (Deaths)",
"Censored Observations",
"Percentage Events (%)",
"Percentage Censored (%)",
"Minimum Follow-up (days)",
"Maximum Follow-up (days)",
"Mean Follow-up (days)",
"Median Follow-up (days)"),
Value = c(n_total,
n_events,
n_censored,
paste0(pct_event, "%"),
paste0(pct_cens, "%"),
min(colon_clean$time, na.rm = TRUE),
max(colon_clean$time, na.rm = TRUE),
round(mean(colon_clean$time, na.rm = TRUE), 1),
round(median(colon_clean$time, na.rm = TRUE), 1))
)
kable(tbl2_data,
caption = "Table 2: Sample Summary — Colon Cancer Dataset (Post-Cleaning)",
col.names = c("Statistic", "Value"),
align = "lr") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, position = "center") %>%
column_spec(1, bold = TRUE)
Table 2: Sample Summary — Colon Cancer Dataset (Post-Cleaning)
|
Statistic
|
Value
|
|
Total Patients
|
929
|
|
Events (Deaths)
|
452
|
|
Censored Observations
|
477
|
|
Percentage Events (%)
|
48.7%
|
|
Percentage Censored (%)
|
51.3%
|
|
Minimum Follow-up (days)
|
23
|
|
Maximum Follow-up (days)
|
3329
|
|
Mean Follow-up (days)
|
1670
|
|
Median Follow-up (days)
|
1976
|
# ── Create Survival Object ────────────────────────────────────
SurvObj_colon <- Surv(time = colon_clean$time,
event = colon_clean$status)
# Fit Multiple Parametric Models
# ============================================================
fits <- list(
Exponential = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "exp"),
Weibull = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "weibull"),
LogNormal = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "lnorm"),
LogLogistic = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "llogis"),
Gamma = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "gamma"),
GenGamma = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "gengamma"),
Gompertz = flexsurvreg(SurvObj_colon ~ 1, data = colon_clean, dist = "gompertz")
)
# ── Table 3: Model Comparison ─────────────────────────────────
extract_params <- function(model_name, fit) {
params <- fit$res[, "est"]
param_str <- paste(
paste(rownames(fit$res), round(params, 4), sep = " = "),
collapse = "; "
)
data.frame(
Model = model_name,
Parameters = param_str,
LogLik = round(as.numeric(logLik(fit)), 3),
AIC = round(AIC(fit), 3),
BIC = round(BIC(fit), 3),
stringsAsFactors = FALSE
)
}
tbl3_data <- do.call(rbind, mapply(extract_params,
names(fits), fits,
SIMPLIFY = FALSE))
tbl3_data <- tbl3_data[order(tbl3_data$AIC), ]
rownames(tbl3_data) <- NULL
kable(tbl3_data,
caption = "Table 3: Parametric Model Comparison — MLE Parameters, Log-Likelihood, AIC & BIC",
align = "llrrr") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, position = "center") #%>%
Table 3: Parametric Model Comparison — MLE Parameters, Log-Likelihood,
AIC & BIC
|
Model
|
Parameters
|
LogLik
|
AIC
|
BIC
|
|
GenGamma
|
mu = 7.5454; sigma = 1.5753; Q = -0.4805
|
-4103.464
|
8212.927
|
8227.429
|
|
LogNormal
|
meanlog = 7.7773; sdlog = 1.4318
|
-4106.571
|
8217.142
|
8226.810
|
|
LogLogistic
|
shape = 1.2017; scale = 2309.7226
|
-4116.389
|
8236.779
|
8246.447
|
|
Gompertz
|
shape = -3e-04; rate = 4e-04
|
-4123.578
|
8251.156
|
8260.825
|
|
Exponential
|
rate = 3e-04
|
-4131.723
|
8265.445
|
8270.279
|
|
Gamma
|
shape = 1.0504; rate = 3e-04
|
-4131.348
|
8266.696
|
8276.364
|
|
Weibull
|
shape = 0.9991; scale = 3433.9201
|
-4131.722
|
8267.445
|
8277.113
|
# column_spec(1, bold = TRUE) %>%
# row_spec(1, bold = TRUE, color = "white", background = "#2C6E49")
# Model Selection
# ============================================================
best_model_name <- tbl3_data$Model[1]
best_model <- fits[[best_model_name]]
cat("\n=== Best Model by AIC:", best_model_name, "===\n")
##
## === Best Model by AIC: GenGamma ===
print(best_model)
## Call:
## flexsurvreg(formula = SurvObj_colon ~ 1, data = colon_clean,
## dist = "gengamma")
##
## Estimates:
## est L95% U95% se
## mu 7.5454 7.3169 7.7739 0.1166
## sigma 1.5753 1.4382 1.7253 0.0731
## Q -0.4805 -0.8550 -0.1061 0.1911
##
## N = 929, Events: 452, Censored: 477
## Total time at risk: 1551389
## Log-likelihood = -4103.464, df = 3
## AIC = 8212.927
# ── Figure 1: AIC Bar Chart ───────────────────────────────────
aic_df <- data.frame(Model = tbl3_data$Model, AIC = tbl3_data$AIC)
aic_df$Model <- factor(aic_df$Model, levels = aic_df$Model[order(aic_df$AIC)])
p_aic <- ggplot(aic_df, aes(x = Model, y = AIC,
fill = Model == best_model_name)) +
geom_bar(stat = "identity", width = 0.6, color = "black", linewidth = 0.3) +
geom_text(aes(label = round(AIC, 1)), vjust = -0.4, size = 3.5, fontface = "bold") +
scale_fill_manual(values = c("TRUE" = "#2C6E49", "FALSE" = "#A8DADC"),
guide = "none") +
labs(#title = "Figure 1: AIC Comparison Across Parametric Models",
subtitle = paste("Best model:", best_model_name, "(lowest AIC, highlighted)"),
x = "Model", y = "AIC") +
theme_bw(base_size = 13) +
theme(plot.title = element_text(face = "bold"),
axis.text.x = element_text(angle = 20, hjust = 1))
print(p_aic)

# ── Figure 2: Log-Likelihood Dot Plot ────────────────────────
loglik_df <- data.frame(Model = tbl3_data$Model, LogLik = tbl3_data$LogLik)
loglik_df$Model <- factor(loglik_df$Model,
levels = loglik_df$Model[order(loglik_df$LogLik,
decreasing = TRUE)])
p_loglik <- ggplot(loglik_df, aes(x = LogLik, y = Model,
color = Model == best_model_name)) +
geom_point(size = 5) +
geom_vline(xintercept = max(loglik_df$LogLik),
linetype = "dashed", color = "#2C6E49") +
geom_text(aes(label = round(LogLik, 1)), vjust = -1.2, size = 3.5) +
scale_color_manual(values = c("TRUE" = "#2C6E49", "FALSE" = "black"),
guide = "none") +
labs(#title = "Figure 2: Log-Likelihood Comparison Across Parametric Models",
subtitle = "Higher (less negative) = better fit",
x = "Log-Likelihood", y = "Model") +
theme_bw(base_size = 13) +
theme(plot.title = element_text(face = "bold"))
print(p_loglik)

# ── Figure 3: Survival Curves — All Models ───────────────────
time_seq_all <- seq(0, max(colon_clean$time, na.rm = TRUE), by = 10)
colors_models <- c("#E63946","#457B9D","#2C6E49","#F4A261",
"#9B2226","#6A0572","#3D405B")
names(colors_models) <- names(fits)
surv_curves <- do.call(rbind, lapply(names(fits), function(m) {
s <- summary(fits[[m]], t = time_seq_all, type = "survival", ci = FALSE)[[1]]
data.frame(Time = s$time, Survival = s$est, Model = m)
}))
p_surv_compare <- ggplot(surv_curves, aes(x = Time, y = Survival, color = Model)) +
geom_line(linewidth = 0.9) +
scale_color_manual(values = colors_models) +
labs(#title = "Figure 3: Fitted Survival Curves — All Parametric Models",
x = "Time (days)",
y = "Survival Probability S(t)",
color = "Model") +
theme_bw(base_size = 13) +
theme(plot.title = element_text(face = "bold"),
legend.position = "right")
print(p_surv_compare)

# Life Functions of the Best Model
# ============================================================
time_seq <- seq(1, max(colon_clean$time, na.rm = TRUE), by = 10)
surv_best <- summary(best_model, t = time_seq, type = "survival", ci = FALSE)[[1]]
haz_best <- summary(best_model, t = time_seq, type = "hazard", ci = FALSE)[[1]]
S_t <- surv_best$est
h_t <- haz_best$est
F_t <- 1 - S_t
f_t <- h_t * S_t
# ── Mean & Variance ─────────────────
dt <- diff(time_seq)[1]
mean_T <- sum(S_t) * dt
ET2 <- 2 * sum(time_seq * S_t) * dt
var_T <- ET2 - mean_T^2
cat(sprintf("\nMean survival time : %.2f days (%.1f years)\n",
mean_T, mean_T / 365))
##
## Mean survival time : 2130.40 days (5.8 years)
cat(sprintf("Variance : %.2f days^2\n", var_T))
## Variance : 1491721.66 days^2
cat(sprintf("Standard deviation : %.2f days\n", sqrt(var_T)))
## Standard deviation : 1221.36 days
# ── Table 4: Life Functions at Selected Time Points ───────────
selected_times <- c(100, 200, 365, 500, 730, 1000, 1500, 2000, 2500, 3000)
lf_idx <- sapply(selected_times,
function(tp) which.min(abs(time_seq - tp)))
tbl4_data <- data.frame(
`Time (days)` = time_seq[lf_idx],
`S(t)` = round(S_t[lf_idx], 4),
`h(t)` = round(h_t[lf_idx], 6),
`f(t)` = round(f_t[lf_idx], 6),
`F(t)` = round(F_t[lf_idx], 4),
check.names = FALSE
)
kable(tbl4_data,
caption = paste("Table 4: Life Functions at Selected Time Points —",
best_model_name, "Model"),
align = "rrrrr") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, position = "center") %>%
add_header_above(c(" " = 1, "Survival" = 1, "Hazard" = 1,
"Density" = 1, "CDF" = 1)) %>%
column_spec(1, bold = TRUE)
Table 4: Life Functions at Selected Time Points — GenGamma Model
|
|
Survival
|
Hazard
|
Density
|
CDF
|
|
Time (days)
|
S(t)
|
h(t)
|
f(t)
|
F(t)
|
|
101
|
0.9901
|
0.000229
|
0.000227
|
0.0099
|
|
201
|
0.9605
|
0.000354
|
0.000340
|
0.0395
|
|
361
|
0.9035
|
0.000394
|
0.000356
|
0.0965
|
|
501
|
0.8554
|
0.000385
|
0.000329
|
0.1446
|
|
731
|
0.7856
|
0.000354
|
0.000278
|
0.2144
|
|
1001
|
0.7177
|
0.000317
|
0.000227
|
0.2823
|
|
1501
|
0.6212
|
0.000263
|
0.000164
|
0.3788
|
|
2001
|
0.5500
|
0.000226
|
0.000124
|
0.4500
|
|
2501
|
0.4950
|
0.000198
|
0.000098
|
0.5050
|
|
3001
|
0.4509
|
0.000176
|
0.000079
|
0.5491
|
# ── Table 5: Mean & Variance ──────────────────────────────────
tbl5_data <- data.frame(
Statistic = c("Mean Survival Time (days)",
"Mean Survival Time (years)",
"Variance (days\u00b2)",
"Standard Deviation (days)"),
Value = c(round(mean_T, 2),
round(mean_T / 365, 2),
round(var_T, 2),
round(sqrt(var_T), 2))
)
kable(tbl5_data,
caption = paste("Table 5: Mean and Variance of Survival Time —",
best_model_name, "Model"),
align = "lr") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, position = "center") %>%
column_spec(1, bold = TRUE)
Table 5: Mean and Variance of Survival Time — GenGamma Model
|
Statistic
|
Value
|
|
Mean Survival Time (days)
|
2130.40
|
|
Mean Survival Time (years)
|
5.84
|
|
Variance (days²)
|
1491721.66
|
|
Standard Deviation (days)
|
1221.36
|
# ── Figure 4: 2×2 Life Function Plots ────────────────────────
df_life <- data.frame(Time = time_seq, S = S_t, h = h_t, f = f_t, F = F_t)
p_St <- ggplot(df_life, aes(x = Time, y = S)) +
geom_line(color = "#457B9D", linewidth = 1.2) +
labs(title = paste("Survival Function —", best_model_name),
x = "Time (days)", y = "S(t)") +
theme_bw(base_size = 12) +
theme(plot.title = element_text(face = "bold", size = 11))
p_ht <- ggplot(df_life, aes(x = Time, y = h)) +
geom_line(color = "#E63946", linewidth = 1.2) +
labs(title = paste("Hazard Function —", best_model_name),
x = "Time (days)", y = "h(t)") +
theme_bw(base_size = 12) +
theme(plot.title = element_text(face = "bold", size = 11))
p_ft <- ggplot(df_life, aes(x = Time, y = f)) +
geom_line(color = "#2C6E49", linewidth = 1.2) +
labs(title = paste("Density Function —", best_model_name),
x = "Time (days)", y = "f(t)") +
theme_bw(base_size = 12) +
theme(plot.title = element_text(face = "bold", size = 11))
p_Ft <- ggplot(df_life, aes(x = Time, y = F)) +
geom_line(color = "#9B2226", linewidth = 1.2) +
labs(title = paste("CDF —", best_model_name),
x = "Time (days)", y = "F(t)") +
theme_bw(base_size = 12) +
theme(plot.title = element_text(face = "bold", size = 11))
p_life_grid <- gridExtra::grid.arrange(p_St, p_ht, p_ft, p_Ft, nrow = 2)

# MLE Parameter Table & Interpretation
# ============================================================
# ── Table 6: MLE Parameter Estimates ─────────────────────────
param_raw <- as.data.frame(best_model$res)
tbl6_data <- data.frame(
Parameter = rownames(param_raw),
Estimate = round(param_raw$est, 4),
`Std. Error` = round(param_raw$se, 4),
`95% CI Lower` = round(param_raw$`L95%`, 4),
`95% CI Upper` = round(param_raw$`U95%`, 4),
check.names = FALSE
)
kable(tbl6_data,
caption = paste("Table 6: MLE Parameter Estimates —",
best_model_name, "Model"),
align = "lrrrr") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, position = "center") %>%
column_spec(1, bold = TRUE)
Table 6: MLE Parameter Estimates — GenGamma Model
|
Parameter
|
Estimate
|
Std. Error
|
95% CI Lower
|
95% CI Upper
|
|
mu
|
7.5454
|
0.1166
|
7.3169
|
7.7739
|
|
sigma
|
1.5753
|
0.0731
|
1.4382
|
1.7253
|
|
Q
|
-0.4805
|
0.1911
|
-0.8550
|
-0.1061
|
cat("\n=== Full Model Summary ===\n")
##
## === Full Model Summary ===
print(best_model)
## Call:
## flexsurvreg(formula = SurvObj_colon ~ 1, data = colon_clean,
## dist = "gengamma")
##
## Estimates:
## est L95% U95% se
## mu 7.5454 7.3169 7.7739 0.1166
## sigma 1.5753 1.4382 1.7253 0.0731
## Q -0.4805 -0.8550 -0.1061 0.1911
##
## N = 929, Events: 452, Censored: 477
## Total time at risk: 1551389
## Log-likelihood = -4103.464, df = 3
## AIC = 8212.927
# ── Printed Interpretation ───────────────────────────────────
cat("\n========================================================\n")
##
## ========================================================
cat("INTERPRETATION SUMMARY\n")
## INTERPRETATION SUMMARY
cat("========================================================\n")
## ========================================================
cat("Model :", best_model_name, "\n")
## Model : GenGamma
cat("Dataset : Colon Cancer\n")
## Dataset : Colon Cancer
cat("Patients :", n_total, " | Events:", n_events,
" | Censored:", n_censored, "\n\n")
## Patients : 929 | Events: 452 | Censored: 477
cat("Shape parameter:\n")
## Shape parameter:
cat(" > 1 → Hazard INCREASES over time (aging / deterioration)\n")
## > 1 → Hazard INCREASES over time (aging / deterioration)
cat(" < 1 → Hazard DECREASES over time (early risk dominates)\n")
## < 1 → Hazard DECREASES over time (early risk dominates)
cat(" = 1 → Constant hazard (Exponential)\n\n")
## = 1 → Constant hazard (Exponential)
cat("Scale parameter: sets the characteristic time scale of survival.\n\n")
## Scale parameter: sets the characteristic time scale of survival.
cat("Life Functions:\n")
## Life Functions:
cat(" S(t) : Probability of surviving beyond time t\n")
## S(t) : Probability of surviving beyond time t
cat(" h(t) : Instantaneous risk of the event at time t\n")
## h(t) : Instantaneous risk of the event at time t
cat(" f(t) : Probability density of the event near time t\n")
## f(t) : Probability density of the event near time t
cat(" F(t) : Probability of having experienced the event by time t\n\n")
## F(t) : Probability of having experienced the event by time t
cat(sprintf("Mean survival : %.2f days (%.1f years)\n", mean_T, mean_T / 365))
## Mean survival : 2130.40 days (5.8 years)
cat(sprintf("Std deviation : %.2f days\n", sqrt(var_T)))
## Std deviation : 1221.36 days
cat("========================================================\n")
## ========================================================
# Saving all figs & tables
# ============================================================
cat("\n\n=== SAVING OUTPUTS ===\n")
##
##
## === SAVING OUTPUTS ===
# ── Figures ──────────────────────────────────────────────────
ggsave(file.path(fig_dir, "Figure1_AIC_Comparison.png"),
plot = p_aic, width = 8, height = 5, dpi = 300, bg = "white")
cat("Saved: Figure1_AIC_Comparison.png\n")
## Saved: Figure1_AIC_Comparison.png
ggsave(file.path(fig_dir, "Figure2_LogLikelihood_Comparison.png"),
plot = p_loglik, width = 8, height = 5, dpi = 300, bg = "white")
cat("Saved: Figure2_LogLikelihood_Comparison.png\n")
## Saved: Figure2_LogLikelihood_Comparison.png
ggsave(file.path(fig_dir, "Figure3_Survival_Curves_AllModels.png"),
plot = p_surv_compare, width = 9, height = 5, dpi = 300, bg = "white")
cat("Saved: Figure3_Survival_Curves_AllModels.png\n")
## Saved: Figure3_Survival_Curves_AllModels.png
ggsave(file.path(fig_dir, "Figure4_LifeFunctions_BestModel.png"),
plot = p_life_grid, width = 10, height = 8, dpi = 300, bg = "white")
cat("Saved: Figure4_LifeFunctions_BestModel.png\n")
## Saved: Figure4_LifeFunctions_BestModel.png
# ── Tables as CSV ─────────────────────────────────────────────
write.csv(tbl1_data, file.path(tbl_dir, "Table1_Variable_Summary.csv"), row.names = FALSE)
write.csv(tbl2_data, file.path(tbl_dir, "Table2_Sample_Summary.csv"), row.names = FALSE)
write.csv(tbl3_data, file.path(tbl_dir, "Table3_Model_Comparison.csv"), row.names = FALSE)
write.csv(tbl4_data, file.path(tbl_dir, "Table4_Life_Functions.csv"), row.names = FALSE)
write.csv(tbl5_data, file.path(tbl_dir, "Table5_Mean_Variance.csv"), row.names = FALSE)
write.csv(tbl6_data, file.path(tbl_dir, "Table6_MLE_Parameters.csv"), row.names = FALSE)
cat("All CSV tables saved.\n")
## All CSV tables saved.
# ── Tables as HTML ───────────────────
save_kable(
kable(tbl1_data,
caption = "Table 1: Summary Statistics of Key Variables (Pre-Imputation)",
align = "lrrrrr") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, position = "center") %>%
column_spec(1, bold = TRUE),
file = file.path(tbl_dir, "Table1_Variable_Summary.html")
)
save_kable(
kable(tbl2_data,
caption = "Table 2: Sample Summary — Colon Cancer Dataset",
col.names = c("Statistic", "Value"), align = "lr") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, position = "center") %>%
column_spec(1, bold = TRUE),
file = file.path(tbl_dir, "Table2_Sample_Summary.html")
)
save_kable(
kable(tbl3_data,
caption = "Table 3: Parametric Model Comparison — MLE, Log-Likelihood, AIC & BIC",
align = "llrrr") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, position = "center") %>%
column_spec(1, bold = TRUE) %>%
row_spec(1, bold = TRUE, color = "white", background = "#2C6E49"),
file = file.path(tbl_dir, "Table3_Model_Comparison.html")
)
save_kable(
kable(tbl4_data,
caption = paste("Table 4: Life Functions at Selected Time Points —",
best_model_name, "Model"),
align = "rrrrr") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, position = "center") %>%
add_header_above(c(" " = 1, "Survival" = 1, "Hazard" = 1,
"Density" = 1, "CDF" = 1)) %>%
column_spec(1, bold = TRUE),
file = file.path(tbl_dir, "Table4_Life_Functions.html")
)
save_kable(
kable(tbl5_data,
caption = paste("Table 5: Mean and Variance of Survival Time —",
best_model_name, "Model"),
align = "lr") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, position = "center") %>%
column_spec(1, bold = TRUE),
file = file.path(tbl_dir, "Table5_Mean_Variance.html")
)
save_kable(
kable(tbl6_data,
caption = paste("Table 6: MLE Parameter Estimates —",
best_model_name, "Model"),
align = "lrrrr") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, position = "center") %>%
column_spec(1, bold = TRUE),
file = file.path(tbl_dir, "Table6_MLE_Parameters.html")
)
cat("All HTML tables saved.\n")
## All HTML tables saved.
# Final confrmation
# ============================================================
cat("\n========================================================\n")
##
## ========================================================
cat("ALL OUTPUTS SAVED SUCCESSFULLY\n")
## ALL OUTPUTS SAVED SUCCESSFULLY
cat("========================================================\n")
## ========================================================
cat("Root folder :", file.path(base_dir, "outputs"), "\n\n")
## Root folder : /cloud/project/outputs
cat("FIGURES (", fig_dir, ")\n", sep = "")
## FIGURES (/cloud/project/outputs/figures)
cat(" Figure1_AIC_Comparison.png\n")
## Figure1_AIC_Comparison.png
cat(" Figure2_LogLikelihood_Comparison.png\n")
## Figure2_LogLikelihood_Comparison.png
cat(" Figure3_Survival_Curves_AllModels.png\n")
## Figure3_Survival_Curves_AllModels.png
cat(" Figure4_LifeFunctions_BestModel.png\n\n")
## Figure4_LifeFunctions_BestModel.png
cat("TABLES (", tbl_dir, ")\n", sep = "")
## TABLES (/cloud/project/outputs/tables)
cat(" Table1_Variable_Summary .csv .html\n")
## Table1_Variable_Summary .csv .html
cat(" Table2_Sample_Summary .csv .html\n")
## Table2_Sample_Summary .csv .html
cat(" Table3_Model_Comparison .csv .html\n")
## Table3_Model_Comparison .csv .html
cat(" Table4_Life_Functions .csv .html\n")
## Table4_Life_Functions .csv .html
cat(" Table5_Mean_Variance .csv .html\n")
## Table5_Mean_Variance .csv .html
cat(" Table6_MLE_Parameters .csv .html\n")
## Table6_MLE_Parameters .csv .html
cat("========================================================\n")
## ========================================================
```